diff --git a/Annex/AdjustedBranch.hs b/Annex/AdjustedBranch.hs index a6656ec08e..7f623c4139 100644 --- a/Annex/AdjustedBranch.hs +++ b/Annex/AdjustedBranch.hs @@ -558,8 +558,8 @@ reverseAdjustedCommit commitparent adj (csha, basiscommit) origbranch reverseAdjustedTree :: Sha -> Adjustment -> Sha -> Annex Sha reverseAdjustedTree basis adj csha = do (diff, cleanup) <- inRepo (Git.DiffTree.commitDiff csha) - let (adds, others) = partition (\dti -> Git.DiffTree.srcsha dti == nullSha) diff - let (removes, changes) = partition (\dti -> Git.DiffTree.dstsha dti == nullSha) others + let (adds, others) = partition (\dti -> Git.DiffTree.srcsha dti `elem` nullShas) diff + let (removes, changes) = partition (\dti -> Git.DiffTree.dstsha dti `elem` nullShas) others adds' <- catMaybes <$> mapM (adjustTreeItem reverseadj) (map diffTreeToTreeItem adds) treesha <- Git.Tree.adjustTree diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 6934e62bab..91d0276daa 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -577,10 +577,11 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do -} run [] = noop run changers = do + config <- Annex.getGitConfig trustmap <- calcTrustMap <$> getStaged trustLog remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog -- partially apply, improves performance - let changers' = map (\c -> c trustmap remoteconfigmap) changers + let changers' = map (\c -> c config trustmap remoteconfigmap) changers fs <- branchFiles forM_ fs $ \f -> do content <- getStaged f diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs index a360919890..98b7b635a4 100644 --- a/Annex/Branch/Transitions.hs +++ b/Annex/Branch/Transitions.hs @@ -22,6 +22,8 @@ import Types.TrustLevel import Types.UUID import Types.MetaData import Types.Remote +import Types.GitConfig (GitConfig) +import Types.ProposedAccepted import Annex.SpecialRemote.Config import qualified Data.Map as M @@ -34,7 +36,7 @@ data FileTransition = ChangeFile Builder | PreserveFile -type TransitionCalculator = TrustMap -> M.Map UUID RemoteConfig -> RawFilePath -> L.ByteString -> FileTransition +type TransitionCalculator = GitConfig -> TrustMap -> M.Map UUID RemoteConfig -> RawFilePath -> L.ByteString -> FileTransition getTransitionCalculator :: Transition -> Maybe TransitionCalculator getTransitionCalculator ForgetGitHistory = Nothing @@ -53,7 +55,7 @@ getTransitionCalculator ForgetDeadRemotes = Just dropDead -- is not removed from the remote log, for the same reason the trust log -- is not changed. dropDead :: TransitionCalculator -dropDead trustmap remoteconfigmap f content = case getLogVariety f of +dropDead gc trustmap remoteconfigmap f content = case getLogVariety gc f of Just OldUUIDBasedLog | f == trustLog -> PreserveFile | f == remoteLog -> ChangeFile $ @@ -85,7 +87,7 @@ dropDead trustmap remoteconfigmap f content = case getLogVariety f of trustmap' = trustmap `M.union` M.map (const DeadTrusted) (M.filter sameasdead remoteconfigmap) sameasdead cm = - case toUUID <$> M.lookup sameasUUIDField cm of + case toUUID . fromProposedAccepted <$> M.lookup sameasUUIDField cm of Nothing -> False Just u' -> M.lookup u' trustmap == Just DeadTrusted minimizesameasdead u l diff --git a/Annex/Content.hs b/Annex/Content.hs index 7c57cf5040..9615513669 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -780,13 +780,12 @@ saveState nocommit = doSideAction $ do {- Downloads content from any of a list of urls, displaying a progress - meter. 
-} -downloadUrl :: Key -> MeterUpdate -> [Url.URLString] -> FilePath -> Annex Bool -downloadUrl k p urls file = +downloadUrl :: Key -> MeterUpdate -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool +downloadUrl k p urls file uo = -- Poll the file to handle configurations where an external -- download command is used. meteredFile file (Just p) k $ - Url.withUrlOptions $ \uo -> - anyM (\u -> Url.download p u file uo) urls + anyM (\u -> Url.download p u file uo) urls {- Copies a key's content, when present, to a temp file. - This is used to speed up some rsyncs. -} diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs index 997f731ca6..cf66801d94 100644 --- a/Annex/Content/PointerFile.hs +++ b/Annex/Content/PointerFile.hs @@ -22,7 +22,9 @@ import Annex.ReplaceFile import Annex.InodeSentinal import Annex.Content.LowLevel import Utility.InodeCache +#if ! defined(mingw32_HOST_OS) import Utility.Touch +#endif {- Populates a pointer file with the content of a key. - diff --git a/Annex/Drop.hs b/Annex/Drop.hs index 52c6f02bb7..af603ac981 100644 --- a/Annex/Drop.hs +++ b/Annex/Drop.hs @@ -16,7 +16,7 @@ import qualified Remote import qualified Command.Drop import Command import Annex.Wanted -import Config +import Annex.SpecialRemote.Config import qualified Database.Keys import Git.FilePath diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index fe2c2e6923..2a265ae46b 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -37,8 +37,10 @@ import Types.Group import Types.FileMatcher import Types.GitConfig import Config.GitConfig +import Annex.SpecialRemote.Config (preferreddirField) import Git.FilePath import Types.Remote (RemoteConfig) +import Types.ProposedAccepted import Annex.CheckAttr import Git.CheckAttr (unspecifiedAttr) import qualified Git.Config @@ -155,8 +157,8 @@ preferredContentKeylessTokens pcd = , SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir) ] ++ commonKeylessTokens LimitAnnexFiles where - preferreddir = fromMaybe "public" $ - M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd + preferreddir = maybe "public" fromProposedAccepted $ + M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] preferredContentKeyedTokens pcd = diff --git a/Annex/Import.hs b/Annex/Import.hs index 7c0f88164b..0f4673a2f9 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -8,7 +8,6 @@ {-# LANGUAGE OverloadedStrings #-} module Annex.Import ( - importTree, ImportTreeConfig(..), ImportCommitConfig(..), buildImportCommit, @@ -37,7 +36,6 @@ import Annex.Export import Annex.RemoteTrackingBranch import Command import Backend -import Config import Types.Key import Types.KeySource import Messages.Progress diff --git a/Annex/SpecialRemote.hs b/Annex/SpecialRemote.hs index 4d5cbb4a77..4e55c399b6 100644 --- a/Annex/SpecialRemote.hs +++ b/Annex/SpecialRemote.hs @@ -17,6 +17,7 @@ import Annex.SpecialRemote.Config import Remote (remoteTypes) import Types.Remote (RemoteConfig, SetupStage(..), typename, setup) import Types.GitConfig +import Types.ProposedAccepted import Config import Remote.List import Logs.Remote @@ -49,10 +50,10 @@ newConfig -- when sameas is used -> RemoteConfig newConfig name sameas fromuser m = case sameas of - Nothing -> M.insert nameField name fromuser + Nothing -> M.insert nameField (Proposed name) fromuser Just (Sameas u) -> addSameasInherited m $ M.fromList - [ (sameasNameField, name) - , (sameasUUIDField, 
fromUUID u) + [ (sameasNameField, Proposed name) + , (sameasUUIDField, Proposed (fromUUID u)) ] `M.union` fromuser specialRemoteMap :: Annex (M.Map UUID RemoteName) @@ -66,11 +67,15 @@ specialRemoteMap = do {- find the remote type -} findType :: RemoteConfig -> Either String RemoteType -findType config = maybe unspecified specified $ M.lookup typeField config +findType config = maybe unspecified (specified . fromProposedAccepted) $ + M.lookup typeField config where unspecified = Left "Specify the type of remote with type=" specified s = case filter (findtype s) remoteTypes of - [] -> Left $ "Unknown remote type " ++ s + [] -> Left $ "Unknown remote type " ++ s + ++ " (pick from: " + ++ intercalate " " (map typename remoteTypes) + ++ ")" (t:_) -> Right t findtype s i = typename i == s @@ -90,11 +95,12 @@ autoEnable = do Left e -> warning (show e) Right (_c, _u) -> when (cu /= u) $ - setConfig (remoteConfig c "config-uuid") (fromUUID cu) + setConfig (remoteAnnexConfig c "config-uuid") (fromUUID cu) _ -> return () where configured rc = fromMaybe False $ - Git.Config.isTrueFalse =<< M.lookup autoEnableField rc + Git.Config.isTrueFalse . fromProposedAccepted + =<< M.lookup autoEnableField rc canenable u = (/= DeadTrusted) <$> lookupTrust u getenabledremotes = M.fromList . map (\r -> (getcu r, r)) diff --git a/Annex/SpecialRemote/Config.hs b/Annex/SpecialRemote/Config.hs index e09ae8ecc7..d4f8da22a8 100644 --- a/Annex/SpecialRemote/Config.hs +++ b/Annex/SpecialRemote/Config.hs @@ -1,18 +1,27 @@ {- git-annex special remote configuration - - - Copyright 2019 Joey Hess + - Copyright 2019-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + module Annex.SpecialRemote.Config where import Common import Types.Remote (RemoteConfigField, RemoteConfig) import Types.UUID +import Types.ProposedAccepted +import Types.RemoteConfig +import Config +import qualified Git.Config import qualified Data.Map as M import qualified Data.Set as S +import Data.Typeable +import GHC.Stack newtype Sameas t = Sameas t deriving (Show) @@ -22,44 +31,84 @@ newtype ConfigFrom t = ConfigFrom t {- The name of a configured remote is stored in its config using this key. -} nameField :: RemoteConfigField -nameField = "name" +nameField = Accepted "name" {- The name of a sameas remote is stored using this key instead. - This prevents old versions of git-annex getting confused. -} sameasNameField :: RemoteConfigField -sameasNameField = "sameas-name" +sameasNameField = Accepted "sameas-name" lookupName :: RemoteConfig -> Maybe String -lookupName c = M.lookup nameField c <|> M.lookup sameasNameField c +lookupName c = fmap fromProposedAccepted $ + M.lookup nameField c <|> M.lookup sameasNameField c + +instance RemoteNameable RemoteConfig where + getRemoteName c = fromMaybe "" (lookupName c) {- The uuid that a sameas remote is the same as is stored in this key. -} sameasUUIDField :: RemoteConfigField -sameasUUIDField = "sameas-uuid" +sameasUUIDField = Accepted "sameas-uuid" {- The type of a remote is stored in its config using this key. 
-} typeField :: RemoteConfigField -typeField = "type" +typeField = Accepted "type" autoEnableField :: RemoteConfigField -autoEnableField = "autoenable" +autoEnableField = Accepted "autoenable" encryptionField :: RemoteConfigField -encryptionField = "encryption" +encryptionField = Accepted "encryption" macField :: RemoteConfigField -macField = "mac" +macField = Accepted "mac" cipherField :: RemoteConfigField -cipherField = "cipher" +cipherField = Accepted "cipher" cipherkeysField :: RemoteConfigField -cipherkeysField = "cipherkeys" +cipherkeysField = Accepted "cipherkeys" pubkeysField :: RemoteConfigField -pubkeysField = "pubkeys" +pubkeysField = Accepted "pubkeys" + +chunkField :: RemoteConfigField +chunkField = Accepted "chunk" chunksizeField :: RemoteConfigField -chunksizeField = "chunksize" +chunksizeField = Accepted "chunksize" + +embedCredsField :: RemoteConfigField +embedCredsField = Accepted "embedcreds" + +preferreddirField :: RemoteConfigField +preferreddirField = Accepted "preferreddir" + +exportTreeField :: RemoteConfigField +exportTreeField = Accepted "exporttree" + +importTreeField :: RemoteConfigField +importTreeField = Accepted "importtree" + +exportTree :: ParsedRemoteConfig -> Bool +exportTree = fromMaybe False . getRemoteConfigValue exportTreeField + +importTree :: ParsedRemoteConfig -> Bool +importTree = fromMaybe False . getRemoteConfigValue importTreeField + +{- Parsers for fields that are common to all special remotes. -} +commonFieldParsers :: [RemoteConfigFieldParser] +commonFieldParsers = + [ optionalStringParser nameField + (FieldDesc "name for the special remote") + , optionalStringParser sameasNameField HiddenField + , optionalStringParser sameasUUIDField HiddenField + , optionalStringParser typeField + (FieldDesc "type of special remote") + , trueFalseParser autoEnableField False + (FieldDesc "automatically enable special remote") + , optionalStringParser preferreddirField + (FieldDesc "directory whose content is preferred") + ] {- A remote with sameas-uuid set will inherit these values from the config - of that uuid. These values cannot be overridden in the remote's config. -} @@ -92,7 +141,8 @@ addSameasInherited m c = case findSameasUUID c of M.restrictKeys parentc sameasInherits findSameasUUID :: RemoteConfig -> Maybe (Sameas UUID) -findSameasUUID c = Sameas . toUUID <$> M.lookup sameasUUIDField c +findSameasUUID c = Sameas . toUUID . fromProposedAccepted + <$> M.lookup sameasUUIDField c {- Remove any fields inherited from a sameas-uuid. When storing a - RemoteConfig, those fields don't get stored, since they were already @@ -108,4 +158,98 @@ findByRemoteConfig matching = map sameasuuid . filter (matching . snd) . M.toLis where sameasuuid (u, c) = case M.lookup sameasUUIDField c of Nothing -> (u, c, Nothing) - Just u' -> (toUUID u', c, Just (ConfigFrom u)) + Just u' -> (toUUID (fromProposedAccepted u'), c, Just (ConfigFrom u)) + +{- Extracts a value from ParsedRemoteConfig. -} +getRemoteConfigValue :: HasCallStack => Typeable v => RemoteConfigField -> ParsedRemoteConfig -> Maybe v +getRemoteConfigValue f m = case M.lookup f m of + Just (RemoteConfigValue v) -> case cast v of + Just v' -> Just v' + Nothing -> error $ unwords + [ "getRemoteConfigValue" + , fromProposedAccepted f + , "found value of unexpected type" + , show (typeOf v) ++ "." + , "This is a bug in git-annex!" + ] + Nothing -> Nothing + +{- Gets all fields that remoteConfigRestPassthrough matched. 
-} +getRemoteConfigPassedThrough :: ParsedRemoteConfig -> M.Map RemoteConfigField String +getRemoteConfigPassedThrough = M.mapMaybe $ \(RemoteConfigValue v) -> + case cast v of + Just (PassedThrough s) -> Just s + Nothing -> Nothing + +newtype PassedThrough = PassedThrough String + +parseRemoteConfig :: RemoteConfig -> RemoteConfigParser -> Either String ParsedRemoteConfig +parseRemoteConfig c rpc = + go [] c (remoteConfigFieldParsers rpc ++ commonFieldParsers) + where + go l c' [] = + let (passover, leftovers) = partition + (maybe (const False) fst (remoteConfigRestPassthrough rpc) . fst) + (M.toList c') + leftovers' = filter (notaccepted . fst) leftovers + in if not (null leftovers') + then Left $ "Unexpected parameters: " ++ + unwords (map (fromProposedAccepted . fst) leftovers') + else Right $ M.fromList $ + l ++ map (uncurry passthrough) passover + go l c' (p:rest) = do + let f = parserForField p + (valueParser p) (M.lookup f c) c >>= \case + Just v -> go ((f,v):l) (M.delete f c') rest + Nothing -> go l (M.delete f c') rest + + passthrough f v = (f, RemoteConfigValue (PassedThrough (fromProposedAccepted v))) + + notaccepted (Proposed _) = True + notaccepted (Accepted _) = False + +optionalStringParser :: RemoteConfigField -> FieldDesc -> RemoteConfigFieldParser +optionalStringParser f fielddesc = RemoteConfigFieldParser + { parserForField = f + , valueParser = p + , fieldDesc = fielddesc + , valueDesc = Nothing + } + where + p (Just v) _c = Right (Just (RemoteConfigValue (fromProposedAccepted v))) + p Nothing _c = Right Nothing + +yesNoParser :: RemoteConfigField -> Bool -> FieldDesc -> RemoteConfigFieldParser +yesNoParser f v fd = genParser yesNo f v fd + (Just (ValueDesc "yes or no")) + +trueFalseParser :: RemoteConfigField -> Bool -> FieldDesc -> RemoteConfigFieldParser +trueFalseParser f v fd = genParser Git.Config.isTrueFalse f v fd + (Just (ValueDesc "true or false")) + +genParser + :: Typeable t + => (String -> Maybe t) + -> RemoteConfigField + -> t -- ^ fallback value + -> FieldDesc + -> Maybe ValueDesc + -> RemoteConfigFieldParser +genParser parse f fallback fielddesc valuedesc = RemoteConfigFieldParser + { parserForField = f + , valueParser = p + , fieldDesc = fielddesc + , valueDesc = valuedesc + } + where + p Nothing _c = Right (Just (RemoteConfigValue fallback)) + p (Just v) _c = case parse (fromProposedAccepted v) of + Just b -> Right (Just (RemoteConfigValue b)) + Nothing -> case v of + Accepted _ -> Right (Just (RemoteConfigValue fallback)) + Proposed _ -> Left $ + "Bad value for " ++ fromProposedAccepted f ++ + case valuedesc of + Just (ValueDesc vd) -> + " (expected " ++ vd ++ ")" + Nothing -> "" diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index 9fea51a929..8304e04c1a 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -1,6 +1,6 @@ {- git-annex ssh interface, with connection caching - - - Copyright 2012-2017 Joey Hess + - Copyright 2012-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -98,13 +98,31 @@ consumeStdinParams NoConsumeStdin = [Param "-n"] {- Returns a filename to use for a ssh connection caching socket, and - parameters to enable ssh connection caching. -} sshCachingInfo :: (SshHost, Maybe Integer) -> Annex (Maybe FilePath, [CommandParam]) -sshCachingInfo (host, port) = go =<< sshCacheDir +sshCachingInfo (host, port) = go =<< sshCacheDir' where - go Nothing = return (Nothing, []) - go (Just dir) = + go (Right dir) = liftIO (bestSocketPath $ dir hostport2socket host port) >>= return . 
\case Nothing -> (Nothing, []) Just socketfile -> (Just socketfile, sshConnectionCachingParams socketfile) + -- No connection caching with concurrency is not a good + -- combination, so warn the user. + go (Left whynocaching) = do + Annex.getState Annex.concurrency >>= \case + NonConcurrent -> return () + Concurrent {} -> warnnocaching whynocaching + ConcurrentPerCpu -> warnnocaching whynocaching + return (Nothing, []) + + warnnocaching whynocaching = do + warning nocachingwarning + warning whynocaching + + nocachingwarning = unwords + [ "You have enabled concurrency, but git-annex is not able" + , "to use ssh connection caching. This may result in" + , "multiple ssh processes prompting for passwords at the" + , "same time." + ] {- Given an absolute path to use for a socket file, - returns whichever is shorter of that or the relative path to the same @@ -133,26 +151,43 @@ sshConnectionCachingParams socketfile = , Param "-o", Param "ControlPersist=yes" ] +sshSocketDirEnv :: String +sshSocketDirEnv = "GIT_ANNEX_SSH_SOCKET_DIR" + {- ssh connection caching creates sockets, so will not work on a - - crippled filesystem. A GIT_ANNEX_TMP_DIR can be provided to use - - a different filesystem. -} + - crippled filesystem. -} sshCacheDir :: Annex (Maybe FilePath) -sshCacheDir - | BuildInfo.sshconnectioncaching = - ifM (fromMaybe True . annexSshCaching <$> Annex.getGitConfig) - ( ifM crippledFileSystem - ( maybe (return Nothing) usetmpdir =<< gettmpdir - , Just <$> fromRepo gitAnnexSshDir - ) - , return Nothing +sshCacheDir = eitherToMaybe <$> sshCacheDir' + +sshCacheDir' :: Annex (Either String FilePath) +sshCacheDir' = + ifM (fromMaybe BuildInfo.sshconnectioncaching . annexSshCaching <$> Annex.getGitConfig) + ( ifM crippledFileSystem + ( gettmpdir >>= \case + Nothing -> + return (Left crippledfswarning) + Just tmpdir -> + liftIO $ catchMsgIO $ + usetmpdir tmpdir + , Right <$> fromRepo gitAnnexSshDir ) - | otherwise = return Nothing + , return (Left "annex.sshcaching is not set to true") + ) where - gettmpdir = liftIO $ getEnv "GIT_ANNEX_TMP_DIR" - usetmpdir tmpdir = liftIO $ catchMaybeIO $ do + gettmpdir = liftIO $ getEnv sshSocketDirEnv + + usetmpdir tmpdir = do let socktmp = tmpdir "ssh" createDirectoryIfMissing True socktmp return socktmp + + crippledfswarning = unwords + [ "This repository is on a crippled filesystem, so unix named" + , "pipes probably don't work, and ssh connection caching" + , "relies on those. One workaround is to set" + , sshSocketDirEnv + , "to point to a directory on a non-crippled filesystem." + ] portParams :: Maybe Integer -> [CommandParam] portParams Nothing = [] diff --git a/Annex/UUID.hs b/Annex/UUID.hs index f3fc4c8acf..1032acd14e 100644 --- a/Annex/UUID.hs +++ b/Annex/UUID.hs @@ -81,7 +81,7 @@ getRepoUUID r = do updatecache u = do g <- gitRepo when (g /= r) $ storeUUIDIn cachekey u - cachekey = remoteConfig r "uuid" + cachekey = remoteAnnexConfig r "uuid" removeRepoUUID :: Annex () removeRepoUUID = do diff --git a/Annex/Url.hs b/Annex/Url.hs index bcc6a747f5..d7be5d243c 100644 --- a/Annex/Url.hs +++ b/Annex/Url.hs @@ -1,13 +1,14 @@ {- Url downloading, with git-annex user agent and configured http - headers, security restrictions, etc. - - - Copyright 2013-2019 Joey Hess + - Copyright 2013-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} module Annex.Url ( withUrlOptions, + withUrlOptionsPromptingCreds, getUrlOptions, getUserAgent, ipAddressesUnlimited, @@ -34,6 +35,7 @@ import qualified Utility.Url as U import Utility.IPAddress import Utility.HttpManagerRestricted import Utility.Metered +import Git.Credential import qualified BuildInfo import Network.Socket @@ -64,6 +66,7 @@ getUrlOptions = Annex.getState Annex.urloptions >>= \case <*> pure urldownloader <*> pure manager <*> (annexAllowedUrlSchemes <$> Annex.getGitConfig) + <*> pure U.noBasicAuth headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd]) @@ -124,6 +127,24 @@ ipAddressesUnlimited = withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a withUrlOptions a = a =<< getUrlOptions +-- When downloading an url, if authentication is needed, uses +-- git-credential to prompt for username and password. +withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a +withUrlOptionsPromptingCreds a = do + g <- Annex.gitRepo + uo <- getUrlOptions + prompter <- mkPrompter + a $ uo + { U.getBasicAuth = \u -> prompter $ + getBasicAuthFromCredential g u + -- Can't download with curl and handle basic auth, + -- so make sure it uses conduit. + , U.urlDownloader = case U.urlDownloader uo of + U.DownloadWithCurl _ -> U.DownloadWithConduit $ + U.DownloadWithCurlRestricted mempty + v -> v + } + checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool checkBoth url expected_size uo = liftIO (U.checkBoth url expected_size uo) >>= \case diff --git a/Annex/View.hs b/Annex/View.hs index d1f41c42d3..190c92165a 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -396,12 +396,12 @@ withViewChanges addmeta removemeta = do void $ liftIO cleanup where handleremovals item - | DiffTree.srcsha item /= nullSha = + | DiffTree.srcsha item `notElem` nullShas = handlechange item removemeta =<< catKey (DiffTree.srcsha item) | otherwise = noop handleadds item - | DiffTree.dstsha item /= nullSha = + | DiffTree.dstsha item `notElem` nullShas = handlechange item addmeta =<< catKey (DiffTree.dstsha item) | otherwise = noop diff --git a/Assistant/DaemonStatus.hs b/Assistant/DaemonStatus.hs index 3e1556328c..27b5a7242e 100644 --- a/Assistant/DaemonStatus.hs +++ b/Assistant/DaemonStatus.hs @@ -19,8 +19,8 @@ import Logs.Trust import Utility.TimeStamp import qualified Remote import qualified Types.Remote as Remote -import Config import Config.DynamicConfig +import Annex.SpecialRemote.Config import Control.Concurrent.STM import System.Posix.Types @@ -60,7 +60,7 @@ calcSyncRemotes = do return $ \dstatus -> dstatus { syncRemotes = syncable - , syncGitRemotes = filter Remote.gitSyncableRemote syncable + , syncGitRemotes = filter (Remote.gitSyncableRemoteType . Remote.remotetype) syncable , syncDataRemotes = dataremotes , exportRemotes = exportremotes , downloadRemotes = contentremotes diff --git a/Assistant/Gpg.hs b/Assistant/Gpg.hs index 6215fba389..01226e0640 100644 --- a/Assistant/Gpg.hs +++ b/Assistant/Gpg.hs @@ -11,6 +11,7 @@ import Utility.Gpg import Utility.UserInfo import Types.Remote (RemoteConfigField) import Annex.SpecialRemote.Config +import Types.ProposedAccepted import qualified Data.Map as M import Control.Applicative @@ -31,7 +32,7 @@ data EnableEncryption = HybridEncryption | SharedEncryption | NoEncryption deriving (Eq) {- Generates Remote configuration for encryption. 
-} -configureEncryption :: EnableEncryption -> (RemoteConfigField, String) -configureEncryption SharedEncryption = (encryptionField, "shared") -configureEncryption NoEncryption = (encryptionField, "none") -configureEncryption HybridEncryption = (encryptionField, "hybrid") +configureEncryption :: EnableEncryption -> (RemoteConfigField, ProposedAccepted String) +configureEncryption SharedEncryption = (encryptionField, Proposed "shared") +configureEncryption NoEncryption = (encryptionField, Proposed "none") +configureEncryption HybridEncryption = (encryptionField, Proposed "hybrid") diff --git a/Assistant/MakeRemote.hs b/Assistant/MakeRemote.hs index ba4df37f97..3b36ef9a51 100644 --- a/Assistant/MakeRemote.hs +++ b/Assistant/MakeRemote.hs @@ -30,6 +30,7 @@ import Assistant.Gpg import Utility.Gpg (KeyId) import Types.GitConfig import Config +import Types.ProposedAccepted import qualified Data.Map as M @@ -59,19 +60,19 @@ makeRsyncRemote name location = makeRemote name location $ const $ void $ go (Just (u, c, mcu)) = setupSpecialRemote name Rsync.remote config Nothing (Just u, R.Enable c, c) mcu config = M.fromList - [ (encryptionField, "shared") - , ("rsyncurl", location) - , ("type", "rsync") + [ (encryptionField, Proposed "shared") + , (Proposed "rsyncurl", Proposed location) + , (typeField, Proposed "rsync") ] {- Inits a gcrypt special remote, and returns its name. -} makeGCryptRemote :: RemoteName -> String -> KeyId -> Annex RemoteName makeGCryptRemote remotename location keyid = initSpecialRemote remotename GCrypt.remote Nothing $ M.fromList - [ ("type", "gcrypt") - , ("gitrepo", location) + [ (typeField, Proposed "gcrypt") + , (Proposed "gitrepo", Proposed location) , configureEncryption HybridEncryption - , ("keyid", keyid) + , (Proposed "keyid", Proposed keyid) ] type SpecialRemoteMaker = RemoteName -> RemoteType -> Maybe CredPair -> R.RemoteConfig -> Annex RemoteName @@ -105,14 +106,14 @@ setupSpecialRemote' setdesc name remotetype config mcreds (mu, ss, c) mcu = do - assistant, because otherwise GnuPG may block once the entropy - pool is drained, and as of now there's no way to tell the user - to perform IO actions to refill the pool. -} - let weakc = M.insert "highRandomQuality" "false" $ M.union config c + let weakc = M.insert (Proposed "highRandomQuality") (Proposed "false") (M.union config c) dummycfg <- liftIO dummyRemoteGitConfig (c', u) <- R.setup remotetype ss mu mcreds weakc dummycfg case mcu of Nothing -> configSet u c' Just (Annex.SpecialRemote.ConfigFrom cu) -> do - setConfig (remoteConfig c' "config-uuid") (fromUUID cu) + setConfig (remoteAnnexConfig c' "config-uuid") (fromUUID cu) configSet cu c' when setdesc $ whenM (isNothing . 
M.lookup u <$> uuidDescMap) $ diff --git a/Assistant/Sync.hs b/Assistant/Sync.hs index 4a90b09943..1bf76c05fc 100644 --- a/Assistant/Sync.hs +++ b/Assistant/Sync.hs @@ -164,7 +164,7 @@ pushToRemotes' now remotes = do updatemap succeeded failed return failed - push branch remote = Command.Sync.pushBranch remote branch + push branch remote = Command.Sync.pushBranch remote (Just branch) parallelPush :: Git.Repo -> [Remote] -> (Remote -> Git.Repo -> IO Bool)-> Assistant ([Remote], [Remote]) parallelPush g rs a = do @@ -265,7 +265,7 @@ changeSyncable (Just r) False = do changeSyncFlag :: Remote -> Bool -> Annex () changeSyncFlag r enabled = do repo <- Remote.getRepo r - let key = Config.remoteConfig repo "sync" + let key = Config.remoteAnnexConfig repo "sync" Config.setConfig key (boolConfig enabled) void Remote.remoteListRefresh diff --git a/Assistant/WebApp/Configurators/AWS.hs b/Assistant/WebApp/Configurators/AWS.hs index c924a78800..92717bfe64 100644 --- a/Assistant/WebApp/Configurators/AWS.hs +++ b/Assistant/WebApp/Configurators/AWS.hs @@ -25,6 +25,7 @@ import Creds import Assistant.Gpg import Git.Types (RemoteName) import Annex.SpecialRemote.Config +import Types.ProposedAccepted import qualified Data.Text as T import qualified Data.Map as M @@ -131,10 +132,10 @@ postAddS3R = awsConfigurator $ do let name = T.unpack $ repoName input makeAWSRemote initSpecialRemote S3.remote TransferGroup (extractCreds input) name $ M.fromList [ configureEncryption $ enableEncryption input - , ("type", "S3") - , ("datacenter", T.unpack $ datacenter input) - , ("storageclass", show $ storageClass input) - , ("chunk", "1MiB") + , (typeField, Proposed "S3") + , (Proposed "datacenter", Proposed $ T.unpack $ datacenter input) + , (Proposed "storageclass", Proposed $ show $ storageClass input) + , (Proposed "chunk", Proposed "1MiB") ] _ -> $(widgetFile "configurators/adds3") #else @@ -155,8 +156,8 @@ postAddGlacierR = glacierConfigurator $ do let name = T.unpack $ repoName input makeAWSRemote initSpecialRemote Glacier.remote SmallArchiveGroup (extractCreds input) name $ M.fromList [ configureEncryption $ enableEncryption input - , ("type", "glacier") - , ("datacenter", T.unpack $ datacenter input) + , (typeField, Proposed "glacier") + , (Proposed "datacenter", Proposed $ T.unpack $ datacenter input) ] _ -> $(widgetFile "configurators/addglacier") #else @@ -167,7 +168,13 @@ getEnableS3R :: UUID -> Handler Html #ifdef WITH_S3 getEnableS3R uuid = do m <- liftAnnex readRemoteLog - if maybe False S3.configIA (M.lookup uuid m) + isia <- case M.lookup uuid m of + Just c -> liftAnnex $ do + pc <- either mempty id . 
parseRemoteConfig c + <$> Remote.configParser S3.remote c + return $ S3.configIA pc + Nothing -> return False + if isia then redirect $ EnableIAR uuid else postEnableS3R uuid #else @@ -222,7 +229,7 @@ makeAWSRemote maker remotetype defaultgroup (AWSCreds ak sk) name config = getRepoInfo :: RemoteConfig -> Widget getRepoInfo c = [whamlet|S3 remote using bucket: #{bucket}|] where - bucket = fromMaybe "" $ M.lookup "bucket" c + bucket = maybe "" fromProposedAccepted $ M.lookup (Accepted "bucket") c #ifdef WITH_S3 previouslyUsedAWSCreds :: Annex (Maybe CredPair) diff --git a/Assistant/WebApp/Configurators/Edit.hs b/Assistant/WebApp/Configurators/Edit.hs index 5f5e9ffed7..60661e1811 100644 --- a/Assistant/WebApp/Configurators/Edit.hs +++ b/Assistant/WebApp/Configurators/Edit.hs @@ -37,7 +37,7 @@ import qualified Git.Command import qualified Git.Config import qualified Annex import Git.Remote -import Remote.Helper.Encryptable (extractCipher) +import Remote.Helper.Encryptable (extractCipher, parseEncryptionConfig) import Types.Crypto import Utility.Gpg import Annex.UUID @@ -46,6 +46,8 @@ import Config import Config.GitConfig import Config.DynamicConfig import Types.Group +import Types.ProposedAccepted +import Annex.SpecialRemote.Config import qualified Data.Text as T import qualified Data.Map as M @@ -125,7 +127,7 @@ setRepoConfig uuid mremote oldc newc = do case M.lookup uuid m of Nothing -> noop Just remoteconfig -> configSet uuid $ - M.insert "preferreddir" dir remoteconfig + M.insert (Proposed "preferreddir") (Proposed dir) remoteconfig when groupChanged $ do liftAnnex $ case repoGroup newc of RepoGroupStandard g -> setStandardGroup uuid g @@ -217,13 +219,21 @@ editForm new (RepoUUID uuid) redirect DashboardR _ -> do let istransfer = repoGroup curr == RepoGroupStandard TransferGroup - config <- liftAnnex $ M.lookup uuid <$> readRemoteLog + config <- liftAnnex $ fromMaybe mempty + . M.lookup uuid + <$> readRemoteLog let repoInfo = getRepoInfo mremote config - let repoEncryption = getRepoEncryption mremote config + let repoEncryption = getRepoEncryption mremote (Just config) $(widgetFile "configurators/edit/repository") editForm _new r@(RepoName _) = page "Edit repository" (Just Configuration) $ do mr <- liftAnnex (repoIdRemote r) - let repoInfo = getRepoInfo mr Nothing + let repoInfo = case mr of + Just rmt -> do + config <- liftAnnex $ fromMaybe mempty + . M.lookup (Remote.uuid rmt) + <$> readRemoteLog + getRepoInfo mr config + Nothing -> getRepoInfo Nothing mempty g <- liftAnnex gitRepo mrepo <- liftAnnex $ maybe (pure Nothing) (Just <$$> Remote.getRepo) mr let sshrepo = maybe False (remoteLocationIsSshUrl . flip parseRemoteLocation g . Git.repoLocation) mrepo @@ -242,17 +252,21 @@ checkAssociatedDirectory cfg (Just r) = do Nothing -> noop _ -> noop -getRepoInfo :: Maybe Remote.Remote -> Maybe Remote.RemoteConfig -> Widget -getRepoInfo (Just r) (Just c) = case M.lookup "type" c of - Just "S3" +getRepoInfo :: Maybe Remote.Remote -> Remote.RemoteConfig -> Widget +getRepoInfo (Just r) c = case fromProposedAccepted <$> M.lookup typeField c of + Just "S3" -> do #ifdef WITH_S3 - | S3.configIA c -> IA.getRepoInfo c + pc <- liftAnnex $ either mempty id . 
parseRemoteConfig c + <$> Remote.configParser S3.remote c + if S3.configIA pc + then IA.getRepoInfo c + else AWS.getRepoInfo c +#else + AWS.getRepoInfo c #endif - | otherwise -> AWS.getRepoInfo c Just t | t /= "git" -> [whamlet|#{t} remote|] _ -> getGitRepoInfo =<< liftAnnex (Remote.getRepo r) -getRepoInfo (Just r) _ = getRepoInfo (Just r) (Just $ Remote.config r) getRepoInfo _ _ = [whamlet|git repository|] getGitRepoInfo :: Git.Repo -> Widget @@ -261,7 +275,7 @@ getGitRepoInfo r = do [whamlet|git repository located at #{loc}|] getRepoEncryption :: Maybe Remote.Remote -> Maybe Remote.RemoteConfig -> Widget -getRepoEncryption (Just _) (Just c) = case extractCipher c of +getRepoEncryption (Just _) (Just c) = case extractCipher pc of Nothing -> [whamlet|not encrypted|] (Just (SharedCipher _)) -> @@ -269,6 +283,7 @@ getRepoEncryption (Just _) (Just c) = case extractCipher c of (Just (EncryptedCipher _ _ ks)) -> desckeys ks (Just (SharedPubKeyCipher _ ks)) -> desckeys ks where + pc = either mempty id $ parseEncryptionConfig c desckeys (KeyIds { keyIds = ks }) = do cmd <- liftAnnex $ gpgCmd <$> Annex.getGitConfig knownkeys <- liftIO (secretKeys cmd) @@ -291,7 +306,7 @@ getUpgradeRepositoryR r = go =<< liftAnnex (repoIdRemote r) liftAnnex $ do repo <- Remote.getRepo rmt setConfig - (remoteConfig repo "ignore") + (remoteAnnexConfig repo "ignore") (Git.Config.boolConfig False) liftAnnex $ void Remote.remoteListRefresh liftAssistant updateSyncRemotes diff --git a/Assistant/WebApp/Configurators/IA.hs b/Assistant/WebApp/Configurators/IA.hs index 04feb965b6..d34f5d5f7c 100644 --- a/Assistant/WebApp/Configurators/IA.hs +++ b/Assistant/WebApp/Configurators/IA.hs @@ -25,6 +25,7 @@ import Types.Remote (RemoteConfig) import qualified Annex.Url as Url import Creds import Annex.SpecialRemote.Config +import Types.ProposedAccepted import qualified Data.Text as T import qualified Data.Map as M @@ -131,21 +132,22 @@ postAddIAR = iaConfigurator $ do case result of FormSuccess input -> liftH $ do let name = escapeBucket $ T.unpack $ itemName input + let wrap (k, v) = (Proposed k, Proposed v) + let c = map wrap $ catMaybes + [ Just ("type", "S3") + , Just ("host", S3.iaHost) + , Just ("bucket", escapeHeader name) + , Just ("x-archive-meta-title", escapeHeader $ T.unpack $ itemName input) + , if mediaType input == MediaOmitted + then Nothing + else Just ("x-archive-mediatype", formatMediaType $ mediaType input) + , (,) <$> pure "x-archive-meta-collection" <*> collectionMediaType (mediaType input) + -- Make item show up ASAP. + , Just ("x-archive-interactive-priority", "1") + , Just ("preferreddir", name) + ] AWS.makeAWSRemote initSpecialRemote S3.remote PublicGroup (extractCreds input) name $ - M.fromList $ catMaybes - [ Just $ configureEncryption NoEncryption - , Just ("type", "S3") - , Just ("host", S3.iaHost) - , Just ("bucket", escapeHeader name) - , Just ("x-archive-meta-title", escapeHeader $ T.unpack $ itemName input) - , if mediaType input == MediaOmitted - then Nothing - else Just ("x-archive-mediatype", formatMediaType $ mediaType input) - , (,) <$> pure "x-archive-meta-collection" <*> collectionMediaType (mediaType input) - -- Make item show up ASAP. - , Just ("x-archive-interactive-priority", "1") - , Just ("preferreddir", name) - ] + M.fromList $ configureEncryption NoEncryption : c _ -> $(widgetFile "configurators/addia") #else postAddIAR = giveup "S3 not supported by this build" @@ -202,7 +204,7 @@ $if (not exists) have been uploaded, and the Internet Archive has processed them. 
|] where - bucket = fromMaybe "" $ M.lookup "bucket" c + bucket = maybe "" fromProposedAccepted $ M.lookup (Accepted "bucket") c #ifdef WITH_S3 url = S3.iaItemUrl bucket #else diff --git a/Assistant/WebApp/Configurators/Local.hs b/Assistant/WebApp/Configurators/Local.hs index faf3cde57e..a1f677d18b 100644 --- a/Assistant/WebApp/Configurators/Local.hs +++ b/Assistant/WebApp/Configurators/Local.hs @@ -39,6 +39,7 @@ import Utility.Gpg import qualified Remote.GCrypt as GCrypt import qualified Types.Remote import Utility.Android +import Types.ProposedAccepted import qualified Data.Text as T import qualified Data.Map as M @@ -325,7 +326,7 @@ getFinishAddDriveR drive = go makewith $ const $ do r <- liftAnnex $ addRemote $ enableSpecialRemote remotename' GCrypt.remote Nothing $ M.fromList - [("gitrepo", dir)] + [(Proposed "gitrepo", Proposed dir)] return (u, r) {- Making a new unencrypted repo, or combining with an existing one. -} makeunencrypted = makewith $ \isnew -> (,) diff --git a/Assistant/WebApp/Configurators/Ssh.hs b/Assistant/WebApp/Configurators/Ssh.hs index 9ed76bef48..002f9c2552 100644 --- a/Assistant/WebApp/Configurators/Ssh.hs +++ b/Assistant/WebApp/Configurators/Ssh.hs @@ -20,6 +20,7 @@ import Types.StandardGroups import Utility.UserInfo import Utility.Gpg import Types.Remote (RemoteConfig) +import Types.ProposedAccepted import Git.Types (RemoteName, fromRef, fromConfigKey) import qualified Remote.GCrypt as GCrypt import qualified Annex @@ -177,7 +178,7 @@ postEnableRsyncR = enableSshRemote getsshinput enableRsyncNet enablersync where enablersync sshdata u = redirect $ ConfirmSshR (sshdata { sshCapabilities = [RsyncCapable] }) u - getsshinput = parseSshUrl <=< M.lookup "rsyncurl" + getsshinput = parseSshUrl . fromProposedAccepted <=< M.lookup (Accepted "rsyncurl") {- This only handles gcrypt repositories that are located on ssh servers; - ones on local drives are handled via another part of the UI. -} @@ -191,7 +192,7 @@ postEnableSshGCryptR u = whenGcryptInstalled $ sshConfigurator $ checkExistingGCrypt sshdata' $ giveup "Expected to find an encrypted git repository, but did not." - getsshinput = parseSshUrl <=< M.lookup "gitrepo" + getsshinput = parseSshUrl . fromProposedAccepted <=< M.lookup (Accepted "gitrepo") getEnableSshGitRemoteR :: UUID -> Handler Html getEnableSshGitRemoteR = postEnableSshGitRemoteR @@ -200,7 +201,7 @@ postEnableSshGitRemoteR = enableSshRemote getsshinput enableRsyncNet enablesshgi where enablesshgitremote sshdata u = redirect $ ConfirmSshR sshdata u - getsshinput = parseSshUrl <=< M.lookup "location" + getsshinput = parseSshUrl . 
fromProposedAccepted <=< M.lookup (Accepted "location") {- To enable a remote that uses ssh as its transport, - parse a config key to get its url, and display a form @@ -424,7 +425,7 @@ getConfirmSshR sshdata u $(widgetFile "configurators/ssh/combine") handleexisting (Just _) = prepSsh False sshdata $ \sshdata' -> do m <- liftAnnex readRemoteLog - case M.lookup "type" =<< M.lookup u m of + case fromProposedAccepted <$> (M.lookup typeField =<< M.lookup u m) of Just "gcrypt" -> combineExistingGCrypt sshdata' u _ -> makeSshRepo ExistingRepo sshdata' @@ -474,7 +475,7 @@ enableGCrypt :: SshData -> RemoteName -> Handler Html enableGCrypt sshdata reponame = setupRemote postsetup Nothing Nothing mk where mk = enableSpecialRemote reponame GCrypt.remote Nothing $ - M.fromList [("gitrepo", genSshUrl sshdata)] + M.fromList [(Proposed "gitrepo", Proposed (genSshUrl sshdata))] postsetup _ = redirect DashboardR {- Combining with a gcrypt repository that may not be @@ -546,11 +547,11 @@ makeSshRepo rs sshdata setup r = do m <- readRemoteLog let c = fromMaybe M.empty (M.lookup (Remote.uuid r) m) - let c' = M.insert "location" (genSshUrl sshdata) $ - M.insert "type" "git" $ - case M.lookup nameField c of + let c' = M.insert (Proposed "location") (Proposed (genSshUrl sshdata)) $ + M.insert typeField (Proposed "git") $ + case fromProposedAccepted <$> M.lookup nameField c of Just _ -> c - Nothing -> M.insert nameField (Remote.name r) c + Nothing -> M.insert nameField (Proposed (Remote.name r)) c configSet (Remote.uuid r) c' makeSshRepoConnection :: RepoStatus -> Annex RemoteName -> (Remote -> Annex ()) -> Handler Html diff --git a/Assistant/WebApp/Configurators/WebDAV.hs b/Assistant/WebApp/Configurators/WebDAV.hs index cec43e1a5f..37ff804557 100644 --- a/Assistant/WebApp/Configurators/WebDAV.hs +++ b/Assistant/WebApp/Configurators/WebDAV.hs @@ -15,13 +15,14 @@ import Creds import qualified Remote.WebDAV as WebDAV import Assistant.WebApp.MakeRemote import qualified Remote -import Types.Remote (RemoteConfig) +import Types.Remote (RemoteConfig, configParser) import Types.StandardGroups import Logs.Remote import Git.Types (RemoteName) import Assistant.Gpg import Types.GitConfig import Annex.SpecialRemote.Config +import Types.ProposedAccepted import qualified Data.Map as M #endif @@ -58,10 +59,12 @@ postEnableWebDAVR uuid = do m <- liftAnnex readRemoteLog let c = fromJust $ M.lookup uuid m let name = fromJust $ lookupName c - let url = fromJust $ M.lookup "url" c + let url = fromProposedAccepted $ fromJust $ M.lookup (Accepted "url") c mcreds <- liftAnnex $ do dummycfg <- liftIO dummyRemoteGitConfig - getRemoteCredPairFor "webdav" c dummycfg (WebDAV.davCreds uuid) + pc <- either mempty id . 
parseRemoteConfig c + <$> configParser WebDAV.remote c + getRemoteCredPairFor "webdav" pc dummycfg (WebDAV.davCreds uuid) case mcreds of Just creds -> webDAVConfigurator $ liftH $ makeWebDavRemote enableSpecialRemote name creds M.empty diff --git a/Assistant/WebApp/RepoList.hs b/Assistant/WebApp/RepoList.hs index 092557d578..f388dd77b9 100644 --- a/Assistant/WebApp/RepoList.hs +++ b/Assistant/WebApp/RepoList.hs @@ -26,6 +26,7 @@ import Assistant.Sync import Config.Cost import Utility.NotificationBroadcaster import qualified Git +import Types.ProposedAccepted import qualified Data.Map as M import qualified Data.Set as S @@ -175,7 +176,7 @@ repoList reposelector selectedremote (Just (iscloud, _)) | onlyCloud reposelector = iscloud | otherwise = True - findinfo m g u = case getconfig "type" of + findinfo m g u = case fromProposedAccepted <$> getconfig (Accepted "type") of Just "rsync" -> val True EnableRsyncR Just "directory" -> val False EnableDirectoryR #ifdef WITH_S3 @@ -188,12 +189,12 @@ repoList reposelector Just "gcrypt" -> -- Skip gcrypt repos on removable drives; -- handled separately. - case getconfig "gitrepo" of + case fromProposedAccepted <$> getconfig (Accepted "gitrepo") of Just rr | remoteLocationIsUrl (parseRemoteLocation rr g) -> val True EnableSshGCryptR _ -> Nothing Just "git" -> - case getconfig "location" of + case fromProposedAccepted <$> getconfig (Accepted "location") of Just loc | remoteLocationIsSshUrl (parseRemoteLocation loc g) -> val True EnableSshGitRemoteR _ -> Nothing diff --git a/CHANGELOG b/CHANGELOG index c888b9966b..c513c0cf57 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -git-annex (8.20191107) UNRELEASED; urgency=medium +git-annex (8.20200221) UNRELEASED; urgency=medium * New v8 repository version. * v7 upgrades automatically to v8. @@ -23,13 +23,72 @@ git-annex (8.20191107) UNRELEASED; urgency=medium -- Joey Hess Tue, 29 Oct 2019 15:13:03 -0400 -git-annex (7.20191231) UNRELEASED; urgency=medium +git-annex (7.20200220) UNRELEASED; urgency=medium + + * Bugfix: export --tracking (a deprecated option) set + annex-annex-tracking-branch, instead of annex-tracking-branch. + * initremote, enableremote: Set remote.name.skipFetchAll when + the remote cannot be fetched from by git, so git fetch --all + will not try to use it. + + -- Joey Hess Wed, 19 Feb 2020 12:48:58 -0400 + +git-annex (7.20200219) upstream; urgency=medium + + * Added sync --only-annex, which syncs the git-annex branch and annexed + content but leaves managing the other git branches up to you. + * Added annex.synconlyannex git config setting, which can also be set with + git-annex config to configure sync in all clones of the repo. + * fsck --from remote: Fix a concurrency bug that could make it incorrectly + detect that content in the remote is corrupt, and remove it, resulting in + data loss. + * When git-annex is built with a ssh that does not support ssh connection + caching, default annex.sshcaching to false, but let the user override it. + * Improve warning messages further when ssh connection caching cannot + be used, to clearly state why. + * Avoid throwing fatal errors when asked to write to a readonly + git remote on http. + * Fix support for repositories tuned with annex.tune.branchhash1=true, + including --all not working and git-annex log not displaying anything + for annexed files. + + -- Joey Hess Wed, 19 Feb 2020 12:44:43 -0400 + +git-annex (7.20200204) upstream; urgency=medium + + * Fix build with persistent-template 2.8.0. 
+ * Makefile: Really move the fish completion to the + vendor_completions.d directory. + + -- Joey Hess Tue, 04 Feb 2020 14:30:55 -0400 + +git-annex (7.20200202.7) upstream; urgency=medium * add: --force-annex/--force-git options make it easier to override annex.largefiles configuration (and potentially safer as it avoids bugs like the smudge bug fixed in the last release). + * reinject --known: Fix bug that prevented it from working in a bare repo. + * Support being used in a git repository that uses sha256 rather than sha1. + * initremote, enableremote: Be stricter about rejecting invalid + configuration parameters for remotes, particularly things like foo=true + when foo=yes is expected. + * initremote, enableremote: Reject unknown configuration parameters + provided to these commands. + * initremote: Added --whatelse option, to show additional + configuration parameters you might want to set. Eg: + git annex initremote type=directory encryption=none --whatelse + * Added LISTCONFIGS to external special remote protocol. Special remote + programs that use GETCONFIG/SETCONFIG are recommended to implement it. + * init: Avoid an ugly error message when http remote has no git-annex + uuid configured. + * Support git remotes that need http basic auth to be accessed, + using git credential to get the password. + * Display a warning when concurrency is enabled but ssh connection caching + is not enabled or won't work due to a crippled filesystem. + * Makefile: Move the fish completion to the vendor_completions.d directory. + * Fixed a test suite failure when run in the C locale. - -- Joey Hess Wed, 01 Jan 2020 12:51:40 -0400 + -- Joey Hess Sun, 02 Feb 2020 00:00:00 -0400 git-annex (7.20191230) upstream; urgency=medium diff --git a/COPYRIGHT b/COPYRIGHT index 858d7f0b74..2ae19d34bb 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -2,7 +2,7 @@ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Source: native package Files: * -Copyright: © 2010-2019 Joey Hess +Copyright: © 2010-2020 Joey Hess License: AGPL-3+ Files: Assistant/WebApp.hs Assistant/WebApp/* templates/* static/* diff --git a/CmdLine/Usage.hs b/CmdLine/Usage.hs index 27619a04a6..cdbb776231 100644 --- a/CmdLine/Usage.hs +++ b/CmdLine/Usage.hs @@ -96,8 +96,8 @@ paramItem :: String paramItem = "ITEM" paramTreeish :: String paramTreeish = "TREEISH" -paramKeyValue :: String -paramKeyValue = "K=V" +paramParamValue :: String +paramParamValue = "PARAM=VALUE" paramNothing :: String paramNothing = "" paramRepeating :: String -> String diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 4fb03f6b04..f728eb175e 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -261,7 +261,7 @@ downloadWeb addunlockedmatcher o url urlinfo file = go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just (toRawFilePath file))) where urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing - downloader f p = downloadUrl urlkey p [url] f + downloader f p = Url.withUrlOptions $ downloadUrl urlkey p [url] f go Nothing = return Nothing -- If we downloaded a html file, try to use youtube-dl to -- extract embedded media. diff --git a/Command/EnableRemote.hs b/Command/EnableRemote.hs index f43ab68f8b..ab8999c36b 100644 --- a/Command/EnableRemote.hs +++ b/Command/EnableRemote.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2013-2019 Joey Hess + - Copyright 2013-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -24,13 +24,15 @@ import Annex.UUID import Config import Config.DynamicConfig import Types.GitConfig +import Types.ProposedAccepted +import Git.Config import qualified Data.Map as M cmd :: Command cmd = command "enableremote" SectionSetup "enables git-annex to use a remote" - (paramPair paramName $ paramOptional $ paramRepeating paramKeyValue) + (paramPair paramName $ paramOptional $ paramRepeating paramParamValue) (withParams seek) seek :: CmdParams -> CommandSeek @@ -41,7 +43,7 @@ start [] = unknownNameError "Specify the remote to enable." start (name:rest) = go =<< filter matchingname <$> Annex.getGitRemotes where matchingname r = Git.remoteName r == Just name - go [] = startSpecialRemote name (Logs.Remote.keyValToConfig rest) + go [] = startSpecialRemote name (Logs.Remote.keyValToConfig Proposed rest) =<< SpecialRemote.findExisting name go (r:_) = do -- This could be either a normal git remote or a special @@ -85,21 +87,23 @@ startSpecialRemote name config (Just (u, c, mcu)) = performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> R.RemoteConfig -> RemoteGitConfig -> Maybe (SpecialRemote.ConfigFrom UUID) -> CommandPerform performSpecialRemote t u oldc c gc mcu = do (c', u') <- R.setup t (R.Enable oldc) (Just u) Nothing c gc - next $ cleanupSpecialRemote u' c' mcu + next $ cleanupSpecialRemote t u' c' mcu -cleanupSpecialRemote :: UUID -> R.RemoteConfig -> Maybe (SpecialRemote.ConfigFrom UUID) -> CommandCleanup -cleanupSpecialRemote u c mcu = do +cleanupSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> Maybe (SpecialRemote.ConfigFrom UUID) -> CommandCleanup +cleanupSpecialRemote t u c mcu = do case mcu of Nothing -> Logs.Remote.configSet u c Just (SpecialRemote.ConfigFrom cu) -> do - setConfig (remoteConfig c "config-uuid") (fromUUID cu) + setConfig (remoteAnnexConfig c "config-uuid") (fromUUID cu) Logs.Remote.configSet cu c Remote.byUUID u >>= \case Nothing -> noop Just r -> do repo <- R.getRepo r setRemoteIgnore repo False + unless (Remote.gitSyncableRemoteType t) $ + setConfig (remoteConfig c "skipFetchAll") (boolConfig True) return True unknownNameError :: String -> Annex a diff --git a/Command/Export.hs b/Command/Export.hs index b0de9f11c0..f66c512d25 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -81,7 +81,7 @@ seek o = do -- handle deprecated option when (exportTracking o) $ - setConfig (remoteConfig r "annex-tracking-branch") + setConfig (remoteAnnexConfig r "tracking-branch") (fromRef $ exportTreeish o) tree <- filterPreferredContent r =<< @@ -216,7 +216,7 @@ mkDiffMap old new db = do , (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek ] getek sha - | sha == nullSha = return Nothing + | sha `elem` nullShas = return Nothing | otherwise = Just <$> exportKey sha newtype FileUploaded = FileUploaded { fromFileUploaded :: Bool } @@ -310,7 +310,7 @@ cleanupExport r db ek loc sent = do startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart startUnexport r db f shas = do - eks <- forM (filter (/= nullSha) shas) exportKey + eks <- forM (filter (`notElem` nullShas) shas) exportKey if null eks then stop else starting ("unexport " ++ name r) (ActionItemOther (Just (fromRawFilePath f'))) $ @@ -359,7 +359,7 @@ cleanupUnexport r db eks loc = do startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart startRecoverIncomplete r db sha oldf - | sha == nullSha = stop + | sha `elem` nullShas = stop | otherwise = do ek <- exportKey sha let loc = exportTempName ek diff --git a/Command/Fsck.hs 
b/Command/Fsck.hs index 65c0112ea7..cee57c763b 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2019 Joey Hess + - Copyright 2010-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -161,6 +161,11 @@ performRemote key afile backend numcopies remote = ] ai = mkActionItem (key, afile) withtmp a = do + -- Put it in the gitAnnexTmpObjectDir since that's on a + -- filesystem where object temp files are normally + -- stored. The pid prevents multiple fsck processes + -- contending over the same file. (Multiple threads cannot, + -- because OnlyActionOn is used.) pid <- liftIO getPID t <- fromRepo gitAnnexTmpObjectDir createAnnexDirectory t @@ -541,7 +546,7 @@ badContentRemote remote localcopy key = do runFsck :: Incremental -> ActionItem -> Key -> Annex Bool -> CommandStart runFsck inc ai key a = stopUnless (needFsck inc key) $ - starting "fsck" ai $ do + starting "fsck" (OnlyActionOn key ai) $ do ok <- a when ok $ recordFsckTime inc key diff --git a/Command/InitRemote.hs b/Command/InitRemote.hs index 09aee869dc..c7b65c040c 100644 --- a/Command/InitRemote.hs +++ b/Command/InitRemote.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -16,27 +16,36 @@ import Annex.SpecialRemote import qualified Remote import qualified Logs.Remote import qualified Types.Remote as R +import Types.RemoteConfig import Annex.UUID import Logs.UUID import Logs.Remote import Types.GitConfig +import Types.ProposedAccepted import Config +import Git.Config cmd :: Command cmd = command "initremote" SectionSetup "creates a special (non-git) remote" - (paramPair paramName $ paramOptional $ paramRepeating paramKeyValue) + (paramPair paramName $ paramOptional $ paramRepeating paramParamValue) (seek <$$> optParser) data InitRemoteOptions = InitRemoteOptions { cmdparams :: CmdParams , sameas :: Maybe (DeferredParse UUID) + , whatElse :: Bool } optParser :: CmdParamsDesc -> Parser InitRemoteOptions optParser desc = InitRemoteOptions <$> cmdParams desc <*> optional parseSameasOption + <*> switch + ( long "whatelse" + <> short 'w' + <> help "describe other configuration parameters for a special remote" + ) parseSameasOption :: Parser (DeferredParse UUID) parseSameasOption = parseUUIDOption <$> strOption @@ -63,35 +72,67 @@ start o (name:ws) = ifM (isJust <$> findExisting name) (Just . 
Sameas <$$> getParsed) (sameas o) c <- newConfig name sameasuuid - (Logs.Remote.keyValToConfig ws) + (Logs.Remote.keyValToConfig Proposed ws) <$> readRemoteLog t <- either giveup return (findType c) - starting "initremote" (ActionItemOther (Just name)) $ - perform t name c o + if whatElse o + then startingCustomOutput (ActionItemOther Nothing) $ + describeOtherParamsFor c t + else starting "initremote" (ActionItemOther (Just name)) $ + perform t name c o ) ) perform :: RemoteType -> String -> R.RemoteConfig -> InitRemoteOptions -> CommandPerform perform t name c o = do dummycfg <- liftIO dummyRemoteGitConfig - (c', u) <- R.setup t R.Init (sameasu <|> uuidfromuser) Nothing c dummycfg - next $ cleanup u name c' o + let c' = M.delete uuidField c + (c'', u) <- R.setup t R.Init (sameasu <|> uuidfromuser) Nothing c' dummycfg + next $ cleanup t u name c'' o where - uuidfromuser = case M.lookup "uuid" c of + uuidfromuser = case fromProposedAccepted <$> M.lookup uuidField c of Just s | isUUID s -> Just (toUUID s) | otherwise -> giveup "invalid uuid" Nothing -> Nothing - sameasu = toUUID <$> M.lookup sameasUUIDField c + sameasu = toUUID . fromProposedAccepted <$> M.lookup sameasUUIDField c -cleanup :: UUID -> String -> R.RemoteConfig -> InitRemoteOptions -> CommandCleanup -cleanup u name c o = do +uuidField :: R.RemoteConfigField +uuidField = Accepted "uuid" + +cleanup :: RemoteType -> UUID -> String -> R.RemoteConfig -> InitRemoteOptions -> CommandCleanup +cleanup t u name c o = do case sameas o of Nothing -> do describeUUID u (toUUIDDesc name) Logs.Remote.configSet u c Just _ -> do cu <- liftIO genUUID - setConfig (remoteConfig c "config-uuid") (fromUUID cu) + setConfig (remoteAnnexConfig c "config-uuid") (fromUUID cu) Logs.Remote.configSet cu c + unless (Remote.gitSyncableRemoteType t) $ + setConfig (remoteConfig c "skipFetchAll") (boolConfig True) return True + +describeOtherParamsFor :: RemoteConfig -> RemoteType -> CommandPerform +describeOtherParamsFor c t = do + cp <- R.configParser t c + let l = map mk (filter notinconfig $ remoteConfigFieldParsers cp) + ++ map mk' (maybe [] snd (remoteConfigRestPassthrough cp)) + liftIO $ forM_ l $ \(p, fd, vd) -> case fd of + HiddenField -> return () + FieldDesc d -> do + putStrLn p + putStrLn ("\t" ++ d) + case vd of + Nothing -> return () + Just (ValueDesc d') -> + putStrLn $ "\t(" ++ d' ++ ")" + next $ return True + where + notinconfig fp = not (M.member (parserForField fp) c) + mk fp = ( fromProposedAccepted (parserForField fp) + , fieldDesc fp + , valueDesc fp + ) + mk' (k, v) = (k, v, Nothing) diff --git a/Command/Log.hs b/Command/Log.hs index 861229183f..5597bfbf47 100644 --- a/Command/Log.hs +++ b/Command/Log.hs @@ -210,17 +210,18 @@ getAllLog = getGitLog [] getGitLog :: [FilePath] -> [CommandParam] -> Annex ([RefChange], IO Bool) getGitLog fs os = do + config <- Annex.getGitConfig (ls, cleanup) <- inRepo $ pipeNullSplit $ [ Param "log" , Param "-z" , Param "--pretty=format:%ct" , Param "--raw" - , Param "--abbrev=40" + , Param "--no-abbrev" ] ++ os ++ [ Param $ Git.fromRef Annex.Branch.fullname , Param "--" ] ++ map Param fs - return (parseGitRawLog (map decodeBL' ls), cleanup) + return (parseGitRawLog config (map decodeBL' ls), cleanup) -- Parses chunked git log --raw output, which looks something like: -- @@ -236,8 +237,8 @@ getGitLog fs os = do -- -- The timestamp is not included before all changelines, so -- keep track of the most recently seen timestamp. 
-parseGitRawLog :: [String] -> [RefChange] -parseGitRawLog = parse epoch +parseGitRawLog :: GitConfig -> [String] -> [RefChange] +parseGitRawLog config = parse epoch where epoch = toEnum 0 :: POSIXTime parse oldts ([]:rest) = parse oldts rest @@ -250,7 +251,7 @@ parseGitRawLog = parse epoch (tss, cl') -> (parseTimeStamp tss, cl') mrc = do (old, new) <- parseRawChangeLine cl - key <- locationLogFileKey (toRawFilePath c2) + key <- locationLogFileKey config (toRawFilePath c2) return $ RefChange { changetime = ts , oldref = old diff --git a/Command/Merge.hs b/Command/Merge.hs index 79d028ec83..fe1119dc8a 100644 --- a/Command/Merge.hs +++ b/Command/Merge.hs @@ -12,7 +12,7 @@ import qualified Annex.Branch import qualified Git import qualified Git.Branch import Annex.CurrentBranch -import Command.Sync (prepMerge, mergeLocal, mergeConfig, merge) +import Command.Sync (prepMerge, mergeLocal, mergeConfig, merge, SyncOptions(..)) cmd :: Command cmd = command "merge" SectionMaintenance @@ -41,4 +41,5 @@ mergeSyncedBranch = mergeLocal mergeConfig def =<< getCurrentBranch mergeBranch :: Git.Ref -> CommandStart mergeBranch r = starting "merge" (ActionItemOther (Just (Git.fromRef r))) $ do currbranch <- getCurrentBranch - next $ merge currbranch mergeConfig def Git.Branch.ManualCommit r + let o = def { notOnlyAnnexOption = True } + next $ merge currbranch mergeConfig o Git.Branch.ManualCommit r diff --git a/Command/P2P.hs b/Command/P2P.hs index e1896c7a3f..2e3fad53f7 100644 --- a/Command/P2P.hs +++ b/Command/P2P.hs @@ -320,7 +320,7 @@ setupLink remotename (P2PAddressAuth addr authtoken) = do , Param (formatP2PAddress addr) ] when ok $ do - storeUUIDIn (remoteConfig remotename "uuid") theiruuid + storeUUIDIn (remoteAnnexConfig remotename "uuid") theiruuid storeP2PRemoteAuthToken addr authtoken return LinkSuccess go (Right Nothing) = return $ AuthenticationError "Unable to authenticate with peer. Please check the address and try again." diff --git a/Command/PostReceive.hs b/Command/PostReceive.hs index 096cc87e47..0202fef7bb 100644 --- a/Command/PostReceive.hs +++ b/Command/PostReceive.hs @@ -14,7 +14,7 @@ import qualified Annex import Git.Types import Annex.UpdateInstead import Annex.CurrentBranch -import Command.Sync (mergeLocal, prepMerge, mergeConfig) +import Command.Sync (mergeLocal, prepMerge, mergeConfig, SyncOptions(..)) -- This does not need to modify the git-annex branch to update the -- work tree, but auto-initialization might change the git-annex branch. 
@@ -51,4 +51,5 @@ fixPostReceiveHookEnv = do updateInsteadEmulation :: CommandStart updateInsteadEmulation = do prepMerge - mergeLocal mergeConfig def =<< getCurrentBranch + let o = def { notOnlyAnnexOption = True } + mergeLocal mergeConfig o =<< getCurrentBranch diff --git a/Command/Reinject.hs b/Command/Reinject.hs index d33817debf..a73253ba12 100644 --- a/Command/Reinject.hs +++ b/Command/Reinject.hs @@ -13,6 +13,7 @@ import Annex.Content import Backend import Types.KeySource import Utility.Metered +import qualified Git cmd :: Command cmd = command "reinject" SectionUtility @@ -65,8 +66,13 @@ startKnown src = notAnnexed src $ ) notAnnexed :: FilePath -> CommandStart -> CommandStart -notAnnexed src = ifAnnexed (toRawFilePath src) $ - giveup $ "cannot used annexed file as src: " ++ src +notAnnexed src a = + ifM (fromRepo Git.repoIsLocalBare) + ( a + , ifAnnexed (toRawFilePath src) + (giveup $ "cannot used annexed file as src: " ++ src) + a + ) perform :: FilePath -> Key -> CommandPerform perform src key = ifM move diff --git a/Command/RenameRemote.hs b/Command/RenameRemote.hs index 51e0127b0d..6860bacff4 100644 --- a/Command/RenameRemote.hs +++ b/Command/RenameRemote.hs @@ -13,6 +13,7 @@ import Annex.SpecialRemote.Config (nameField, sameasNameField) import qualified Logs.Remote import qualified Types.Remote as R import qualified Remote +import Types.ProposedAccepted import qualified Data.Map as M @@ -50,6 +51,6 @@ perform u cfg mcu newname = do let (namefield, cu) = case mcu of Nothing -> (nameField, u) Just (Annex.SpecialRemote.ConfigFrom u') -> (sameasNameField, u') - Logs.Remote.configSet cu (M.insert namefield newname cfg) + Logs.Remote.configSet cu (M.insert namefield (Proposed newname) cfg) next $ return True diff --git a/Command/Sync.hs b/Command/Sync.hs index ff35f2219a..fe6f72c7dc 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -1,7 +1,7 @@ {- git-annex command - - Copyright 2011 Joachim Breitner - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -24,6 +24,7 @@ module Command.Sync ( syncBranch, updateBranches, seekExportContent, + SyncOptions(..), ) where import Command @@ -43,6 +44,7 @@ import Git.FilePath import qualified Remote.Git import Config import Config.GitConfig +import Annex.SpecialRemote.Config import Config.DynamicConfig import Config.Files import Annex.Wanted @@ -77,8 +79,10 @@ cmd = withGlobalOptions [jobsOption] $ "synchronize local repository with remotes" (paramRepeating paramRemote) (seek <--< optParser) -data SyncOptions = SyncOptions +data SyncOptions = SyncOptions { syncWith :: CmdParams + , onlyAnnexOption :: Bool + , notOnlyAnnexOption :: Bool , commitOption :: Bool , noCommitOption :: Bool , messageOption :: Maybe String @@ -89,13 +93,26 @@ data SyncOptions = SyncOptions , contentOfOption :: [FilePath] , cleanupOption :: Bool , keyOptions :: Maybe KeyOptions - , resolveMergeOverride :: ResolveMergeOverride + , resolveMergeOverride :: Bool } -newtype ResolveMergeOverride = ResolveMergeOverride Bool - -instance Default ResolveMergeOverride where - def = ResolveMergeOverride False +instance Default SyncOptions where + def = SyncOptions + { syncWith = [] + , onlyAnnexOption = False + , notOnlyAnnexOption = False + , commitOption = False + , noCommitOption = False + , messageOption = Nothing + , pullOption = False + , pushOption = False + , contentOption = False + , noContentOption = False + , contentOfOption = [] + , cleanupOption = False + , keyOptions = Nothing + , resolveMergeOverride = False + } optParser :: CmdParamsDesc -> Parser SyncOptions optParser desc = SyncOptions @@ -103,6 +120,15 @@ optParser desc = SyncOptions ( metavar desc <> completeRemotes )) + <*> switch + ( long "only-annex" + <> short 'a' + <> help "only sync git-annex branch and annexed file contents" + ) + <*> switch + ( long "not-only-annex" + <> help "sync git branches as well as annex" + ) <*> switch ( long "commit" <> help "commit changes to git" @@ -123,16 +149,16 @@ optParser desc = SyncOptions ) <*> switch ( long "content" - <> help "transfer file contents" + <> help "transfer annexed file contents" ) <*> switch ( long "no-content" - <> help "do not transfer file contents" + <> help "do not transfer annexed file contents" ) <*> many (strOption ( long "content-of" <> short 'C' - <> help "transfer file contents of files in a given location" + <> help "transfer contents of annexed files in a given location" <> metavar paramPath )) <*> switch @@ -140,15 +166,17 @@ optParser desc = SyncOptions <> help "remove synced/ branches from previous sync" ) <*> optional parseAllOption - <*> (ResolveMergeOverride <$> invertableSwitch "resolvemerge" True + <*> invertableSwitch "resolvemerge" True ( help "do not automatically resolve merge conflicts" - )) + ) -- Since prepMerge changes the working directory, FilePath options -- have to be adjusted. instance DeferredParseClass SyncOptions where finishParse v = SyncOptions <$> pure (syncWith v) + <*> pure (onlyAnnexOption v) + <*> pure (notOnlyAnnexOption v) <*> pure (commitOption v) <*> pure (noCommitOption v) <*> pure (messageOption v) @@ -171,7 +199,7 @@ seek' o = do let withbranch a = a =<< getCurrentBranch remotes <- syncRemotes (syncWith o) - let gitremotes = filter Remote.gitSyncableRemote remotes + let gitremotes = filter (Remote.gitSyncableRemoteType . Remote.remotetype) remotes dataremotes <- filter (\r -> Remote.uuid r /= NoUUID) <$> filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . 
Remote.gitconfig) remotes let (exportremotes, keyvalueremotes) = partition (exportTree . Remote.config) dataremotes @@ -188,12 +216,12 @@ seek' o = do -- These actions cannot be run concurrently. mapM_ includeCommandAction $ concat [ [ commit o ] - , [ withbranch (mergeLocal mergeConfig (resolveMergeOverride o)) ] + , [ withbranch (mergeLocal mergeConfig o) ] , map (withbranch . pullRemote o mergeConfig) gitremotes , [ mergeAnnex ] ] - whenM shouldsynccontent $ do + whenM (shouldSyncContent o) $ do mapM_ (withbranch . importRemote o mergeConfig) importremotes -- Send content to any exports before other @@ -214,13 +242,9 @@ seek' o = do , [ commitAnnex, mergeAnnex ] ] - void $ includeCommandAction $ withbranch pushLocal + void $ includeCommandAction $ withbranch $ pushLocal o -- Pushes to remotes can run concurrently. mapM_ (commandAction . withbranch . pushRemote o) gitremotes - where - shouldsynccontent = pure (contentOption o) - <||> pure (not (null (contentOfOption o))) - <||> (pure (not (noContentOption o)) <&&> getGitConfigVal annexSyncContent) {- Merging may delete the current directory, so go to the top - of the repo. This also means that sync always acts on all files in the @@ -240,14 +264,14 @@ mergeConfig = , Git.Merge.MergeUnrelatedHistories ] -merge :: CurrBranch -> [Git.Merge.MergeConfig] -> ResolveMergeOverride -> Git.Branch.CommitMode -> Git.Branch -> Annex Bool -merge currbranch mergeconfig resolvemergeoverride commitmode tomerge = case currbranch of +merge :: CurrBranch -> [Git.Merge.MergeConfig] -> SyncOptions -> Git.Branch.CommitMode -> Git.Branch -> Annex Bool +merge currbranch mergeconfig o commitmode tomerge = case currbranch of (Just b, Just adj) -> mergeToAdjustedBranch tomerge (b, adj) mergeconfig canresolvemerge commitmode (b, _) -> autoMergeFrom tomerge b mergeconfig canresolvemerge commitmode where - canresolvemerge = case resolvemergeoverride of - ResolveMergeOverride True -> getGitConfigVal annexResolveMerge - ResolveMergeOverride False -> return False + canresolvemerge = if resolveMergeOverride o + then getGitConfigVal annexResolveMerge + else return False syncBranch :: Git.Branch -> Git.Branch syncBranch = Git.Ref.underBase "refs/heads/synced" . 
fromAdjustedBranch @@ -276,7 +300,7 @@ syncRemotes' ps available = listed = concat <$> mapM Remote.byNameOrGroup ps good r - | Remote.gitSyncableRemote r = + | Remote.gitSyncableRemoteType (Remote.remotetype r) = Remote.Git.repoAvail =<< Remote.getRepo r | otherwise = return True @@ -295,8 +319,10 @@ commit o = stopUnless shouldcommit $ starting "commit" (ActionItemOther Nothing) ] return True where - shouldcommit = pure (commitOption o) + shouldcommit = notOnlyAnnex o <&&> + ( pure (commitOption o) <||> (pure (not (noCommitOption o)) <&&> getGitConfigVal annexAutoCommit) + ) commitMsg :: Annex String commitMsg = do @@ -315,14 +341,18 @@ commitStaged commitmode commitmessage = do void $ inRepo $ Git.Branch.commit commitmode False commitmessage branch parents return True -mergeLocal :: [Git.Merge.MergeConfig] -> ResolveMergeOverride -> CurrBranch -> CommandStart -mergeLocal mergeconfig resolvemergeoverride currbranch@(Just _, _) = +mergeLocal :: [Git.Merge.MergeConfig] -> SyncOptions -> CurrBranch -> CommandStart +mergeLocal mergeconfig o currbranch = stopUnless (notOnlyAnnex o) $ + mergeLocal' mergeconfig o currbranch + +mergeLocal' :: [Git.Merge.MergeConfig] -> SyncOptions -> CurrBranch -> CommandStart +mergeLocal' mergeconfig o currbranch@(Just _, _) = needMerge currbranch >>= \case Nothing -> stop Just syncbranch -> starting "merge" (ActionItemOther (Just $ Git.Ref.describe syncbranch)) $ - next $ merge currbranch mergeconfig resolvemergeoverride Git.Branch.ManualCommit syncbranch -mergeLocal _ _ (Nothing, madj) = do + next $ merge currbranch mergeconfig o Git.Branch.ManualCommit syncbranch +mergeLocal' _ _ (Nothing, madj) = do b <- inRepo Git.Branch.currentUnsafe needMerge (b, madj) >>= \case Nothing -> stop @@ -347,8 +377,8 @@ needMerge (Just branch, madj) = ifM (allM id checks) syncbranch = syncBranch branch branch' = maybe branch (adjBranch . 
originalToAdjusted branch) madj -pushLocal :: CurrBranch -> CommandStart -pushLocal b = do +pushLocal :: SyncOptions -> CurrBranch -> CommandStart +pushLocal o b = stopUnless (notOnlyAnnex o) $ do updateBranches b stop @@ -387,16 +417,25 @@ pullRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && wantpull) $ starting "pull" (ActionItemOther (Just (Remote.name remote))) $ do showOutput - ifM fetch - ( next $ mergeRemote remote branch mergeconfig (resolveMergeOverride o) - , next $ return True + ifM (onlyAnnex o) + ( do + void $ fetch $ map Git.fromRef + [ Annex.Branch.name + , syncBranch $ Annex.Branch.name + ] + next $ return True + , ifM (fetch []) + ( next $ mergeRemote remote branch mergeconfig o + , next $ return True + ) ) where - fetch = do + fetch bs = do repo <- Remote.getRepo remote inRepoWithSshOptionsTo repo (Remote.gitconfig remote) $ - Git.Command.runBool + Git.Command.runBool $ [Param "fetch", Param $ Remote.name remote] + ++ map Param bs wantpull = remoteAnnexPull (Remote.gitconfig remote) importRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek @@ -411,8 +450,7 @@ importRemote o mergeconfig remote currbranch then Nothing else Just (asTopFilePath (toRawFilePath s)) Command.Import.seekRemote remote branch subdir - void $ mergeRemote remote currbranch mergeconfig - (resolveMergeOverride o) + void $ mergeRemote remote currbranch mergeconfig o where wantpull = remoteAnnexPull (Remote.gitconfig remote) @@ -421,8 +459,8 @@ importRemote o mergeconfig remote currbranch - were committed (or pushed changes, if this is a bare remote), - while the synced/master may have changes that some - other remote synced to this remote. So, merge them both. -} -mergeRemote :: Remote -> CurrBranch -> [Git.Merge.MergeConfig] -> ResolveMergeOverride -> CommandCleanup -mergeRemote remote currbranch mergeconfig resolvemergeoverride = ifM isBareRepo +mergeRemote :: Remote -> CurrBranch -> [Git.Merge.MergeConfig] -> SyncOptions -> CommandCleanup +mergeRemote remote currbranch mergeconfig o = ifM isBareRepo ( return True , case currbranch of (Nothing, _) -> do @@ -434,31 +472,36 @@ mergeRemote remote currbranch mergeconfig resolvemergeoverride = ifM isBareRepo ) where mergelisted getlist = and <$> - (mapM (merge currbranch mergeconfig resolvemergeoverride Git.Branch.ManualCommit . remoteBranch remote) =<< getlist) + (mapM (merge currbranch mergeconfig o Git.Branch.ManualCommit . remoteBranch remote) =<< getlist) tomerge = filterM (changed remote) branchlist Nothing = [] branchlist (Just branch) = [fromAdjustedBranch branch, syncBranch branch] pushRemote :: SyncOptions -> Remote -> CurrBranch -> CommandStart pushRemote _o _remote (Nothing, _) = stop -pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> needpush) $ - starting "push" (ActionItemOther (Just (Remote.name remote))) $ next $ do - repo <- Remote.getRepo remote - showOutput - ok <- inRepoWithSshOptionsTo repo gc $ - pushBranch remote branch - if ok - then postpushupdate repo - else do - warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." 
] - showLongNote "(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)" - return ok +pushRemote o remote (Just branch, _) = do + onlyannex <- onlyAnnex o + let mainbranch = if onlyannex then Nothing else Just branch + stopUnless (pure (pushOption o) <&&> needpush mainbranch) $ + starting "push" (ActionItemOther (Just (Remote.name remote))) $ next $ do + repo <- Remote.getRepo remote + showOutput + ok <- inRepoWithSshOptionsTo repo gc $ + pushBranch remote mainbranch + if ok + then postpushupdate repo + else do + warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." ] + return ok where gc = Remote.gitconfig remote - needpush + needpush mainbranch | remoteAnnexReadOnly gc = return False | not (remoteAnnexPush gc) = return False - | otherwise = anyM (newer remote) [syncBranch branch, Annex.Branch.name] + | otherwise = anyM (newer remote) $ catMaybes + [ syncBranch <$> mainbranch + , Just (Annex.Branch.name) + ] -- Older remotes on crippled filesystems may not have a -- post-receive hook set up, so when updateInstead emulation -- is needed, run post-receive manually. @@ -504,20 +547,18 @@ pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> need - But overwriting of data on synced/git-annex can happen, in a race. - The only difference caused by using a forced push in that case is that - the last repository to push wins the race, rather than the first to push. - - - - The sync push will fail to overwrite if receive.denyNonFastforwards is - - set on the remote. -} -pushBranch :: Remote -> Git.Branch -> Git.Repo -> IO Bool -pushBranch remote branch g = directpush `after` annexpush `after` syncpush +pushBranch :: Remote -> Maybe Git.Branch -> Git.Repo -> IO Bool +pushBranch remote mbranch g = directpush `after` annexpush `after` syncpush where - syncpush = flip Git.Command.runBool g $ pushparams - [ Git.Branch.forcePush $ refspec Annex.Branch.name - , refspec $ fromAdjustedBranch branch + syncpush = flip Git.Command.runBool g $ pushparams $ catMaybes + [ Just $ Git.Branch.forcePush $ refspec Annex.Branch.name + , (refspec . fromAdjustedBranch) <$> mbranch ] annexpush = void $ tryIO $ flip Git.Command.runQuiet g $ pushparams [ Git.fromRef $ Git.Ref.base $ Annex.Branch.name ] - directpush = do + directpush = case mbranch of + Nothing -> noop -- Git prints out an error message when this fails. -- In the default configuration of receive.denyCurrentBranch, -- the error message mentions that config setting @@ -528,11 +569,12 @@ pushBranch remote branch g = directpush `after` annexpush `after` syncpush -- including the error displayed when -- receive.denyCurrentBranch=updateInstead -- the user -- will want to see that one. 
- let p = flip Git.Command.gitCreateProcess g $ pushparams - [ Git.fromRef $ Git.Ref.base $ fromAdjustedBranch branch ] - (transcript, ok) <- processTranscript' p Nothing - when (not ok && not ("denyCurrentBranch" `isInfixOf` transcript)) $ - hPutStr stderr transcript + Just branch -> do + let p = flip Git.Command.gitCreateProcess g $ pushparams + [ Git.fromRef $ Git.Ref.base $ fromAdjustedBranch branch ] + (transcript, ok) <- processTranscript' p Nothing + when (not ok && not ("denyCurrentBranch" `isInfixOf` transcript)) $ + hPutStr stderr transcript pushparams branches = [ Param "push" , Param $ Remote.name remote @@ -746,7 +788,7 @@ seekExportContent o rs (currbranch, _) = or <$> forM rs go ] _ -> noop where - gitconfig = show (remoteConfig r "tracking-branch") + gitconfig = show (remoteAnnexConfig r "tracking-branch") fillexport _ _ [] _ = return False fillexport r db (tree:[]) mtbcommitsha = do @@ -783,3 +825,18 @@ cleanupRemote remote (Just b, _) = , Param $ Git.fromRef $ syncBranch $ Git.Ref.base $ Annex.Branch.name ] + +shouldSyncContent :: SyncOptions -> Annex Bool +shouldSyncContent o + | noContentOption o = pure False + | contentOption o || not (null (contentOfOption o)) = pure True + | otherwise = getGitConfigVal annexSyncContent <||> onlyAnnex o + +notOnlyAnnex :: SyncOptions -> Annex Bool +notOnlyAnnex o = not <$> onlyAnnex o + +onlyAnnex :: SyncOptions -> Annex Bool +onlyAnnex o + | notOnlyAnnexOption o = pure False + | onlyAnnexOption o = pure True + | otherwise = getGitConfigVal annexSyncOnlyAnnex diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index bf8c24cd5d..7e56e25181 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2014-2019 Joey Hess + - Copyright 2014-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -24,8 +24,12 @@ import Utility.DataUnits import Utility.CopyFile import Types.Messages import Types.Export +import Types.Crypto +import Types.RemoteConfig +import Annex.SpecialRemote.Config (exportTreeField) import Remote.Helper.ExportImport import Remote.Helper.Chunked +import Remote.Helper.Encryptable (describeEncryption, encryptionField, highRandomQualityField) import Git.Types import Test.Tasty @@ -109,7 +113,7 @@ perform rs unavailrs exportr ks = do desc r' k = intercalate "; " $ map unwords [ [ "key size", show (fromKey keySize k) ] , [ show (getChunkConfig (Remote.config r')) ] - , ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))] + , ["encryption", describeEncryption (Remote.config r')] ] descexport k1 k2 = intercalate "; " $ map unwords [ [ "exporttree=yes" ] @@ -119,33 +123,35 @@ perform rs unavailrs exportr ks = do adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote) adjustChunkSize r chunksize = adjustRemoteConfig r - (M.insert "chunk" (show chunksize)) + (M.insert chunkField (RemoteConfigValue (show chunksize))) -- Variants of a remote with no encryption, and with simple shared -- encryption. Gpg key based encryption is not tested. encryptionVariants :: Remote -> Annex [Remote] encryptionVariants r = do - noenc <- adjustRemoteConfig r (M.insert "encryption" "none") + noenc <- adjustRemoteConfig r $ + M.insert encryptionField (RemoteConfigValue NoneEncryption) sharedenc <- adjustRemoteConfig r $ - M.insert "encryption" "shared" . - M.insert "highRandomQuality" "false" + M.insert encryptionField (RemoteConfigValue SharedEncryption) . 
+ M.insert highRandomQualityField (RemoteConfigValue False) return $ catMaybes [noenc, sharedenc] -- Variant of a remote with exporttree disabled. disableExportTree :: Remote -> Annex Remote disableExportTree r = maybe (error "failed disabling exportree") return - =<< adjustRemoteConfig r (M.delete "exporttree") + =<< adjustRemoteConfig r (M.delete exportTreeField) -- Variant of a remote with exporttree enabled. exportTreeVariant :: Remote -> Annex (Maybe Remote) exportTreeVariant r = ifM (Remote.isExportSupported r) ( adjustRemoteConfig r $ - M.insert "encryption" "none" . M.insert "exporttree" "yes" + M.insert encryptionField (RemoteConfigValue NoneEncryption) . + M.insert exportTreeField (RemoteConfigValue True) , return Nothing ) -- Regenerate a remote with a modified config. -adjustRemoteConfig :: Remote -> (Remote.RemoteConfig -> Remote.RemoteConfig) -> Annex (Maybe Remote) +adjustRemoteConfig :: Remote -> (Remote.ParsedRemoteConfig -> Remote.ParsedRemoteConfig) -> Annex (Maybe Remote) adjustRemoteConfig r adjustconfig = do repo <- Remote.getRepo r Remote.generate (Remote.remotetype r) diff --git a/Command/Undo.hs b/Command/Undo.hs index 0899715a09..d27a4de821 100644 --- a/Command/Undo.hs +++ b/Command/Undo.hs @@ -58,7 +58,7 @@ perform p = do -- Take two passes through the diff, first doing any removals, -- and then any adds. This order is necessary to handle eg, removing -- a directory and replacing it with a file. - let (removals, adds) = partition (\di -> dstsha di == nullSha) diff' + let (removals, adds) = partition (\di -> dstsha di `elem` nullShas) diff' let mkrel di = liftIO $ relPathCwdToFile $ fromRawFilePath $ fromTopFilePath (file di) g diff --git a/Command/Unused.hs b/Command/Unused.hs index 78400db7e1..b68452d5c8 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -267,7 +267,7 @@ withKeysReferencedDiff a getdiff extractsha = do where go d = do let sha = extractsha d - unless (sha == nullSha) $ + unless (sha `elem` nullShas) $ catKey sha >>= maybe noop a {- Filters out keys that have an associated file that's not modified. -} diff --git a/Config.hs b/Config.hs index 68c657aa47..0f5f14d913 100644 --- a/Config.hs +++ b/Config.hs @@ -1,6 +1,6 @@ {- Git configuration - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -20,9 +20,7 @@ import Config.DynamicConfig import Types.Availability import Git.Types import qualified Types.Remote as Remote -import qualified Annex.SpecialRemote.Config as SpecialRemote -import qualified Data.Map as M import qualified Data.ByteString as S type UnqualifiedConfigKey = S.ByteString @@ -64,13 +62,14 @@ instance RemoteNameable RemoteName where instance RemoteNameable Remote where getRemoteName = Remote.name -instance RemoteNameable Remote.RemoteConfig where - getRemoteName c = fromMaybe "" (SpecialRemote.lookupName c) - {- A per-remote config setting in git config. -} remoteConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey remoteConfig r key = ConfigKey $ - "remote." <> encodeBS' (getRemoteName r) <> ".annex-" <> key + "remote." <> encodeBS' (getRemoteName r) <> "." <> key + +{- A per-remote config annex setting in git config. -} +remoteAnnexConfig :: RemoteNameable r => r -> UnqualifiedConfigKey -> ConfigKey +remoteAnnexConfig r key = remoteConfig r ("annex-" <> key) {- A global annex setting in git config. 
-} annexConfig :: UnqualifiedConfigKey -> ConfigKey @@ -86,22 +85,16 @@ remoteCost' :: RemoteGitConfig -> Annex (Maybe Cost) remoteCost' = liftIO . getDynamicConfig . remoteAnnexCost setRemoteCost :: Git.Repo -> Cost -> Annex () -setRemoteCost r c = setConfig (remoteConfig r "cost") (show c) +setRemoteCost r c = setConfig (remoteAnnexConfig r "cost") (show c) setRemoteAvailability :: Git.Repo -> Availability -> Annex () -setRemoteAvailability r c = setConfig (remoteConfig r "availability") (show c) +setRemoteAvailability r c = setConfig (remoteAnnexConfig r "availability") (show c) setRemoteIgnore :: Git.Repo -> Bool -> Annex () -setRemoteIgnore r b = setConfig (remoteConfig r "ignore") (Git.Config.boolConfig b) +setRemoteIgnore r b = setConfig (remoteAnnexConfig r "ignore") (Git.Config.boolConfig b) setRemoteBare :: Git.Repo -> Bool -> Annex () -setRemoteBare r b = setConfig (remoteConfig r "bare") (Git.Config.boolConfig b) - -exportTree :: Remote.RemoteConfig -> Bool -exportTree c = fromMaybe False $ yesNo =<< M.lookup "exporttree" c - -importTree :: Remote.RemoteConfig -> Bool -importTree c = fromMaybe False $ yesNo =<< M.lookup "importtree" c +setRemoteBare r b = setConfig (remoteAnnexConfig r "bare") (Git.Config.boolConfig b) isBareRepo :: Annex Bool isBareRepo = fromRepo Git.repoIsLocalBare diff --git a/Creds.hs b/Creds.hs index 3531060d09..ef05982ca6 100644 --- a/Creds.hs +++ b/Creds.hs @@ -1,6 +1,6 @@ {- Credentials storage - - - Copyright 2012-2014 Joey Hess + - Copyright 2012-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -9,6 +9,7 @@ module Creds ( module Types.Creds, CredPairStorage(..), setRemoteCredPair, + setRemoteCredPair', getRemoteCredPair, getRemoteCredPairFor, missingCredPairFor, @@ -23,11 +24,14 @@ module Creds ( import Annex.Common import qualified Annex import Types.Creds +import Types.RemoteConfig +import Annex.SpecialRemote.Config import Annex.Perms import Utility.FileMode import Crypto import Types.Remote (RemoteConfig, RemoteConfigField) -import Remote.Helper.Encryptable (remoteCipher, remoteCipher', embedCreds, EncryptionIsSetup, extractCipher) +import Types.ProposedAccepted +import Remote.Helper.Encryptable (remoteCipher, remoteCipher', embedCreds, EncryptionIsSetup, extractCipher, parseEncryptionConfig) import Utility.Env (getEnv) import qualified Data.ByteString.Lazy.Char8 as L @@ -53,32 +57,47 @@ data CredPairStorage = CredPairStorage - cipher. The EncryptionIsSetup is witness to that being the case. -} setRemoteCredPair :: EncryptionIsSetup -> RemoteConfig -> RemoteGitConfig -> CredPairStorage -> Maybe CredPair -> Annex RemoteConfig -setRemoteCredPair encsetup c gc storage mcreds = case mcreds of - Nothing -> maybe (return c) (setRemoteCredPair encsetup c gc storage . Just) - =<< getRemoteCredPair c gc storage +setRemoteCredPair = setRemoteCredPair' id + (either (const mempty) id . parseEncryptionConfig) + +setRemoteCredPair' + :: (ProposedAccepted String -> a) + -> (M.Map RemoteConfigField a -> ParsedRemoteConfig) + -> EncryptionIsSetup + -> M.Map RemoteConfigField a + -> RemoteGitConfig + -> CredPairStorage + -> Maybe CredPair + -> Annex (M.Map RemoteConfigField a) +setRemoteCredPair' mkval parseconfig encsetup c gc storage mcreds = case mcreds of + Nothing -> maybe (return c) (setRemoteCredPair' mkval parseconfig encsetup c gc storage . 
Just) + =<< getRemoteCredPair pc gc storage Just creds - | embedCreds c -> + | embedCreds pc -> do let key = credPairRemoteField storage - in storeconfig creds key =<< flip remoteCipher gc =<< localcache creds - | otherwise -> localcache creds + localcache creds + storeconfig creds key =<< remoteCipher pc gc + | otherwise -> do + localcache creds + return c where - localcache creds = do - writeCacheCredPair creds storage - return c + localcache creds = writeCacheCredPair creds storage storeconfig creds key (Just cipher) = do cmd <- gpgCmd <$> Annex.getGitConfig - s <- liftIO $ encrypt cmd (c, gc) cipher + s <- liftIO $ encrypt cmd (pc, gc) cipher (feedBytes $ L.pack $ encodeCredPair creds) (readBytes $ return . L.unpack) - return $ M.insert key (toB64 s) c + return $ M.insert key (mkval (Accepted (toB64 s))) c storeconfig creds key Nothing = - return $ M.insert key (toB64 $ encodeCredPair creds) c + return $ M.insert key (mkval (Accepted (toB64 $ encodeCredPair creds))) c + + pc = parseconfig c {- Gets a remote's credpair, from the environment if set, otherwise - from the cache in gitAnnexCredsDir, or failing that, from the - value in RemoteConfig. -} -getRemoteCredPair :: RemoteConfig -> RemoteGitConfig -> CredPairStorage -> Annex (Maybe CredPair) +getRemoteCredPair :: ParsedRemoteConfig -> RemoteGitConfig -> CredPairStorage -> Annex (Maybe CredPair) getRemoteCredPair c gc storage = maybe fromcache (return . Just) =<< fromenv where fromenv = liftIO $ getEnvCredPair storage @@ -86,7 +105,7 @@ getRemoteCredPair c gc storage = maybe fromcache (return . Just) =<< fromenv fromconfig = do let key = credPairRemoteField storage mcipher <- remoteCipher' c gc - case (M.lookup key c, mcipher) of + case (fromProposedAccepted <$> getRemoteConfigValue key c, mcipher) of (Nothing, _) -> return Nothing (Just enccreds, Just (cipher, storablecipher)) -> fromenccreds enccreds cipher storablecipher @@ -114,7 +133,7 @@ getRemoteCredPair c gc storage = maybe fromcache (return . Just) =<< fromenv return $ Just credpair _ -> error "bad creds" -getRemoteCredPairFor :: String -> RemoteConfig -> RemoteGitConfig -> CredPairStorage -> Annex (Maybe CredPair) +getRemoteCredPairFor :: String -> ParsedRemoteConfig -> RemoteGitConfig -> CredPairStorage -> Annex (Maybe CredPair) getRemoteCredPairFor this c gc storage = go =<< getRemoteCredPair c gc storage where go Nothing = do @@ -183,7 +202,7 @@ removeCreds file = do let f = d file liftIO $ nukeFile f -includeCredsInfo :: RemoteConfig -> CredPairStorage -> [(String, String)] -> Annex [(String, String)] +includeCredsInfo :: ParsedRemoteConfig -> CredPairStorage -> [(String, String)] -> Annex [(String, String)] includeCredsInfo c storage info = do v <- liftIO $ getEnvCredPair storage case v of diff --git a/Crypto.hs b/Crypto.hs index 08aef47cd5..88b85aa7bc 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -3,7 +3,7 @@ - Currently using gpg; could later be modified to support different - crypto backends if neccessary. - - - Copyright 2011-2016 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -13,6 +13,7 @@ {-# LANGUAGE Rank2Types #-} module Crypto ( + EncryptionMethod(..), Cipher, KeyIds(..), EncKey, @@ -37,7 +38,6 @@ module Crypto ( import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import Data.ByteString.UTF8 (fromString) -import qualified Data.Map as M import Control.Monad.IO.Class import Annex.Common @@ -232,14 +232,18 @@ class LensGpgEncParams a where {- Extract the GnuPG options from a pair of a Remote Config and a Remote - Git Config. -} -instance LensGpgEncParams (RemoteConfig, RemoteGitConfig) where +instance LensGpgEncParams (ParsedRemoteConfig, RemoteGitConfig) where getGpgEncParamsBase (_c,gc) = map Param (remoteAnnexGnupgOptions gc) getGpgEncParams (c,gc) = getGpgEncParamsBase (c,gc) ++ {- When the remote is configured to use public-key encryption, - look up the recipient keys and add them to the option list. -} - case M.lookup encryptionField c of - Just "pubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup cipherkeysField c - Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup pubkeysField c + case getRemoteConfigValue encryptionField c of + Just PubKeyEncryption -> + Gpg.pkEncTo $ maybe [] (splitc ',') $ + getRemoteConfigValue cipherkeysField c + Just SharedPubKeyEncryption -> + Gpg.pkEncTo $ maybe [] (splitc ',') $ + getRemoteConfigValue pubkeysField c _ -> [] getGpgDecParams (_c,gc) = map Param (remoteAnnexGnupgDecryptOptions gc) diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs index 522b80973e..ae8a72c1b3 100644 --- a/Database/ContentIdentifier.hs +++ b/Database/ContentIdentifier.hs @@ -5,11 +5,16 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE CPP #-} {-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} {-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-} {-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UndecidableInstances #-} +#if MIN_VERSION_persistent_template(2,8,0) +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE StandaloneDeriving #-} +#endif module Database.ContentIdentifier ( ContentIdentifierHandle, diff --git a/Database/Export.hs b/Database/Export.hs index 71ff67dcd7..1b311d35a0 100644 --- a/Database/Export.hs +++ b/Database/Export.hs @@ -5,11 +5,16 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE CPP #-} {-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} {-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UndecidableInstances #-} +#if MIN_VERSION_persistent_template(2,8,0) +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE StandaloneDeriving #-} +#endif module Database.Export ( ExportHandle, @@ -224,7 +229,7 @@ runExportDiffUpdater updater h old new = do void $ liftIO cleanup where getek sha - | sha == nullSha = return Nothing + | sha `elem` nullShas = return Nothing | otherwise = Just <$> exportKey sha {- Diff from the old to the new tree and update the ExportTree table. -} diff --git a/Database/Fsck.hs b/Database/Fsck.hs index 0683e5d283..caa34452df 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -5,11 +5,16 @@ - Licensed under the GNU AGPL version 3 or higher. 
-} +{-# LANGUAGE CPP #-} {-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} {-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE UndecidableInstances #-} +#if MIN_VERSION_persistent_template(2,8,0) +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE StandaloneDeriving #-} +#endif module Database.Fsck ( FsckHandle, diff --git a/Database/Keys.hs b/Database/Keys.hs index 77506d1b26..b1650b286c 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -260,7 +260,7 @@ reconcileStaged qh = do , Param "--cached" , Param "--raw" , Param "-z" - , Param "--abbrev=40" + , Param "--no-abbrev" -- Optimization: Only find pointer files. This is not -- perfect. A file could start with this and not be a -- pointer file. And a pointer file that is replaced with diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index cc307f996a..f96433b36b 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -5,11 +5,16 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE CPP #-} {-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} {-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes, ScopedTypeVariables #-} {-# LANGUAGE UndecidableInstances #-} +#if MIN_VERSION_persistent_template(2,8,0) +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE StandaloneDeriving #-} +#endif module Database.Keys.SQL where diff --git a/Git/CatFile.hs b/Git/CatFile.hs index 6402001ebd..980d289840 100644 --- a/Git/CatFile.hs +++ b/Git/CatFile.hs @@ -148,13 +148,12 @@ parseResp object l | " missing" `isSuffixOf` l -- less expensive than full check && l == fromRef object ++ " missing" = Just DNE | otherwise = case words l of - [sha, objtype, size] - | length sha == shaSize -> - case (readObjectType (encodeBS objtype), reads size) of - (Just t, [(bytes, "")]) -> - Just $ ParsedResp (Ref sha) bytes t - _ -> Nothing - | otherwise -> Nothing + [sha, objtype, size] -> case extractSha sha of + Just sha' -> case (readObjectType (encodeBS objtype), reads size) of + (Just t, [(bytes, "")]) -> + Just $ ParsedResp sha' bytes t + _ -> Nothing + Nothing -> Nothing _ -> Nothing querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a) diff --git a/Git/Config.hs b/Git/Config.hs index 4b606641d8..f880286234 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -1,6 +1,6 @@ {- git repository configuration handling - - - Copyright 2010-2019 Joey Hess + - Copyright 2010-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -14,6 +14,7 @@ import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import Data.Char import qualified System.FilePath.ByteString as P +import Control.Concurrent.Async import Common import Git @@ -184,19 +185,22 @@ coreBare = "core.bare" {- Runs a command to get the configuration of a repo, - and returns a repo populated with the configuration, as well as the raw - - output of the command. -} -fromPipe :: Repo -> String -> [CommandParam] -> IO (Either SomeException (Repo, S.ByteString)) + - output and any standard output of the command. 
-} +fromPipe :: Repo -> String -> [CommandParam] -> IO (Either SomeException (Repo, S.ByteString, S.ByteString)) fromPipe r cmd params = try $ - withHandle StdoutHandle createProcessSuccess p $ \h -> do - val <- S.hGetContents h + withOEHandles createProcessSuccess p $ \(hout, herr) -> do + geterr <- async $ S.hGetContents herr + getval <- async $ S.hGetContents hout + val <- wait getval + err <- wait geterr r' <- store val r - return (r', val) + return (r', val, err) where p = proc cmd $ toCommand params {- Reads git config from a specified file and returns the repo populated - with the configuration. -} -fromFile :: Repo -> FilePath -> IO (Either SomeException (Repo, S.ByteString)) +fromFile :: Repo -> FilePath -> IO (Either SomeException (Repo, S.ByteString, S.ByteString)) fromFile r f = fromPipe r "git" [ Param "config" , Param "--file" diff --git a/Git/Credential.hs b/Git/Credential.hs index 5de95d1a37..9465d27963 100644 --- a/Git/Credential.hs +++ b/Git/Credential.hs @@ -1,6 +1,6 @@ {- git credential interface - - - Copyright 2019 Joey Hess + - Copyright 2019-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -22,6 +22,23 @@ credentialUsername = M.lookup "username" . fromCredential credentialPassword :: Credential -> Maybe String credentialPassword = M.lookup "password" . fromCredential +credentialBasicAuth :: Credential -> Maybe BasicAuth +credentialBasicAuth cred = BasicAuth + <$> credentialUsername cred + <*> credentialPassword cred + +getBasicAuthFromCredential :: Repo -> GetBasicAuth +getBasicAuthFromCredential r u = do + c <- getUrlCredential u r + case credentialBasicAuth c of + Just ba -> return $ Just (ba, signalsuccess c) + Nothing -> do + signalsuccess c False + return Nothing + where + signalsuccess c True = approveUrlCredential c r + signalsuccess c False = rejectUrlCredential c r + -- | This may prompt the user for login information, or get cached login -- information. getUrlCredential :: URLString -> Repo -> IO Credential diff --git a/Git/DiffTree.hs b/Git/DiffTree.hs index 5f556b1ee8..bfd1a7a1bc 100644 --- a/Git/DiffTree.hs +++ b/Git/DiffTree.hs @@ -77,14 +77,14 @@ diffFiles = getdiff (Param "diff-files") - is adjusted to be the same as diff-tree --raw._-} diffLog :: [CommandParam] -> Repo -> IO ([DiffTreeItem], IO Bool) diffLog params = getdiff (Param "log") - (Param "-n1" : Param "--abbrev=40" : Param "--pretty=format:" : params) + (Param "-n1" : Param "--no-abbrev" : Param "--pretty=format:" : params) {- Uses git show to get the changes made by a commit. - - Does not support merge commits, and will fail on them. -} commitDiff :: Sha -> Repo -> IO ([DiffTreeItem], IO Bool) commitDiff ref = getdiff (Param "show") - [ Param "--abbrev=40", Param "--pretty=", Param "--raw", Param (fromRef ref) ] + [ Param "--no-abbrev", Param "--pretty=", Param "--raw", Param (fromRef ref) ] getdiff :: CommandParam -> [CommandParam] -> Repo -> IO ([DiffTreeItem], IO Bool) getdiff command params repo = do @@ -119,10 +119,7 @@ parseDiffRaw l = go l readmode = fst . Prelude.head . readOct -- info = : SP SP SP SP - -- All fields are fixed, so we can pull them out of - -- specific positions in the line. 
(srcm, past_srcm) = splitAt 7 $ drop 1 info (dstm, past_dstm) = splitAt 7 past_srcm - (ssha, past_ssha) = splitAt shaSize past_dstm - (dsha, past_dsha) = splitAt shaSize $ drop 1 past_ssha - s = drop 1 past_dsha + (ssha, past_ssha) = separate (== ' ') past_dstm + (dsha, s) = separate (== ' ') past_ssha diff --git a/Git/DiffTreeItem.hs b/Git/DiffTreeItem.hs index ffda2e8eea..4034e5ecfb 100644 --- a/Git/DiffTreeItem.hs +++ b/Git/DiffTreeItem.hs @@ -17,8 +17,8 @@ import Git.Types data DiffTreeItem = DiffTreeItem { srcmode :: FileMode , dstmode :: FileMode - , srcsha :: Sha -- nullSha if file was added - , dstsha :: Sha -- nullSha if file was deleted + , srcsha :: Sha -- null sha if file was added + , dstsha :: Sha -- null sha if file was deleted , status :: String , file :: TopFilePath } deriving Show diff --git a/Git/LsFiles.hs b/Git/LsFiles.hs index 49cf10304a..952e997ea6 100644 --- a/Git/LsFiles.hs +++ b/Git/LsFiles.hs @@ -162,17 +162,20 @@ stagedDetails = stagedDetails' [] stagedDetails' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool) stagedDetails' ps l repo = do (ls, cleanup) <- pipeNullSplit params repo - return (map parse ls, cleanup) + return (map parseStagedDetails ls, cleanup) where params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++ Param "--" : map (File . fromRawFilePath) l - parse s - | null file = (L.toStrict s, Nothing, Nothing) - | otherwise = (toRawFilePath file, extractSha $ take shaSize rest, readmode mode) - where - (metadata, file) = separate (== '\t') (decodeBL' s) - (mode, rest) = separate (== ' ') metadata - readmode = fst <$$> headMaybe . readOct + +parseStagedDetails :: L.ByteString -> StagedDetails +parseStagedDetails s + | null file = (L.toStrict s, Nothing, Nothing) + | otherwise = (toRawFilePath file, extractSha sha, readmode mode) + where + (metadata, file) = separate (== '\t') (decodeBL' s) + (mode, metadata') = separate (== ' ') metadata + (sha, _) = separate (== ' ') metadata' + readmode = fst <$$> headMaybe . readOct {- Returns a list of the files in the specified locations that are staged - for commit, and whose type has changed. -} diff --git a/Git/LsTree.hs b/Git/LsTree.hs index a3d8383934..5175c39024 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -21,7 +21,6 @@ module Git.LsTree ( import Common import Git import Git.Command -import Git.Sha import Git.FilePath import qualified Git.Filename import Utility.Attoparsec @@ -94,10 +93,10 @@ parserLsTree = TreeItem <$> octal <* A8.char ' ' -- type - <*> A.takeTill (== 32) + <*> A8.takeTill (== ' ') <* A8.char ' ' -- sha - <*> (Ref . decodeBS' <$> A.take shaSize) + <*> (Ref . decodeBS' <$> A8.takeTill (== '\t')) <* A8.char '\t' -- file <*> (asTopFilePath . Git.Filename.decode <$> A.takeByteString) diff --git a/Git/Sha.hs b/Git/Sha.hs index cc33cac65d..24fe546192 100644 --- a/Git/Sha.hs +++ b/Git/Sha.hs @@ -1,6 +1,6 @@ {- git SHA stuff - - - Copyright 2011 Joey Hess + - Copyright 2011,2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,8 +21,8 @@ getSha subcommand a = maybe bad return =<< extractSha <$> a - it, but nothing else. -} extractSha :: String -> Maybe Sha extractSha s - | len == shaSize = val s - | len == shaSize + 1 && length s' == shaSize = val s' + | len `elem` shaSizes = val s + | len - 1 `elem` shaSizes && length s' == len - 1 = val s' | otherwise = Nothing where len = length s @@ -31,13 +31,30 @@ extractSha s | all (`elem` "1234567890ABCDEFabcdef") v = Just $ Ref v | otherwise = Nothing -{- Size of a git sha. 
-} -shaSize :: Int -shaSize = 40 +{- Sizes of git shas. -} +shaSizes :: [Int] +shaSizes = + [ 40 -- sha1 (must come first) + , 64 -- sha256 + ] -nullSha :: Ref -nullSha = Ref $ replicate shaSize '0' +{- Git plumbing often uses a all 0 sha to represent things like a + - deleted file. -} +nullShas :: [Sha] +nullShas = map (\n -> Ref (replicate n '0')) shaSizes -{- Git's magic empty tree. -} +{- Sha to provide to git plumbing when deleting a file. + - + - It's ok to provide a sha1; git versions that use sha256 will map the + - sha1 to the sha256, or probably just treat all null sha1 specially + - the same as all null sha256. -} +deleteSha :: Sha +deleteSha = Prelude.head nullShas + +{- Git's magic empty tree. + - + - It's ok to provide the sha1 of this to git to refer to an empty tree; + - git versions that use sha256 will map the sha1 to the sha256. + -} emptyTree :: Ref emptyTree = Ref "4b825dc642cb6eb9a060e54bf8d69288fbee4904" diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs index c88b36c1b2..2100f1dcf9 100644 --- a/Git/UnionMerge.hs +++ b/Git/UnionMerge.hs @@ -82,7 +82,7 @@ doMerge hashhandle ch differ repo streamer = do - a line suitable for update-index that union merges the two sides of the - diff. -} mergeFile :: String -> RawFilePath -> HashObjectHandle -> CatFileHandle -> IO (Maybe L.ByteString) -mergeFile info file hashhandle h = case filter (/= nullSha) [Ref asha, Ref bsha] of +mergeFile info file hashhandle h = case filter (`notElem` nullShas) [Ref asha, Ref bsha] of [] -> return Nothing (sha:[]) -> use sha shas -> use diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 9f07cf54ed..68dc8b7097 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -108,7 +108,7 @@ unstageFile file repo = do unstageFile' :: TopFilePath -> Streamer unstageFile' p = pureStreamer $ L.fromStrict $ "0 " - <> encodeBS' (fromRef nullSha) + <> encodeBS' (fromRef deleteSha) <> "\t" <> indexPath p diff --git a/Key.hs b/Key.hs index f759f54817..7d6dfa3af2 100644 --- a/Key.hs +++ b/Key.hs @@ -1,6 +1,6 @@ {- git-annex Keys - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -28,6 +28,7 @@ module Key ( prop_isomorphic_key_encode ) where +import Data.Char import qualified Data.Text as T import qualified Data.ByteString as S import qualified Data.Attoparsec.ByteString as A @@ -79,11 +80,15 @@ instance Arbitrary KeyData where <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative -- AssociatedFile cannot be empty, and cannot contain a NUL --- (but can be Nothing) +-- (but can be Nothing). instance Arbitrary AssociatedFile where - arbitrary = (AssociatedFile . fmap toRawFilePath <$> arbitrary) + arbitrary = (AssociatedFile . fmap conv <$> arbitrary) `suchThat` (/= AssociatedFile (Just S.empty)) `suchThat` (\(AssociatedFile f) -> maybe True (S.notElem 0) f) + where + -- Generating arbitrary unicode leads to encoding errors + -- when LANG=C, so limit to ascii. + conv = toRawFilePath . filter isAscii instance Arbitrary Key where arbitrary = mkKey . const <$> arbitrary diff --git a/Logs.hs b/Logs.hs index 5faec561ef..906f406793 100644 --- a/Logs.hs +++ b/Logs.hs @@ -1,6 +1,6 @@ {- git-annex log file names - - - Copyright 2013-2019 Joey Hess + - Copyright 2013-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -27,8 +27,8 @@ data LogVariety {- Converts a path from the git-annex branch into one of the varieties - of logs used by git-annex, if it's a known path. 
-} -getLogVariety :: RawFilePath -> Maybe LogVariety -getLogVariety f +getLogVariety :: GitConfig -> RawFilePath -> Maybe LogVariety +getLogVariety config f | f `elem` topLevelOldUUIDBasedLogs = Just OldUUIDBasedLog | f `elem` topLevelNewUUIDBasedLogs = Just NewUUIDBasedLog | isRemoteStateLog f = Just NewUUIDBasedLog @@ -36,7 +36,7 @@ getLogVariety f | isChunkLog f = ChunkLog <$> extLogFileKey chunkLogExt f | isRemoteMetaDataLog f = Just RemoteMetaDataLog | isMetaDataLog f || f `elem` otherLogs = Just OtherLog - | otherwise = PresenceLog <$> firstJust (presenceLogs f) + | otherwise = PresenceLog <$> firstJust (presenceLogs config f) {- All the old-format uuid-based logs stored in the top of the git-annex branch. -} topLevelOldUUIDBasedLogs :: [RawFilePath] @@ -61,10 +61,10 @@ topLevelNewUUIDBasedLogs = {- All the ways to get a key from a presence log file -} -presenceLogs :: RawFilePath -> [Maybe Key] -presenceLogs f = +presenceLogs :: GitConfig -> RawFilePath -> [Maybe Key] +presenceLogs config f = [ urlLogFileKey f - , locationLogFileKey f + , locationLogFileKey config f ] {- Top-level logs that are neither UUID based nor presence logs. -} @@ -218,8 +218,17 @@ urlLogFileKey :: RawFilePath -> Maybe Key urlLogFileKey = extLogFileKey urlLogExt {- Converts a pathname into a key if it's a location log. -} -locationLogFileKey :: RawFilePath -> Maybe Key -locationLogFileKey path - -- Want only xx/yy/foo.log, not .log files in other places. - | length (splitDirectories (fromRawFilePath path)) /= 3 = Nothing +locationLogFileKey :: GitConfig -> RawFilePath -> Maybe Key +locationLogFileKey config path + | length (splitDirectories (fromRawFilePath path)) /= locationLogFileDepth config = Nothing | otherwise = extLogFileKey ".log" path + +{- Depth of location log files within the git-annex branch. + - + - Normally they are xx/yy/key.log so depth 3. + - The same extension is also used for other logs that + - are not location logs. -} +locationLogFileDepth :: GitConfig -> Int +locationLogFileDepth config = hashlevels + 1 + where + HashLevels hashlevels = branchHashLevels config diff --git a/Logs/Location.hs b/Logs/Location.hs index 66532ae413..2c3439805f 100644 --- a/Logs/Location.hs +++ b/Logs/Location.hs @@ -130,8 +130,10 @@ loggedKeys :: Annex [Unchecked Key] loggedKeys = loggedKeys' (not <$$> checkDead) loggedKeys' :: (Key -> Annex Bool) -> Annex [Unchecked Key] -loggedKeys' check = mapMaybe (defercheck <$$> locationLogFileKey) - <$> Annex.Branch.files +loggedKeys' check = do + config <- Annex.getGitConfig + mapMaybe (defercheck <$$> locationLogFileKey config) + <$> Annex.Branch.files where defercheck k = Unchecked $ ifM (check k) ( return (Just k) diff --git a/Logs/Remote/Pure.hs b/Logs/Remote/Pure.hs index e855d87200..7d05269be6 100644 --- a/Logs/Remote/Pure.hs +++ b/Logs/Remote/Pure.hs @@ -19,6 +19,7 @@ module Logs.Remote.Pure ( import Annex.Common import Types.Remote +import Types.ProposedAccepted import Logs.UUIDBased import Annex.SpecialRemote.Config @@ -40,24 +41,24 @@ buildRemoteConfigLog :: Log RemoteConfig -> Builder buildRemoteConfigLog = buildLogOld (byteString . encodeBS . showConfig) remoteConfigParser :: A.Parser RemoteConfig -remoteConfigParser = keyValToConfig . words . decodeBS <$> A.takeByteString +remoteConfigParser = keyValToConfig Accepted . words . decodeBS <$> A.takeByteString showConfig :: RemoteConfig -> String showConfig = unwords . configToKeyVal {- Given Strings like "key=value", generates a RemoteConfig. 
-} -keyValToConfig :: [String] -> RemoteConfig -keyValToConfig ws = M.fromList $ map (/=/) ws +keyValToConfig :: (String -> ProposedAccepted String) -> [String] -> RemoteConfig +keyValToConfig mk ws = M.fromList $ map (/=/) ws where - (/=/) s = (k, v) + (/=/) s = (mk k, mk v) where k = takeWhile (/= '=') s v = configUnEscape $ drop (1 + length k) s -configToKeyVal :: M.Map String String -> [String] +configToKeyVal :: RemoteConfig -> [String] configToKeyVal m = map toword $ sort $ M.toList m where - toword (k, v) = k ++ "=" ++ configEscape v + toword (k, v) = fromProposedAccepted k ++ "=" ++ configEscape (fromProposedAccepted v) configEscape :: String -> String configEscape = concatMap escape @@ -90,9 +91,9 @@ prop_isomorphic_configEscape s = s == (configUnEscape . configEscape) s prop_parse_show_Config :: RemoteConfig -> Bool prop_parse_show_Config c -- whitespace and '=' are not supported in config keys - | any (\k -> any isSpace k || elem '=' k) (M.keys c) = True - | any (any excluded) (M.keys c) = True - | any (any excluded) (M.elems c) = True + | any (\k -> any isSpace k || elem '=' k) (map fromProposedAccepted $ M.keys c) = True + | any (any excluded) (map fromProposedAccepted $ M.keys c) = True + | any (any excluded) (map fromProposedAccepted $ M.elems c) = True | otherwise = A.parseOnly remoteConfigParser (encodeBS $ showConfig c) ~~ Right c where normalize v = sort . M.toList <$> v diff --git a/Makefile b/Makefile index 965f53e1fc..722921e002 100644 --- a/Makefile +++ b/Makefile @@ -84,9 +84,9 @@ install-completions: build install -d $(DESTDIR)$(ZSH_COMPLETIONS_PATH) ./git-annex --zsh-completion-script git-annex 2>/dev/null \ > $(DESTDIR)$(ZSH_COMPLETIONS_PATH)/_git-annex - install -d $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/completions + install -d $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/vendor_completions.d ./git-annex --fish-completion-script git-annex 2>/dev/null \ - > $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/completions/git-annex.fish + > $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/vendor_completions.d/git-annex.fish test: git-annex git-annex-shell ./git-annex test diff --git a/Messages.hs b/Messages.hs index 77ebdb9714..71c299fb8f 100644 --- a/Messages.hs +++ b/Messages.hs @@ -1,6 +1,6 @@ {- git-annex output messages - - - Copyright 2010-2019 Joey Hess + - Copyright 2010-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -48,6 +48,7 @@ module Messages ( outputMessage, withMessageState, prompt, + mkPrompter, ) where import System.Log.Logger @@ -55,6 +56,7 @@ import System.Log.Formatter import System.Log.Handler (setFormatter) import System.Log.Handler.Simple import Control.Concurrent +import Control.Monad.IO.Class import qualified Data.ByteString as S import Common @@ -290,14 +292,21 @@ commandProgressDisabled = withMessageState $ \s -> return $ - the user. -} prompt :: Annex a -> Annex a -prompt a = debugLocks $ Annex.getState Annex.concurrency >>= \case - NonConcurrent -> a +prompt a = do + p <- mkPrompter + p a + +{- Like prompt, but for a non-annex action that prompts. 
-} +mkPrompter :: (MonadMask m, MonadIO m) => Annex (m a -> m a) +mkPrompter = Annex.getState Annex.concurrency >>= \case + NonConcurrent -> return id (Concurrent _) -> goconcurrent ConcurrentPerCpu -> goconcurrent where goconcurrent = withMessageState $ \s -> do let l = promptLock s - bracketIO - (takeMVar l) - (putMVar l) - (const $ hideRegionsWhile s a) + return $ \a -> + debugLocks $ bracketIO + (takeMVar l) + (putMVar l) + (const $ hideRegionsWhile s a) diff --git a/Messages/Concurrent.hs b/Messages/Concurrent.hs index 3cc5258359..94554aff5d 100644 --- a/Messages/Concurrent.hs +++ b/Messages/Concurrent.hs @@ -18,6 +18,7 @@ import Common import qualified System.Console.Concurrent as Console import qualified System.Console.Regions as Regions import Control.Concurrent.STM +import Control.Monad.IO.Class import qualified Data.Text as T #ifndef mingw32_HOST_OS import GHC.IO.Encoding @@ -120,13 +121,14 @@ concurrentOutputSupported = return True -- Windows is always unicode {- Hide any currently displayed console regions while running the action, - so that the action can use the console itself. -} -hideRegionsWhile :: MessageState -> Annex a -> Annex a -hideRegionsWhile s a - | concurrentOutputEnabled s = bracketIO setup cleanup go +hideRegionsWhile :: (MonadIO m, Monad m, MonadMask m) => MessageState -> m a -> m a +hideRegionsWhile s a + | concurrentOutputEnabled s = bracket setup cleanup go | otherwise = a where - setup = Regions.waitDisplayChange $ swapTMVar Regions.regionList [] - cleanup = void . atomically . swapTMVar Regions.regionList + setup = liftIO $ + Regions.waitDisplayChange $ swapTMVar Regions.regionList [] + cleanup = liftIO . void . atomically . swapTMVar Regions.regionList go _ = do liftIO $ hFlush stdout a diff --git a/Remote.hs b/Remote.hs index fb096736ee..a1a07f95dc 100644 --- a/Remote.hs +++ b/Remote.hs @@ -24,7 +24,7 @@ module Remote ( remoteTypes, remoteList, remoteList', - gitSyncableRemote, + gitSyncableRemoteType, remoteMap, remoteMap', uuidDescriptions, @@ -131,7 +131,7 @@ byNameWithUUID = checkuuid <=< byName repo <- getRepo r ifM (liftIO $ getDynamicConfig $ remoteAnnexIgnore (gitconfig r)) ( giveup $ noRemoteUUIDMsg r ++ - " (" ++ show (remoteConfig repo "ignore") ++ + " (" ++ show (remoteAnnexConfig repo "ignore") ++ " is set)" , giveup $ noRemoteUUIDMsg r ) diff --git a/Remote/Adb.hs b/Remote/Adb.hs index e7e8fae3b9..4468c8ee94 100644 --- a/Remote/Adb.hs +++ b/Remote/Adb.hs @@ -1,6 +1,6 @@ {- Remote on Android device accessed using adb. - - - Copyright 2018-2019 Joey Hess + - Copyright 2018-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -19,6 +19,8 @@ import Remote.Helper.Messages import Remote.Helper.ExportImport import Annex.UUID import Utility.Metered +import Types.ProposedAccepted +import Annex.SpecialRemote.Config import qualified Data.Map as M import qualified System.FilePath.Posix as Posix @@ -31,16 +33,28 @@ newtype AndroidSerial = AndroidSerial { fromAndroidSerial :: String } newtype AndroidPath = AndroidPath { fromAndroidPath :: FilePath } remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "adb" , enumerate = const (findSpecialRemotes "adb") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser androiddirectoryField + (FieldDesc "location on the Android device where the files are stored") + , optionalStringParser androidserialField + (FieldDesc "sometimes needed to specify which Android device to use") + ] , setup = adbSetup , exportSupported = exportIsSupported , importSupported = importIsSupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +androiddirectoryField :: RemoteConfigField +androiddirectoryField = Accepted "androiddirectory" + +androidserialField :: RemoteConfigField +androidserialField = Accepted "androidserial" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do let this = Remote { uuid = u @@ -109,10 +123,12 @@ adbSetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu -- verify configuration - adir <- maybe (giveup "Specify androiddirectory=") (pure . AndroidPath) - (M.lookup "androiddirectory" c) + adir <- maybe + (giveup "Specify androiddirectory=") + (pure . AndroidPath . fromProposedAccepted) + (M.lookup androiddirectoryField c) serial <- getserial =<< liftIO enumerateAdbConnected - let c' = M.insert "androidserial" (fromAndroidSerial serial) c + let c' = M.insert androidserialField (Proposed (fromAndroidSerial serial)) c (c'', _encsetup) <- encryptionSetup c' gc @@ -130,7 +146,7 @@ adbSetup _ mu _ c gc = do return (c'', u) where getserial [] = giveup "adb does not list any connected android devices. Plug in an Android device, or configure adb, and try again.." - getserial l = case M.lookup "androidserial" c of + getserial l = case fromProposedAccepted <$> M.lookup androidserialField c of Nothing -> case l of (s:[]) -> return s _ -> giveup $ unlines $ diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 0bbf4b24a7..9c1b96a05d 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -41,6 +41,7 @@ remote = RemoteType { typename = "bittorrent" , enumerate = list , generate = gen + , configParser = mkRemoteConfigParser [] , setup = error "not supported" , exportSupported = exportUnsupported , importSupported = importUnsupported @@ -52,7 +53,7 @@ list _autoinit = do r <- liftIO $ Git.Construct.remoteNamed "bittorrent" (pure Git.Construct.fromUnknown) return [r] -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r _ c gc rs = do cst <- remoteCost gc expensiveRemoteCost return $ Just Remote diff --git a/Remote/Bup.hs b/Remote/Bup.hs index b1ba5f1870..21d3eb0097 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -1,6 +1,6 @@ {- Using bup as a remote. - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -25,6 +25,7 @@ import qualified Git.Ref import Config import Config.Cost import qualified Remote.Helper.Ssh as Ssh +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.ExportImport @@ -33,20 +34,28 @@ import Utility.UserInfo import Annex.UUID import Annex.Ssh import Utility.Metered +import Types.ProposedAccepted type BupRepo = String remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "bup" , enumerate = const (findSpecialRemotes "buprepo") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser buprepoField + (FieldDesc "(required) bup repository to use") + ] , setup = bupSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +buprepoField :: RemoteConfigField +buprepoField = Accepted "buprepo" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do bupr <- liftIO $ bup2GitRemote buprepo cst <- remoteCost gc $ @@ -108,8 +117,8 @@ bupSetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu -- verify configuration is sane - let buprepo = fromMaybe (giveup "Specify buprepo=") $ - M.lookup "buprepo" c + let buprepo = maybe (giveup "Specify buprepo=") fromProposedAccepted $ + M.lookup buprepoField c (c', _encsetup) <- encryptionSetup c gc -- bup init will create the repository. diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index f34d045f61..29f3f5bf52 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -18,11 +18,13 @@ import Types.Creds import qualified Git import Config import Config.Cost +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.ExportImport import Annex.Ssh import Annex.UUID import Utility.SshHost +import Types.ProposedAccepted data DdarRepo = DdarRepo { ddarRepoConfig :: RemoteGitConfig @@ -30,16 +32,23 @@ data DdarRepo = DdarRepo } remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "ddar" , enumerate = const (findSpecialRemotes "ddarrepo") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser ddarrepoField + (FieldDesc "(required) location of ddar archive to use") + ] , setup = ddarSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +ddarrepoField :: RemoteConfigField +ddarrepoField = Accepted "ddarrepo" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do cst <- remoteCost gc $ if ddarLocal ddarrepo @@ -98,8 +107,8 @@ ddarSetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu -- verify configuration is sane - let ddarrepo = fromMaybe (giveup "Specify ddarrepo=") $ - M.lookup "ddarrepo" c + let ddarrepo = maybe (giveup "Specify ddarrepo=") fromProposedAccepted $ + M.lookup ddarrepoField c (c', _encsetup) <- encryptionSetup c gc -- The ddarrepo is stored in git config, as well as this repo's diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 3aa6185155..e9162080ce 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -1,6 +1,6 @@ {- A "remote" that is just a filesystem directory. 
- - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -24,6 +24,7 @@ import Types.Creds import qualified Git import Config.Cost import Config +import Annex.SpecialRemote.Config import Utility.FileMode import Remote.Helper.Special import Remote.Helper.ExportImport @@ -34,18 +35,26 @@ import Annex.UUID import Utility.Metered import Utility.Tmp import Utility.InodeCache +import Types.ProposedAccepted remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "directory" , enumerate = const (findSpecialRemotes "directory") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser directoryField + (FieldDesc "(required) where the special remote stores data") + ] , setup = directorySetup , exportSupported = exportIsSupported , importSupported = importIsSupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +directoryField :: RemoteConfigField +directoryField = Accepted "directory" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do cst <- remoteCost gc cheapRemoteCost let chunkconfig = getChunkConfig c @@ -111,8 +120,8 @@ directorySetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> directorySetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu -- verify configuration is sane - let dir = fromMaybe (giveup "Specify directory=") $ - M.lookup "directory" c + let dir = maybe (giveup "Specify directory=") fromProposedAccepted $ + M.lookup directoryField c absdir <- liftIO $ absPath dir liftIO $ unlessM (doesDirectoryExist absdir) $ giveup $ "Directory does not exist: " ++ absdir @@ -121,7 +130,7 @@ directorySetup _ mu _ c gc = do -- The directory is stored in git config, not in this remote's -- persistant state, so it can vary between hosts. gitConfigSpecialRemote u c' [("directory", absdir)] - return (M.delete "directory" c', u) + return (M.delete directoryField c', u) {- Locations to try to access a given Key in the directory. - We try more than one since we used to write to different hash diff --git a/Remote/External.hs b/Remote/External.hs index 016000badb..4eedb50bc8 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -1,6 +1,6 @@ {- External special remote interface. - - - Copyright 2013-2018 Joey Hess + - Copyright 2013-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -16,10 +16,12 @@ import Types.Remote import Types.Export import Types.CleanupActions import Types.UrlContents +import Types.ProposedAccepted import qualified Git import Config -import Git.Config (isTrueFalse, boolConfig) +import Git.Config (boolConfig) import Git.Env +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.ReadOnly @@ -41,18 +43,26 @@ import Control.Concurrent.STM import Control.Concurrent.Async import System.Log.Logger (debugM) import qualified Data.Map as M +import qualified Data.Set as S remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "external" , enumerate = const (findSpecialRemotes "externaltype") , generate = gen + , configParser = remoteConfigParser , setup = externalSetup , exportSupported = checkExportSupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +externaltypeField :: RemoteConfigField +externaltypeField = Accepted "externaltype" + +readonlyField :: RemoteConfigField +readonlyField = Accepted "readonly" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs -- readonly mode only downloads urls; does not use external program | remoteAnnexReadOnly gc = do @@ -69,7 +79,7 @@ gen r u c gc rs exportUnsupported exportUnsupported | otherwise = do - external <- newExternal externaltype u c gc (Just rs) + external <- newExternal externaltype (Just u) c (Just gc) (Just rs) Annex.addCleanup (RemoteCleanup u) $ stopExternal external cst <- getCost external r gc avail <- getAvailability external r gc @@ -152,32 +162,43 @@ gen r u c gc rs externalSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) externalSetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu + pc <- either giveup return $ parseRemoteConfig c lenientRemoteConfigParser let externaltype = fromMaybe (giveup "Specify externaltype=") $ - M.lookup "externaltype" c + getRemoteConfigValue externaltypeField pc (c', _encsetup) <- encryptionSetup c gc - c'' <- case M.lookup "readonly" c of - Just v | isTrueFalse v == Just True -> do - setConfig (remoteConfig (fromJust (lookupName c)) "readonly") (boolConfig True) + c'' <- case getRemoteConfigValue readonlyField pc of + Just True -> do + setConfig (remoteAnnexConfig (fromJust (lookupName c)) "readonly") (boolConfig True) return c' _ -> do - external <- newExternal externaltype u c' gc Nothing + pc' <- either giveup return $ parseRemoteConfig c' lenientRemoteConfigParser + external <- newExternal externaltype (Just u) pc' (Just gc) Nothing + -- Now that we have an external, ask it to LISTCONFIGS, + -- and re-parse the RemoteConfig strictly, so we can + -- error out if the user provided an unexpected config. + _ <- either giveup return . parseRemoteConfig c' + =<< strictRemoteConfigParser external handleRequest external INITREMOTE Nothing $ \resp -> case resp of INITREMOTE_SUCCESS -> result () INITREMOTE_FAILURE errmsg -> Just $ giveup errmsg _ -> Nothing - withExternalState external $ - liftIO . atomically . readTVar . externalConfig + -- Any config changes the external made before + -- responding to INITREMOTE need to be applied to + -- the RemoteConfig. + changes <- withExternalState external $ + liftIO . atomically . readTVar . 
externalConfigChanges + return (changes c') gitConfigSpecialRemote u c'' [("externaltype", externaltype)] return (c'', u) -checkExportSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool +checkExportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool checkExportSupported c gc = do let externaltype = fromMaybe (giveup "Specify externaltype=") $ - remoteAnnexExternalType gc <|> M.lookup "externaltype" c + remoteAnnexExternalType gc <|> getRemoteConfigValue externaltypeField c checkExportSupported' - =<< newExternal externaltype NoUUID c gc Nothing + =<< newExternal externaltype Nothing c (Just gc) Nothing checkExportSupported' :: External -> Annex Bool checkExportSupported' external = go `catchNonAsync` (const (return False)) @@ -387,36 +408,48 @@ handleRequest' st external req mp responsehandler handleRemoteRequest (DIRHASH_LOWER k) = send $ VALUE $ fromRawFilePath $ hashDirLower def k handleRemoteRequest (SETCONFIG setting value) = - liftIO $ atomically $ modifyTVar' (externalConfig st) $ - M.insert setting value + liftIO $ atomically $ do + modifyTVar' (externalConfig st) $ + M.insert (Accepted setting) $ + RemoteConfigValue (PassedThrough value) + modifyTVar' (externalConfigChanges st) $ \f -> + f . M.insert (Accepted setting) (Accepted value) handleRemoteRequest (GETCONFIG setting) = do - value <- fromMaybe "" . M.lookup setting + value <- fromMaybe "" + . M.lookup (Accepted setting) + . getRemoteConfigPassedThrough <$> liftIO (atomically $ readTVar $ externalConfig st) send $ VALUE value - handleRemoteRequest (SETCREDS setting login password) = do - let v = externalConfig st - c <- liftIO $ atomically $ readTVar v - let gc = externalGitConfig external - c' <- setRemoteCredPair encryptionAlreadySetup c gc - (credstorage setting) - (Just (login, password)) - void $ liftIO $ atomically $ swapTVar v c' - handleRemoteRequest (GETCREDS setting) = do - c <- liftIO $ atomically $ readTVar $ externalConfig st - let gc = externalGitConfig external - creds <- fromMaybe ("", "") <$> - getRemoteCredPair c gc (credstorage setting) - send $ CREDS (fst creds) (snd creds) - handleRemoteRequest GETUUID = send $ - VALUE $ fromUUID $ externalUUID external + handleRemoteRequest (SETCREDS setting login password) = case (externalUUID external, externalGitConfig external) of + (Just u, Just gc) -> do + let v = externalConfig st + c <- liftIO $ atomically $ readTVar v + c' <- setRemoteCredPair' RemoteConfigValue id encryptionAlreadySetup c gc + (credstorage setting u) + (Just (login, password)) + void $ liftIO $ atomically $ swapTVar v c' + _ -> senderror "cannot send SETCREDS here" + handleRemoteRequest (GETCREDS setting) = case (externalUUID external, externalGitConfig external) of + (Just u, Just gc) -> do + c <- liftIO $ atomically $ readTVar $ externalConfig st + creds <- fromMaybe ("", "") <$> + getRemoteCredPair c gc (credstorage setting u) + send $ CREDS (fst creds) (snd creds) + _ -> senderror "cannot send GETCREDS here" + handleRemoteRequest GETUUID = case externalUUID external of + Just u -> send $ VALUE $ fromUUID u + Nothing -> senderror "cannot send GETUUID here" handleRemoteRequest GETGITDIR = send . VALUE . fromRawFilePath =<< fromRepo Git.localGitDir - handleRemoteRequest (SETWANTED expr) = - preferredContentSet (externalUUID external) expr - handleRemoteRequest GETWANTED = do - expr <- fromMaybe "" . 
M.lookup (externalUUID external) - <$> preferredContentMapRaw - send $ VALUE expr + handleRemoteRequest (SETWANTED expr) = case externalUUID external of + Just u -> preferredContentSet u expr + Nothing -> senderror "cannot send SETWANTED here" + handleRemoteRequest GETWANTED = case externalUUID external of + Just u -> do + expr <- fromMaybe "" . M.lookup u + <$> preferredContentMapRaw + send $ VALUE expr + Nothing -> senderror "cannot send GETWANTED here" handleRemoteRequest (SETSTATE key state) = case externalRemoteStateHandle external of Just h -> setRemoteState h key state @@ -448,13 +481,13 @@ handleRequest' st external req mp responsehandler send = sendMessage st external senderror = sendMessage st external . ERROR - credstorage setting = CredPairStorage + credstorage setting u = CredPairStorage { credPairFile = base , credPairEnvironment = (base ++ "login", base ++ "password") - , credPairRemoteField = setting + , credPairRemoteField = Accepted setting } where - base = replace "/" "_" $ fromUUID (externalUUID external) ++ "-" ++ setting + base = replace "/" "_" $ fromUUID u ++ "-" ++ setting withurl mk uri = handleRemoteRequest $ mk $ setDownloader (show uri) OtherDownloader @@ -579,6 +612,7 @@ startExternal external = do createProcess p `catchIO` runerr cmdpath stderrelay <- async $ errrelayer herr cv <- newTVarIO $ externalDefaultConfig external + ccv <- newTVarIO id pv <- newTVarIO Unprepared pid <- atomically $ do n <- succ <$> readTVar (externalLastPid external) @@ -593,6 +627,7 @@ startExternal external = do void $ waitForProcess ph , externalPrepared = pv , externalConfig = cv + , externalConfigChanges = ccv } basecmd = externalRemoteProgram $ externalType external @@ -712,7 +747,7 @@ checkUrlM external url = retrieveUrl :: Retriever retrieveUrl = fileRetriever $ \f k p -> do us <- getWebUrls k - unlessM (downloadUrl k p us f) $ + unlessM (withUrlOptions $ downloadUrl k p us f) $ giveup "failed to download content" checkKeyUrl :: Git.Repo -> CheckPresent @@ -745,3 +780,63 @@ getInfoM external = (++) INFOVALUE v -> Just $ return $ GetNextMessage $ collect ((f, v) : l) _ -> Nothing + +{- All unknown configs are passed through in case the external program + - uses them. -} +lenientRemoteConfigParser :: RemoteConfigParser +lenientRemoteConfigParser = + addRemoteConfigParser specialRemoteConfigParsers baseRemoteConfigParser + +baseRemoteConfigParser :: RemoteConfigParser +baseRemoteConfigParser = RemoteConfigParser + { remoteConfigFieldParsers = + [ optionalStringParser externaltypeField + (FieldDesc "type of external special remote to use") + , trueFalseParser readonlyField False + (FieldDesc "enable readonly mode") + ] + , remoteConfigRestPassthrough = Just + ( const True + , [("*", FieldDesc "all other parameters are passed to external special remote program")] + ) + } + +{- When the remote supports LISTCONFIGS, only accept the ones it listed. + - When it does not, accept all configs. 
-} +strictRemoteConfigParser :: External -> Annex RemoteConfigParser +strictRemoteConfigParser external = listConfigs external >>= \case + Nothing -> return lenientRemoteConfigParser + Just l -> do + let s = S.fromList (map fst l) + let listed f = S.member (fromProposedAccepted f) s + return $ lenientRemoteConfigParser + { remoteConfigRestPassthrough = Just (listed, l) } + +listConfigs :: External -> Annex (Maybe [(Setting, FieldDesc)]) +listConfigs external = handleRequest external LISTCONFIGS Nothing (collect []) + where + collect l req = case req of + CONFIG s d -> Just $ return $ + GetNextMessage $ collect ((s, FieldDesc d) : l) + CONFIGEND -> result (Just (reverse l)) + UNSUPPORTED_REQUEST -> result Nothing + _ -> Nothing + +remoteConfigParser :: RemoteConfig -> Annex RemoteConfigParser +remoteConfigParser c + -- No need to start the external when there is no config to parse, + -- or when everything in the config was already accepted; in those + -- cases the lenient parser will do the same thing as the strict + -- parser. + | M.null (M.filter isproposed c) = return lenientRemoteConfigParser + | otherwise = case parseRemoteConfig c baseRemoteConfigParser of + Left _ -> return lenientRemoteConfigParser + Right pc -> case (getRemoteConfigValue externaltypeField pc, getRemoteConfigValue readonlyField pc) of + (Nothing, _) -> return lenientRemoteConfigParser + (_, Just True) -> return lenientRemoteConfigParser + (Just externaltype, _) -> do + external <- newExternal externaltype Nothing pc Nothing Nothing + strictRemoteConfigParser external + where + isproposed (Accepted _) = False + isproposed (Proposed _) = True diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index b9785cb140..461a4b5258 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -1,6 +1,6 @@ {- External special remote data types. - - - Copyright 2013-2018 Joey Hess + - Copyright 2013-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -28,6 +28,7 @@ module Remote.External.Types ( AsyncMessage(..), ErrorMsg, Setting, + Description, ProtocolVersion, supportedProtocolVersions, ) where @@ -37,7 +38,8 @@ import Types.StandardGroups (PreferredContentExpression) import Utility.Metered (BytesProcessed(..)) import Types.Transfer (Direction(..)) import Config.Cost (Cost) -import Types.Remote (RemoteConfig, RemoteStateHandle) +import Types.RemoteState +import Types.RemoteConfig import Types.Export import Types.Availability (Availability(..)) import Types.Key @@ -50,17 +52,17 @@ import Data.Char data External = External { externalType :: ExternalType - , externalUUID :: UUID + , externalUUID :: Maybe UUID , externalState :: TVar [ExternalState] -- ^ Contains states for external special remote processes -- that are not currently in use. 
, externalLastPid :: TVar PID - , externalDefaultConfig :: RemoteConfig - , externalGitConfig :: RemoteGitConfig + , externalDefaultConfig :: ParsedRemoteConfig + , externalGitConfig :: Maybe RemoteGitConfig , externalRemoteStateHandle :: Maybe RemoteStateHandle } -newExternal :: ExternalType -> UUID -> RemoteConfig -> RemoteGitConfig -> Maybe RemoteStateHandle -> Annex External +newExternal :: ExternalType -> Maybe UUID -> ParsedRemoteConfig -> Maybe RemoteGitConfig -> Maybe RemoteStateHandle -> Annex External newExternal externaltype u c gc rs = liftIO $ External <$> pure externaltype <*> pure u @@ -78,7 +80,8 @@ data ExternalState = ExternalState , externalShutdown :: IO () , externalPid :: PID , externalPrepared :: TVar PrepareStatus - , externalConfig :: TVar RemoteConfig + , externalConfig :: TVar ParsedRemoteConfig + , externalConfigChanges :: TVar (RemoteConfig -> RemoteConfig) } type PID = Int @@ -129,6 +132,7 @@ data Request | CHECKPRESENT SafeKey | REMOVE SafeKey | WHEREIS SafeKey + | LISTCONFIGS | GETINFO | EXPORTSUPPORTED | EXPORT ExportLocation @@ -145,6 +149,7 @@ needsPREPARE PREPARE = False needsPREPARE (EXTENSIONS _) = False needsPREPARE INITREMOTE = False needsPREPARE EXPORTSUPPORTED = False +needsPREPARE LISTCONFIGS = False needsPREPARE _ = True instance Proto.Sendable Request where @@ -165,6 +170,7 @@ instance Proto.Sendable Request where [ "CHECKPRESENT", Proto.serialize key ] formatMessage (REMOVE key) = [ "REMOVE", Proto.serialize key ] formatMessage (WHEREIS key) = [ "WHEREIS", Proto.serialize key ] + formatMessage LISTCONFIGS = [ "LISTCONFIGS" ] formatMessage GETINFO = [ "GETINFO" ] formatMessage EXPORTSUPPORTED = ["EXPORTSUPPORTED"] formatMessage (EXPORT loc) = [ "EXPORT", Proto.serialize loc ] @@ -209,6 +215,8 @@ data Response | CHECKURL_FAILURE ErrorMsg | WHEREIS_SUCCESS String | WHEREIS_FAILURE + | CONFIG Setting Description + | CONFIGEND | INFOFIELD String | INFOVALUE String | INFOEND @@ -243,6 +251,8 @@ instance Proto.Receivable Response where parseCommand "CHECKURL-FAILURE" = Proto.parse1 CHECKURL_FAILURE parseCommand "WHEREIS-SUCCESS" = Just . WHEREIS_SUCCESS parseCommand "WHEREIS-FAILURE" = Proto.parse0 WHEREIS_FAILURE + parseCommand "CONFIG" = Proto.parse2 CONFIG + parseCommand "CONFIGEND" = Proto.parse0 CONFIGEND parseCommand "INFOFIELD" = Proto.parse1 INFOFIELD parseCommand "INFOVALUE" = Proto.parse1 INFOVALUE parseCommand "INFOEND" = Proto.parse0 INFOEND @@ -330,6 +340,7 @@ instance Proto.Receivable AsyncMessage where -- All are serializable. type ErrorMsg = String type Setting = String +type Description = String type ProtocolVersion = Int type Size = Maybe Integer diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 9fa5916978..780499a8f2 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -39,6 +39,7 @@ import qualified Git.Construct import qualified Annex.Branch import Config import Config.Cost +import Annex.SpecialRemote.Config import Remote.Helper.Git import Remote.Helper.Encryptable import Remote.Helper.Special @@ -55,21 +56,31 @@ import Utility.Tmp import Logs.Remote import Utility.Gpg import Utility.SshHost +import Utility.Tuple import Messages.Progress +import Types.ProposedAccepted remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "gcrypt" -- Remote.Git takes care of enumerating gcrypt remotes too, -- and will call our gen on them. 
, enumerate = const (return []) , generate = gen + , configParser = mkRemoteConfigParser $ + Remote.Rsync.rsyncRemoteConfigs ++ + [ optionalStringParser gitRepoField + (FieldDesc "(required) path or url to gcrypt repository") + ] , setup = gCryptSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gitRepoField :: RemoteConfigField +gitRepoField = Accepted "gitrepo" + +chainGen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) chainGen gcryptr u c gc rs = do g <- gitRepo -- get underlying git repo with real path, not gcrypt path @@ -77,7 +88,7 @@ chainGen gcryptr u c gc rs = do let r' = r { Git.remoteName = Git.remoteName gcryptr } gen r' u c gc rs -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen baser u c gc rs = do -- doublecheck that cache matches underlying repo's gcrypt-id -- (which might not be set), only for local repos @@ -98,15 +109,18 @@ gen baser u c gc rs = do v <- M.lookup u' <$> readRemoteLog case (Git.remoteName baser, v) of (Just remotename, Just c') -> do - setGcryptEncryption c' remotename - storeUUIDIn (remoteConfig baser "uuid") u' + pc <- either giveup return + . parseRemoteConfig c' + =<< configParser remote c' + setGcryptEncryption pc remotename + storeUUIDIn (remoteAnnexConfig baser "uuid") u' setConfig (Git.GCrypt.remoteConfigKey "gcrypt-id" remotename) gcryptid - gen' r u' c' gc rs + gen' r u' pc gc rs _ -> do warning $ "not using unknown gcrypt repository pointed to by remote " ++ Git.repoDescribe r return Nothing -gen' :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen' :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen' r u c gc rs = do cst <- remoteCost gc $ if repoCheap r then nearlyCheapRemoteCost else expensiveRemoteCost @@ -187,7 +201,7 @@ unsupportedUrl :: a unsupportedUrl = giveup "using non-ssh remote repo url with gcrypt is not supported" gCryptSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) -gCryptSetup _ mu _ c gc = go $ M.lookup "gitrepo" c +gCryptSetup _ mu _ c gc = go $ fromProposedAccepted <$> M.lookup gitRepoField c where remotename = fromJust (lookupName c) go Nothing = giveup "Specify gitrepo=" @@ -206,7 +220,9 @@ gCryptSetup _ mu _ c gc = go $ M.lookup "gitrepo" c | Git.repoLocation r == url -> noop | otherwise -> error "Another remote with the same name already exists." - setGcryptEncryption c' remotename + pc <- either giveup return . parseRemoteConfig c' + =<< configParser remote c' + setGcryptEncryption pc remotename {- Run a git fetch and a push to the git repo in order to get - its gcrypt-id set up, so that later git annex commands @@ -322,7 +338,7 @@ shellOrRsync r ashell arsync - Also, sets gcrypt-publish-participants to avoid unncessary gpg - passphrase prompts. 
-} -setGcryptEncryption :: RemoteConfig -> String -> Annex () +setGcryptEncryption :: ParsedRemoteConfig -> String -> Annex () setGcryptEncryption c remotename = do let participants = remoteconfig Git.GCrypt.remoteParticipantConfigKey case extractCipher c of @@ -456,7 +472,7 @@ getGCryptId :: Bool -> Git.Repo -> RemoteGitConfig -> Annex (Maybe Git.GCrypt.GC getGCryptId fast r gc | Git.repoIsLocal r || Git.repoIsLocalUnknown r = extract <$> liftIO (catchMaybeIO $ Git.Config.read r) - | not fast = extract . liftM fst <$> getM (eitherToMaybe <$>) + | not fast = extract . liftM fst3 <$> getM (eitherToMaybe <$>) [ Ssh.onRemote NoConsumeStdin r (\f p -> liftIO (Git.Config.fromPipe r f p), return (Left $ error "configlist failed")) "configlist" [] [] , getConfigViaRsync r gc ] @@ -465,7 +481,7 @@ getGCryptId fast r gc extract Nothing = (Nothing, r) extract (Just r') = (fromConfigValue <$> Git.Config.getMaybe coreGCryptId r', r') -getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, S.ByteString)) +getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, S.ByteString, S.ByteString)) getConfigViaRsync r gc = do (rsynctransport, rsyncurl, _) <- rsyncTransport r gc opts <- rsynctransport diff --git a/Remote/Git.hs b/Remote/Git.hs index d145a0e542..aef6490b38 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -37,6 +37,7 @@ import qualified Annex.SpecialRemote.Config as SpecialRemote import Utility.Tmp import Config import Config.Cost +import Annex.SpecialRemote.Config import Config.DynamicConfig import Annex.Init import Types.CleanupActions @@ -59,11 +60,12 @@ import P2P.Address import Annex.Path import Creds import Types.NumCopies +import Types.ProposedAccepted import Annex.Action import Messages.Progress -import qualified Utility.RawFilePath as R #ifndef mingw32_HOST_OS +import qualified Utility.RawFilePath as R import Utility.FileMode #endif @@ -78,21 +80,28 @@ remote = RemoteType { typename = "git" , enumerate = list , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser locationField + (FieldDesc "url of git remote to remember with special remote") + ] , setup = gitSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } +locationField :: RemoteConfigField +locationField = Accepted "location" + list :: Bool -> Annex [Git.Repo] list autoinit = do c <- fromRepo Git.config rs <- mapM (tweakurl c) =<< Annex.getGitRemotes mapM (configRead autoinit) rs where - annexurl n = Git.ConfigKey ("remote." <> encodeBS' n <> ".annexurl") + annexurl r = remoteConfig r "annexurl" tweakurl c r = do let n = fromJust $ Git.remoteName r - case M.lookup (annexurl n) c of + case M.lookup (annexurl r) c of Nothing -> return r Just url -> inRepo $ \g -> Git.Construct.remoteNamed n $ @@ -111,7 +120,8 @@ list autoinit = do gitSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) gitSetup Init mu _ c _ = do let location = fromMaybe (giveup "Specify location=url") $ - Url.parseURIRelaxed =<< M.lookup "location" c + Url.parseURIRelaxed . 
fromProposedAccepted + =<< M.lookup locationField c rs <- Annex.getGitRemotes u <- case filter (\r -> Git.location r == Git.Url location) rs of [r] -> getRepoUUID r @@ -125,7 +135,7 @@ gitSetup (Enable _) (Just u) _ c _ = do [ Param "remote" , Param "add" , Param $ fromMaybe (giveup "no name") (SpecialRemote.lookupName c) - , Param $ fromMaybe (giveup "no location") (M.lookup "location" c) + , Param $ maybe (giveup "no location") fromProposedAccepted (M.lookup locationField c) ] return (c, u) gitSetup (Enable _) Nothing _ _ _ = error "unable to enable git remote with no specified uuid" @@ -151,7 +161,7 @@ configRead autoinit r = do Just r' -> return r' _ -> return r -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs -- Remote.GitLFS may be used with a repo that is also encrypted -- with gcrypt so is checked first. @@ -202,7 +212,7 @@ gen r u c gc rs , remoteStateHandle = rs } -unavailable :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +unavailable :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) unavailable r = gen r' where r' = case Git.location r of @@ -238,7 +248,7 @@ tryGitConfigRead autoinit r | haveconfig r = return r -- already read | Git.repoIsSsh r = storeUpdatedRemote $ do v <- Ssh.onRemote NoConsumeStdin r - (pipedconfig, return (Left $ giveup "configlist failed")) + (pipedconfig autoinit (Git.repoDescribe r), return (Left $ giveup "configlist failed")) "configlist" [] configlistfields case v of Right r' @@ -246,30 +256,32 @@ tryGitConfigRead autoinit r | otherwise -> configlist_failed Left _ -> configlist_failed | Git.repoIsHttp r = storeUpdatedRemote geturlconfig - | Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteConfig r "uuid") + | Git.GCrypt.isEncrypted r = handlegcrypt =<< getConfigMaybe (remoteAnnexConfig r "uuid") | Git.repoIsUrl r = return r | otherwise = storeUpdatedRemote $ liftIO $ readlocalannexconfig `catchNonAsync` (const $ return r) where haveconfig = not . M.null . Git.config - pipedconfig cmd params = do + pipedconfig mustincludeuuuid configloc cmd params = do v <- liftIO $ Git.Config.fromPipe r cmd params case v of - Right (r', val) -> do - unless (isUUIDConfigured r' || S.null val) $ do + Right (r', val, _err) -> do + unless (isUUIDConfigured r' || S.null val || not mustincludeuuuid) $ do warning $ "Failed to get annex.uuid configuration of repository " ++ Git.repoDescribe r warning $ "Instead, got: " ++ show val warning $ "This is unexpected; please check the network transport!" 
return $ Right r' - Left l -> return $ Left l + Left l -> do + warning $ "Unable to parse git config from " ++ configloc + return $ Left l - geturlconfig = Url.withUrlOptions $ \uo -> do + geturlconfig = Url.withUrlOptionsPromptingCreds $ \uo -> do v <- withTmpFile "git-annex.tmp" $ \tmpfile h -> do liftIO $ hClose h let url = Git.repoLocation r ++ "/config" ifM (liftIO $ Url.downloadQuiet nullMeterUpdate url tmpfile uo) - ( Just <$> pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile] + ( Just <$> pipedconfig False url "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile] , return Nothing ) case v of @@ -370,7 +382,7 @@ inAnnex' repo rmt (State connpool duc _ _) key checkhttp = do showChecking repo gc <- Annex.getGitConfig - ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (fromKey keySize key) uo) (keyUrls gc repo rmt key)) + ifM (Url.withUrlOptionsPromptingCreds $ \uo -> anyM (\u -> Url.checkBoth u (fromKey keySize key) uo) (keyUrls gc repo rmt key)) ( return True , giveup "not found" ) @@ -420,7 +432,9 @@ dropKey' repo r (State connpool duc _ _) key return True , return False ) - | Git.repoIsHttp repo = giveup "dropping from http remote not supported" + | Git.repoIsHttp repo = do + warning "dropping from http remote not supported" + return False | otherwise = commitOnCleanup repo r $ do let fallback = Ssh.dropKey repo key P2PHelper.remove (Ssh.runProto r connpool (return False) fallback) key @@ -502,7 +516,8 @@ copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meterupdate | Git.repoIsHttp repo = unVerified $ do gc <- Annex.getGitConfig - Annex.Content.downloadUrl key meterupdate (keyUrls gc repo r key) dest + Url.withUrlOptionsPromptingCreds $ + Annex.Content.downloadUrl key meterupdate (keyUrls gc repo r key) dest | not $ Git.repoIsUrl repo = guardUsable repo (unVerified (return False)) $ do params <- Ssh.rsyncParams r Download u <- getUUID @@ -523,7 +538,9 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meter else P2PHelper.retrieve (\p -> Ssh.runProto r connpool (return (False, UnVerified)) (fallback p)) key file dest meterupdate - | otherwise = giveup "copying from non-ssh, non-http remote not supported" + | otherwise = do + warning "copying from non-ssh, non-http remote not supported" + unVerified (return False) where fallback p = unVerified $ feedprogressback $ \p' -> do oh <- mkOutputHandlerQuiet @@ -636,7 +653,9 @@ copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate (\p -> Ssh.runProto r connpool (return False) (copyremotefallback p)) key file meterupdate - | otherwise = giveup "copying to non-ssh repo not supported" + | otherwise = do + warning "copying to non-ssh repo not supported" + return False where copylocal Nothing = return False copylocal (Just (object, checksuccess)) = do diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index fb4f2fce8c..2005502578 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -1,6 +1,6 @@ {- Using git-lfs as a remote. - - - Copyright 2019 Joey Hess + - Copyright 2019-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -14,6 +14,7 @@ import Types.Remote import Annex.Url import Types.Key import Types.Creds +import Types.ProposedAccepted import qualified Annex import qualified Annex.SpecialRemote.Config import qualified Git @@ -24,6 +25,7 @@ import qualified Git.GCrypt import qualified Git.Credential as Git import Config import Config.Cost +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.Git @@ -35,6 +37,7 @@ import Crypto import Backend.Hash import Utility.Hash import Utility.SshHost +import Utility.Url import Logs.Remote import Logs.RemoteState import qualified Utility.GitLFS as LFS @@ -52,18 +55,25 @@ import qualified Data.Text.Encoding as E import qualified Control.Concurrent.MSemN as MSemN remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "git-lfs" -- Remote.Git takes care of enumerating git-lfs remotes too, -- and will call our gen on them. , enumerate = const (return []) , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser urlField + (FieldDesc "url of git-lfs repository") + ] , setup = mySetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +urlField :: RemoteConfigField +urlField = Accepted "url" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do -- If the repo uses gcrypt, get the underlaying repo without the -- gcrypt url, to do LFS endpoint discovery on. @@ -127,9 +137,10 @@ mySetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu (c', _encsetup) <- encryptionSetup c gc - case (isEncrypted c', Git.GCrypt.urlPrefix `isPrefixOf` url) of + pc <- either giveup return . parseRemoteConfig c' =<< configParser remote c' + case (isEncrypted pc, Git.GCrypt.urlPrefix `isPrefixOf` url) of (False, False) -> noop - (True, True) -> Remote.GCrypt.setGcryptEncryption c' remotename + (True, True) -> Remote.GCrypt.setGcryptEncryption pc remotename (True, False) -> unlessM (Annex.getState Annex.force) $ giveup $ unwords $ [ "Encryption is enabled for this remote," @@ -155,10 +166,11 @@ mySetup _ mu _ c gc = do -- (so it's also usable by git as a non-special remote), -- and set remote.name.annex-git-lfs = true gitConfigSpecialRemote u c' [("git-lfs", "true")] - setConfig (Git.ConfigKey ("remote." <> encodeBS' (getRemoteName c) <> ".url")) url + setConfig (remoteConfig (getRemoteName c) "url") url return (c', u) where - url = fromMaybe (giveup "Specify url=") (M.lookup "url" c) + url = maybe (giveup "Specify url=") fromProposedAccepted + (M.lookup urlField c) remotename = fromJust (lookupName c) {- Check if a remote's url is one known to belong to a git-lfs repository. 
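-- Illustrative sketch, not part of the patch. The mySetup change above is
-- one instance of the pattern this series applies throughout: raw
-- RemoteConfig values are now wrapped in ProposedAccepted, so settings the
-- user proposed can be validated while values generated internally are
-- Accepted as-is. The sketch assumes only names that appear in this patch
-- (RemoteConfig, RemoteConfigField, ProposedAccepted, fromProposedAccepted,
-- giveup, qualified Data.Map as M); the helper name itself is hypothetical.
requireStringField :: String -> RemoteConfigField -> RemoteConfig -> String
requireStringField errmsg f c =
	maybe (giveup errmsg) fromProposedAccepted (M.lookup f c)
-- e.g. the url lookup in mySetup above is equivalent to:
--   requireStringField "Specify url=" urlField c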
@@ -175,8 +187,10 @@ configKnownUrl r | otherwise = return Nothing where match g c = fromMaybe False $ do - t <- M.lookup Annex.SpecialRemote.Config.typeField c - u <- M.lookup "url" c + t <- fromProposedAccepted + <$> M.lookup Annex.SpecialRemote.Config.typeField c + u <- fromProposedAccepted + <$> M.lookup urlField c let u' = Git.Remote.parseRemoteLocation u g return $ Git.Remote.RemoteUrl (Git.repoLocation r) == u' && t == typename remote @@ -187,7 +201,7 @@ configKnownUrl r set "config-uuid" (fromUUID cu) r' Nothing -> return r' set k v r' = do - let k' = remoteConfig r' k + let k' = remoteAnnexConfig r' k setConfig k' v return $ Git.Config.store' k' (Git.ConfigValue (encodeBS' v)) r' @@ -270,7 +284,7 @@ discoverLFSEndpoint tro h if needauth (responseStatus resp) then do cred <- prompt $ inRepo $ Git.getUrlCredential (show lfsrepouri) - let endpoint' = addbasicauth cred endpoint + let endpoint' = addbasicauth (Git.credentialBasicAuth cred) endpoint let testreq' = LFS.startTransferRequest endpoint' transfernothing flip catchNonAsync (const (returnendpoint endpoint')) $ do resp' <- makeSmallAPIRequest testreq' @@ -290,12 +304,10 @@ discoverLFSEndpoint tro h needauth status = status == unauthorized401 - addbasicauth cred endpoint = - case (Git.credentialUsername cred, Git.credentialPassword cred) of - (Just u, Just p) -> - LFS.modifyEndpointRequest endpoint $ - applyBasicAuth (encodeBS u) (encodeBS p) - _ -> endpoint + addbasicauth (Just ba) endpoint = + LFS.modifyEndpointRequest endpoint $ + applyBasicAuth' ba + addbasicauth Nothing endpoint = endpoint -- The endpoint is cached for later use. getLFSEndpoint :: LFS.TransferRequestOperation -> TVar LFSHandle -> Annex (Maybe LFS.Endpoint) diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 00d623f50f..b422e4f35d 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -1,6 +1,6 @@ {- Amazon Glacier remotes. - - - Copyright 2012 Joey Hess + - Copyright 2012-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -16,6 +16,7 @@ import Types.Remote import qualified Git import Config import Config.Cost +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.ExportImport @@ -25,21 +26,40 @@ import Utility.Metered import qualified Annex import Annex.UUID import Utility.Env +import Types.ProposedAccepted type Vault = String type Archive = FilePath remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "glacier" , enumerate = const (findSpecialRemotes "glacier") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser datacenterField + (FieldDesc "S3 datacenter to use") + , optionalStringParser vaultField + (FieldDesc "name to use for vault") + , optionalStringParser fileprefixField + (FieldDesc "prefix to add to filenames in the vault") + , optionalStringParser AWS.s3credsField HiddenField + ] , setup = glacierSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +datacenterField :: RemoteConfigField +datacenterField = Accepted "datacenter" + +vaultField :: RemoteConfigField +vaultField = Accepted "vault" + +fileprefixField :: RemoteConfigField +fileprefixField = Accepted "fileprefix" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = new <$> remoteCost gc veryExpensiveRemoteCost where new cst = Just $ specialRemote' specialcfg c @@ -99,8 +119,10 @@ glacierSetup' ss u mcreds c gc = do (c', encsetup) <- encryptionSetup c gc c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds let fullconfig = c'' `M.union` defaults + pc <- either giveup return . 
parseRemoteConfig fullconfig + =<< configParser remote fullconfig case ss of - Init -> genVault fullconfig gc u + Init -> genVault pc gc u _ -> return () gitConfigSpecialRemote u fullconfig [("glacier", "true")] return (fullconfig, u) @@ -108,8 +130,8 @@ glacierSetup' ss u mcreds c gc = do remotename = fromJust (lookupName c) defvault = remotename ++ "-" ++ fromUUID u defaults = M.fromList - [ ("datacenter", T.unpack $ AWS.defaultRegion AWS.Glacier) - , ("vault", defvault) + [ (datacenterField, Proposed $ T.unpack $ AWS.defaultRegion AWS.Glacier) + , (vaultField, Proposed defvault) ] prepareStore :: Remote -> Preparer Storer @@ -224,21 +246,21 @@ checkKey r k = do glacierAction :: Remote -> [CommandParam] -> Annex Bool glacierAction r = runGlacier (config r) (gitconfig r) (uuid r) -runGlacier :: RemoteConfig -> RemoteGitConfig -> UUID -> [CommandParam] -> Annex Bool +runGlacier :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> [CommandParam] -> Annex Bool runGlacier c gc u params = go =<< glacierEnv c gc u where go Nothing = return False go (Just e) = liftIO $ boolSystemEnv "glacier" (glacierParams c params) (Just e) -glacierParams :: RemoteConfig -> [CommandParam] -> [CommandParam] +glacierParams :: ParsedRemoteConfig -> [CommandParam] -> [CommandParam] glacierParams c params = datacenter:params where datacenter = Param $ "--region=" ++ fromMaybe (giveup "Missing datacenter configuration") - (M.lookup "datacenter" c) + (getRemoteConfigValue datacenterField c) -glacierEnv :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex (Maybe [(String, String)]) +glacierEnv :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex (Maybe [(String, String)]) glacierEnv c gc u = do liftIO checkSaneGlacierCommand go =<< getRemoteCredPairFor "glacier" c gc creds @@ -251,16 +273,17 @@ glacierEnv c gc u = do creds = AWS.creds u (uk, pk) = credPairEnvironment creds -getVault :: RemoteConfig -> Vault +getVault :: ParsedRemoteConfig -> Vault getVault = fromMaybe (giveup "Missing vault configuration") - . M.lookup "vault" + . getRemoteConfigValue vaultField archive :: Remote -> Key -> Archive archive r k = fileprefix ++ serializeKey k where - fileprefix = M.findWithDefault "" "fileprefix" $ config r + fileprefix = fromMaybe "" $ + getRemoteConfigValue fileprefixField $ config r -genVault :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex () +genVault :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex () genVault c gc u = unlessM (runGlacier c gc u params) $ giveup "Failed creating glacier vault." 
where diff --git a/Remote/Helper/AWS.hs b/Remote/Helper/AWS.hs index 3ab2063e4b..21ef69c2b4 100644 --- a/Remote/Helper/AWS.hs +++ b/Remote/Helper/AWS.hs @@ -12,6 +12,8 @@ module Remote.Helper.AWS where import Annex.Common import Creds +import Types.ProposedAccepted +import Types.RemoteConfig import qualified Data.Map as M import qualified Data.ByteString as B @@ -23,9 +25,12 @@ creds :: UUID -> CredPairStorage creds u = CredPairStorage { credPairFile = fromUUID u , credPairEnvironment = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY") - , credPairRemoteField = "s3creds" + , credPairRemoteField = s3credsField } +s3credsField :: RemoteConfigField +s3credsField = Accepted "s3creds" + data Service = S3 | Glacier deriving (Eq) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 473760edb3..5dfd2999b3 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -1,6 +1,6 @@ {- git-annex chunked remotes - - - Copyright 2014 Joey Hess + - Copyright 2014-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -10,17 +10,20 @@ module Remote.Helper.Chunked ( ChunkConfig(..), noChunks, describeChunkConfig, + chunkConfigParsers, getChunkConfig, storeChunks, removeChunks, retrieveChunks, checkPresentChunks, + chunkField, ) where import Annex.Common import Utility.DataUnits import Types.StoreRetrieve import Types.Remote +import Types.ProposedAccepted import Logs.Chunk import Utility.Metered import Crypto (EncKey) @@ -28,7 +31,6 @@ import Backend (isStableKey) import Annex.SpecialRemote.Config import qualified Data.ByteString.Lazy as L -import qualified Data.Map as M data ChunkConfig = NoChunks @@ -48,19 +50,26 @@ noChunks :: ChunkConfig -> Bool noChunks NoChunks = True noChunks _ = False -getChunkConfig :: RemoteConfig -> ChunkConfig -getChunkConfig m = - case M.lookup chunksizeField m of - Nothing -> case M.lookup "chunk" m of +chunkConfigParsers :: [RemoteConfigFieldParser] +chunkConfigParsers = + [ optionalStringParser chunksizeField HiddenField -- deprecated + , optionalStringParser chunkField + (FieldDesc "size of chunks (eg, 1MiB)") + ] + +getChunkConfig :: ParsedRemoteConfig -> ChunkConfig +getChunkConfig c = + case getRemoteConfigValue chunksizeField c of + Nothing -> case getRemoteConfigValue chunkField c of Nothing -> NoChunks - Just v -> readsz UnpaddedChunks v "chunk" + Just v -> readsz UnpaddedChunks v chunkField Just v -> readsz LegacyChunks v chunksizeField where - readsz c v f = case readSize dataUnits v of + readsz mk v f = case readSize dataUnits v of Just size | size == 0 -> NoChunks - | size > 0 -> c (fromInteger size) - _ -> giveup $ "bad configuration " ++ f ++ "=" ++ v + | size > 0 -> mk (fromInteger size) + _ -> giveup $ "bad configuration " ++ fromProposedAccepted f ++ "=" ++ v -- An infinite stream of chunk keys, starting from chunk 1. newtype ChunkKeyStream = ChunkKeyStream [Key] diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 42df0e41bc..33ef848bb7 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -1,15 +1,19 @@ {- common functions for encryptable remotes - - - Copyright 2011 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} +{-# LANGUAGE FlexibleContexts, ScopedTypeVariables #-} + module Remote.Helper.Encryptable ( EncryptionIsSetup, encryptionSetup, noEncryptionUsed, encryptionAlreadySetup, + encryptionConfigParsers, + parseEncryptionConfig, remoteCipher, remoteCipher', embedCreds, @@ -17,17 +21,20 @@ module Remote.Helper.Encryptable ( extractCipher, isEncrypted, describeEncryption, + encryptionField, + highRandomQualityField ) where import qualified Data.Map as M +import qualified Data.Set as S import qualified "sandi" Codec.Binary.Base64 as B64 import qualified Data.ByteString as B import Annex.Common import Types.Remote -import Config import Crypto import Types.Crypto +import Types.ProposedAccepted import qualified Annex import Annex.SpecialRemote.Config @@ -46,67 +53,170 @@ noEncryptionUsed = NoEncryption encryptionAlreadySetup :: EncryptionIsSetup encryptionAlreadySetup = EncryptionIsSetup +encryptionConfigParsers :: [RemoteConfigFieldParser] +encryptionConfigParsers = + [ encryptionFieldParser + , optionalStringParser cipherField HiddenField + , optionalStringParser cipherkeysField HiddenField + , optionalStringParser pubkeysField HiddenField + , yesNoParser embedCredsField False + (FieldDesc "embed credentials into git repository") + , macFieldParser + , optionalStringParser (Accepted "keyid") + (FieldDesc "gpg key id") + , optionalStringParser (Accepted "keyid+") + (FieldDesc "add additional gpg key") + , optionalStringParser (Accepted "keyid-") + (FieldDesc "remove gpg key") + , highRandomQualityFieldParser + ] + +encryptionConfigs :: S.Set RemoteConfigField +encryptionConfigs = S.fromList (map parserForField encryptionConfigParsers) + +-- Parse only encryption fields, ignoring all others. +parseEncryptionConfig :: RemoteConfig -> Either String ParsedRemoteConfig +parseEncryptionConfig c = parseRemoteConfig + (M.restrictKeys c encryptionConfigs) + (RemoteConfigParser encryptionConfigParsers Nothing) + +encryptionFieldParser :: RemoteConfigFieldParser +encryptionFieldParser = RemoteConfigFieldParser + { parserForField = encryptionField + , valueParser = \v c -> Just . RemoteConfigValue + <$> parseEncryptionMethod (fmap fromProposedAccepted v) c + , fieldDesc = FieldDesc "how to encrypt data stored in the special remote" + , valueDesc = Just $ ValueDesc $ + intercalate " or " (M.keys encryptionMethods) + } + +encryptionMethods :: M.Map String EncryptionMethod +encryptionMethods = M.fromList + [ ("none", NoneEncryption) + , ("shared", SharedEncryption) + , ("hybrid", HybridEncryption) + , ("pubkey", PubKeyEncryption) + , ("sharedpubkey", SharedPubKeyEncryption) + ] + +parseEncryptionMethod :: Maybe String -> RemoteConfig -> Either String EncryptionMethod +parseEncryptionMethod (Just s) _ = case M.lookup s encryptionMethods of + Just em -> Right em + Nothing -> Left badEncryptionMethod +-- Hybrid encryption is the default when a keyid is specified without +-- an encryption field, or when there's a cipher already but no encryption +-- field. +parseEncryptionMethod Nothing c + | M.member (Accepted "keyid") c || M.member cipherField c = Right HybridEncryption + | otherwise = Left badEncryptionMethod + +badEncryptionMethod :: String +badEncryptionMethod = "Specify " ++ intercalate " or " + (map ((fromProposedAccepted encryptionField ++ "=") ++) + (M.keys encryptionMethods)) + ++ "." 
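-- Worked examples for the parser defined just above (illustrative only,
-- not part of the patch; they assume nothing beyond the definitions in
-- this hunk, and `c` stands for an arbitrary RemoteConfig):
--   parseEncryptionMethod (Just "shared") c  == Right SharedEncryption
--   parseEncryptionMethod (Just "bogus") c   == Left badEncryptionMethod
--   parseEncryptionMethod Nothing c          == Right HybridEncryption
--     when c contains a keyid= or an already-stored cipher
--   parseEncryptionMethod Nothing M.empty    == Left badEncryptionMethod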
+ +highRandomQualityField :: RemoteConfigField +highRandomQualityField = Accepted "highRandomQuality" + +highRandomQualityFieldParser :: RemoteConfigFieldParser +highRandomQualityFieldParser = RemoteConfigFieldParser + { parserForField = highRandomQualityField + , valueParser = \v _c -> Just . RemoteConfigValue + <$> parseHighRandomQuality (fmap fromProposedAccepted v) + , fieldDesc = HiddenField + , valueDesc = Nothing + } + +parseHighRandomQuality :: Maybe String -> Either String Bool +parseHighRandomQuality Nothing = Right True +parseHighRandomQuality (Just "false") = Right False +parseHighRandomQuality (Just "true") = Right True +parseHighRandomQuality _ = Left "expected highRandomQuality=true/false" + +macFieldParser :: RemoteConfigFieldParser +macFieldParser = RemoteConfigFieldParser + { parserForField = macField + , valueParser = \v _c -> Just . RemoteConfigValue <$> parseMac v + , fieldDesc = FieldDesc "how to encrypt filenames used on the remote" + , valueDesc = Just $ ValueDesc $ + intercalate " or " (M.keys macMap) + } + +parseMac :: Maybe (ProposedAccepted String) -> Either String Mac +parseMac Nothing = Right defaultMac +parseMac (Just (Accepted s)) = Right $ fromMaybe defaultMac (readMac s) +parseMac (Just (Proposed s)) = case readMac s of + Just mac -> Right mac + Nothing -> Left "bad mac value" + {- Encryption setup for a remote. The user must specify whether to use - an encryption key, or not encrypt. An encrypted cipher is created, or is - updated to be accessible to an additional encryption key. Or the user - could opt to use a shared cipher, which is stored unencrypted. -} encryptionSetup :: RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, EncryptionIsSetup) encryptionSetup c gc = do + pc <- either giveup return $ parseEncryptionConfig c cmd <- gpgCmd <$> Annex.getGitConfig - maybe (genCipher cmd) (updateCipher cmd) (extractCipher c) + maybe (genCipher pc cmd) (updateCipher pc cmd) (extractCipher pc) where -- The type of encryption - encryption = M.lookup encryptionField c + encryption = parseEncryptionMethod (fromProposedAccepted <$> M.lookup encryptionField c) c -- Generate a new cipher, depending on the chosen encryption scheme - genCipher cmd = case encryption of - _ | hasEncryptionConfig c -> cannotchange - Just "none" -> return (c, NoEncryption) - Just "shared" -> encsetup $ genSharedCipher cmd - -- hybrid encryption is the default when a keyid is - -- specified but no encryption - _ | maybe (M.member "keyid" c) (== "hybrid") encryption -> - encsetup $ genEncryptedCipher cmd (c, gc) key Hybrid - Just "pubkey" -> encsetup $ genEncryptedCipher cmd (c, gc) key PubKey - Just "sharedpubkey" -> encsetup $ genSharedPubKeyCipher cmd key - _ -> giveup $ "Specify " ++ intercalate " or " - (map ((encryptionField ++ "=") ++) - ["none","shared","hybrid","pubkey", "sharedpubkey"]) - ++ "." 
- key = fromMaybe (giveup "Specify keyid=...") $ M.lookup "keyid" c - newkeys = maybe [] (\k -> [(True,k)]) (M.lookup "keyid+" c) ++ - maybe [] (\k -> [(False,k)]) (M.lookup "keyid-" c) + genCipher pc cmd = case encryption of + Right NoneEncryption -> return (c, NoEncryption) + Right SharedEncryption -> encsetup $ genSharedCipher cmd + Right HybridEncryption -> encsetup $ genEncryptedCipher cmd (pc, gc) key Hybrid + Right PubKeyEncryption -> encsetup $ genEncryptedCipher cmd (pc, gc) key PubKey + Right SharedPubKeyEncryption -> encsetup $ genSharedPubKeyCipher cmd key + Left err -> giveup err + key = maybe (giveup "Specify keyid=...") fromProposedAccepted $ + M.lookup (Accepted "keyid") c + newkeys = maybe [] (\k -> [(True,fromProposedAccepted k)]) (M.lookup (Accepted "keyid+") c) ++ + maybe [] (\k -> [(False,fromProposedAccepted k)]) (M.lookup (Accepted "keyid-") c) cannotchange = giveup "Cannot set encryption type of existing remotes." -- Update an existing cipher if possible. - updateCipher cmd v = case v of - SharedCipher _ | maybe True (== "shared") encryption -> return (c', EncryptionIsSetup) - EncryptedCipher _ variant _ - | maybe True (== if variant == Hybrid then "hybrid" else "pubkey") encryption -> do - use "encryption update" $ updateCipherKeyIds cmd (c, gc) newkeys v + updateCipher pc cmd v = case v of + SharedCipher _ | encryption == Right SharedEncryption -> + return (c', EncryptionIsSetup) + EncryptedCipher _ variant _ | sameasencryption variant -> + use "encryption update" $ updateCipherKeyIds cmd (pc, gc) newkeys v SharedPubKeyCipher _ _ -> - use "encryption update" $ updateCipherKeyIds cmd (c, gc) newkeys v + use "encryption update" $ updateCipherKeyIds cmd (pc, gc) newkeys v _ -> cannotchange + sameasencryption variant = case encryption of + Right HybridEncryption -> variant == Hybrid + Right PubKeyEncryption -> variant == PubKey + Right _ -> False + Left _ -> True encsetup a = use "encryption setup" . a =<< highRandomQuality use m a = do showNote m cipher <- liftIO a showNote (describeCipher cipher) return (storeCipher cipher c', EncryptionIsSetup) - highRandomQuality = - (&&) (maybe True ( /= "false") $ M.lookup "highRandomQuality" c) - <$> fmap not (Annex.getState Annex.fast) + highRandomQuality = ifM (Annex.getState Annex.fast) + ( return False + , case parseHighRandomQuality (fromProposedAccepted <$> M.lookup highRandomQualityField c) of + Left err -> giveup err + Right v -> return v + ) c' = foldr M.delete c - -- git-annex used to remove 'encryption' as well, since + -- Remove configs that are only used in here to generate + -- the encryption keys, and should not be stored in + -- remote.log. + -- Older versions used to remove 'encryption' as well, since -- it was redundant; we now need to keep it for -- public-key encryption, hence we leave it on newer -- remotes (while being backward-compatible). - [ "keyid", "keyid+", "keyid-", "highRandomQuality" ] + (map Accepted ["keyid", "keyid+", "keyid-", "highRandomQuality"]) -remoteCipher :: RemoteConfig -> RemoteGitConfig -> Annex (Maybe Cipher) +remoteCipher :: ParsedRemoteConfig -> RemoteGitConfig -> Annex (Maybe Cipher) remoteCipher c gc = fmap fst <$> remoteCipher' c gc {- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex - state. 
-} -remoteCipher' :: RemoteConfig -> RemoteGitConfig -> Annex (Maybe (Cipher, StorableCipher)) +remoteCipher' :: ParsedRemoteConfig -> RemoteGitConfig -> Annex (Maybe (Cipher, StorableCipher)) remoteCipher' c gc = go $ extractCipher c where go Nothing = return Nothing @@ -128,17 +238,19 @@ remoteCipher' c gc = go $ extractCipher c - When gpg encryption is used and the creds are encrypted using it. - Not when a shared cipher is used. -} -embedCreds :: RemoteConfig -> Bool -embedCreds c = case yesNo =<< M.lookup "embedcreds" c of +embedCreds :: ParsedRemoteConfig -> Bool +embedCreds c = case getRemoteConfigValue embedCredsField c of Just v -> v - Nothing -> isJust (M.lookup cipherkeysField c) && isJust (M.lookup cipherField c) + Nothing -> case (getRemoteConfigValue cipherkeysField c, getRemoteConfigValue cipherField c) of + (Just (_ :: ProposedAccepted String), Just (_ :: ProposedAccepted String)) -> True + _ -> False {- Gets encryption Cipher, and key encryptor. -} -cipherKey :: RemoteConfig -> RemoteGitConfig -> Annex (Maybe (Cipher, EncKey)) +cipherKey :: ParsedRemoteConfig -> RemoteGitConfig -> Annex (Maybe (Cipher, EncKey)) cipherKey c gc = fmap make <$> remoteCipher c gc where make ciphertext = (ciphertext, encryptKey mac ciphertext) - mac = fromMaybe defaultMac $ M.lookup macField c >>= readMac + mac = fromMaybe defaultMac $ getRemoteConfigValue macField c {- Stores an StorableCipher in a remote's configuration. -} storeCipher :: StorableCipher -> RemoteConfig -> RemoteConfig @@ -147,40 +259,32 @@ storeCipher cip = case cip of (EncryptedCipher t _ ks) -> addcipher t . storekeys ks cipherkeysField (SharedPubKeyCipher t ks) -> addcipher t . storekeys ks pubkeysField where - addcipher t = M.insert cipherField (toB64bs t) - storekeys (KeyIds l) n = M.insert n (intercalate "," l) + addcipher t = M.insert cipherField (Accepted (toB64bs t)) + storekeys (KeyIds l) n = M.insert n (Accepted (intercalate "," l)) {- Extracts an StorableCipher from a remote's configuration. -} -extractCipher :: RemoteConfig -> Maybe StorableCipher -extractCipher c = case (M.lookup cipherField c, - M.lookup cipherkeysField c <|> M.lookup pubkeysField c, - M.lookup encryptionField c) of - (Just t, Just ks, encryption) | maybe True (== "hybrid") encryption -> +extractCipher :: ParsedRemoteConfig -> Maybe StorableCipher +extractCipher c = case (getRemoteConfigValue cipherField c, + (getRemoteConfigValue cipherkeysField c <|> getRemoteConfigValue pubkeysField c), + getRemoteConfigValue encryptionField c) of + (Just t, Just ks, Just HybridEncryption) -> Just $ EncryptedCipher (fromB64bs t) Hybrid (readkeys ks) - (Just t, Just ks, Just "pubkey") -> + (Just t, Just ks, Just PubKeyEncryption) -> Just $ EncryptedCipher (fromB64bs t) PubKey (readkeys ks) - (Just t, Just ks, Just "sharedpubkey") -> + (Just t, Just ks, Just SharedPubKeyEncryption) -> Just $ SharedPubKeyCipher (fromB64bs t) (readkeys ks) - (Just t, Nothing, encryption) | maybe True (== "shared") encryption -> + (Just t, Nothing, Just SharedEncryption) -> Just $ SharedCipher (fromB64bs t) _ -> Nothing where readkeys = KeyIds . splitc ',' -isEncrypted :: RemoteConfig -> Bool -isEncrypted c = case M.lookup encryptionField c of - Just "none" -> False - Just _ -> True - Nothing -> hasEncryptionConfig c +isEncrypted :: ParsedRemoteConfig -> Bool +isEncrypted = isJust . 
extractCipher -hasEncryptionConfig :: RemoteConfig -> Bool -hasEncryptionConfig c = M.member cipherField c - || M.member cipherkeysField c - || M.member pubkeysField c - -describeEncryption :: RemoteConfig -> String +describeEncryption :: ParsedRemoteConfig -> String describeEncryption c = case extractCipher c of - Nothing -> "none" + Nothing -> "none" ++ show (getRemoteConfigValue cipherField c :: Maybe String) ++ show (M.keys c) Just cip -> nameCipher cip ++ " (" ++ describeCipher cip ++ ")" nameCipher :: StorableCipher -> String diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 21d9814c65..a412f143fe 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -13,25 +13,25 @@ import Annex.Common import Types.Remote import Types.Backend import Types.Key +import Types.ProposedAccepted import Backend import Remote.Helper.Encryptable (isEncrypted) import qualified Database.Export as Export import qualified Database.ContentIdentifier as ContentIdentifier import Annex.Export import Annex.LockFile -import Config +import Annex.SpecialRemote.Config import Git.Types (fromRef) import Logs.Export import Logs.ContentIdentifier (recordContentIdentifier) -import qualified Data.Map as M import Control.Concurrent.STM -- | Use for remotes that do not support exports. class HasExportUnsupported a where exportUnsupported :: a -instance HasExportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where +instance HasExportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool) where exportUnsupported = \_ _ -> return False instance HasExportUnsupported (ExportActions Annex) where @@ -50,7 +50,7 @@ instance HasExportUnsupported (ExportActions Annex) where class HasImportUnsupported a where importUnsupported :: a -instance HasImportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where +instance HasImportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool) where importUnsupported = \_ _ -> return False instance HasImportUnsupported (ImportActions Annex) where @@ -63,59 +63,70 @@ instance HasImportUnsupported (ImportActions Annex) where , checkPresentExportWithContentIdentifier = \_ _ _ -> return False } -exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool +exportIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool exportIsSupported = \_ _ -> return True -importIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool +importIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool importIsSupported = \_ _ -> return True -- | Prevent or allow exporttree=yes and importtree=yes when -- setting up a new remote, depending on exportSupported and importSupported. adjustExportImportRemoteType :: RemoteType -> RemoteType -adjustExportImportRemoteType rt = rt { setup = setup' } +adjustExportImportRemoteType rt = rt + { setup = setup' + , configParser = configparser + } where - setup' st mu cp c gc = - let checkconfig supported configured setting cont = - ifM (supported rt c gc) + configparser c = addRemoteConfigParser exportImportConfigParsers + <$> configParser rt c + setup' st mu cp c gc = do + pc <- either giveup return . 
parseRemoteConfig c =<< configparser c + let checkconfig supported configured configfield cont = + ifM (supported rt pc gc) ( case st of Init - | configured c && isEncrypted c -> - giveup $ "cannot enable both encryption and " ++ setting + | configured pc && isEncrypted pc -> + giveup $ "cannot enable both encryption and " ++ fromProposedAccepted configfield | otherwise -> cont - Enable oldc - | configured c /= configured oldc -> - giveup $ "cannot change " ++ setting ++ " of existing special remote" - | otherwise -> cont - , if configured c - then giveup $ setting ++ " is not supported by this special remote" + Enable oldc -> do + oldpc <- either mempty id . parseRemoteConfig oldc <$> configparser oldc + if configured pc /= configured oldpc + then giveup $ "cannot change " ++ fromProposedAccepted configfield ++ " of existing special remote" + else cont + , if configured pc + then giveup $ fromProposedAccepted configfield ++ " is not supported by this special remote" else cont ) - in checkconfig exportSupported exportTree "exporttree" $ - checkconfig importSupported importTree "importtree" $ - if importTree c && not (exportTree c) + checkconfig exportSupported exportTree exportTreeField $ + checkconfig importSupported importTree importTreeField $ + if importTree pc && not (exportTree pc) then giveup "cannot enable importtree=yes without also enabling exporttree=yes" else setup rt st mu cp c gc +exportImportConfigParsers :: [RemoteConfigFieldParser] +exportImportConfigParsers = + [ yesNoParser exportTreeField False + (FieldDesc "export trees of files to this remote") + , yesNoParser importTreeField False + (FieldDesc "import trees of files from this remote") + ] + -- | Adjust a remote to support exporttree=yes and importree=yes. -- -- Note that all remotes with importree=yes also have exporttree=yes. adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote -adjustExportImport r rs = case M.lookup "exporttree" (config r) of +adjustExportImport r rs = case getRemoteConfigValue exportTreeField (config r) of Nothing -> return $ notexport r - Just c -> case yesNo c of - Just True -> ifM (isExportSupported r) - ( do - exportdbv <- prepexportdb - r' <- isexport exportdbv - if importTree (config r) - then isimport r' exportdbv - else return r' - , return $ notexport r - ) - Just False -> return $ notexport r - Nothing -> do - warning $ "bad exporttree value for " ++ name r ++ ", assuming not an export" - return $ notexport r + Just True -> ifM (isExportSupported r) + ( do + exportdbv <- prepexportdb + r' <- isexport exportdbv + if importTree (config r) + then isimport r' exportdbv + else return r' + , return $ notexport r + ) + Just False -> return $ notexport r where notexport r' = notimport r' { exportActions = exportUnsupported diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 40934c6f08..a8b945c723 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -1,6 +1,6 @@ {- helpers for special remotes - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -30,6 +30,8 @@ module Remote.Helper.Special ( checkPresentDummy, SpecialRemoteCfg(..), specialRemoteCfg, + specialRemoteConfigParsers, + specialRemoteType, specialRemote, specialRemote', lookupName, @@ -79,8 +81,8 @@ findSpecialRemotes s = do gitConfigSpecialRemote :: UUID -> RemoteConfig -> [(String, String)] -> Annex () gitConfigSpecialRemote u c cfgs = do forM_ cfgs $ \(k, v) -> - setConfig (remoteConfig c (encodeBS' k)) v - storeUUIDIn (remoteConfig c "uuid") u + setConfig (remoteAnnexConfig c (encodeBS' k)) v + storeUUIDIn (remoteAnnexConfig c "uuid") u -- RetrievalVerifiableKeysSecure unless overridden by git config. -- @@ -149,7 +151,7 @@ checkPresentDummy :: Key -> Annex Bool checkPresentDummy _ = error "missing checkPresent implementation" type RemoteModifier - = RemoteConfig + = ParsedRemoteConfig -> Preparer Storer -> Preparer Retriever -> Preparer Remover @@ -162,9 +164,19 @@ data SpecialRemoteCfg = SpecialRemoteCfg , displayProgress :: Bool } -specialRemoteCfg :: RemoteConfig -> SpecialRemoteCfg +specialRemoteCfg :: ParsedRemoteConfig -> SpecialRemoteCfg specialRemoteCfg c = SpecialRemoteCfg (getChunkConfig c) True +-- Modifies a base RemoteType to support chunking and encryption configs. +specialRemoteType :: RemoteType -> RemoteType +specialRemoteType r = r + { configParser = \c -> addRemoteConfigParser specialRemoteConfigParsers + <$> configParser r c + } + +specialRemoteConfigParsers :: [RemoteConfigFieldParser] +specialRemoteConfigParsers = chunkConfigParsers ++ encryptionConfigParsers + -- Modifies a base Remote to support both chunking and encryption, -- which special remotes typically should support. -- @@ -212,7 +224,7 @@ specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckp } } cip = cipherKey c (gitconfig baser) - isencrypted = isJust (extractCipher c) + isencrypted = isEncrypted c safely a = catchNonAsync a (\e -> warning (show e) >> return False) diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 897e73cc1f..2fab096c03 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -1,6 +1,6 @@ {- A remote that provides hooks to run shell commands. - - - Copyright 2011 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -15,11 +15,13 @@ import Git.Types (fromConfigKey, fromConfigValue) import Config import Config.Cost import Annex.UUID +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.ExportImport import Utility.Env import Messages.Progress +import Types.ProposedAccepted import qualified Data.Map as M @@ -27,16 +29,23 @@ type Action = String type HookName = String remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "hook" , enumerate = const (findSpecialRemotes "hooktype") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser hooktypeField + (FieldDesc "(required) specify collection of hooks to use") + ] , setup = hookSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +hooktypeField :: RemoteConfigField +hooktypeField = Accepted "hooktype" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do cst <- remoteCost gc expensiveRemoteCost return $ Just $ specialRemote c @@ -85,8 +94,8 @@ gen r u c gc rs = do hookSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) hookSetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu - let hooktype = fromMaybe (giveup "Specify hooktype=") $ - M.lookup "hooktype" c + let hooktype = maybe (giveup "Specify hooktype=") fromProposedAccepted $ + M.lookup hooktypeField c (c', _encsetup) <- encryptionSetup c gc gitConfigSpecialRemote u c' [("hooktype", hooktype)] return (c', u) diff --git a/Remote/List.hs b/Remote/List.hs index 3e7ca9fa73..82869b9aa8 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -1,6 +1,6 @@ {- git-annex remote list - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -20,6 +20,7 @@ import Annex.UUID import Remote.Helper.Hooks import Remote.Helper.ReadOnly import Remote.Helper.ExportImport +import Annex.SpecialRemote.Config import qualified Git import qualified Git.Config @@ -109,7 +110,8 @@ remoteGen m t g = do let cu = fromMaybe u $ remoteAnnexConfigUUID gc let rs = RemoteStateHandle cu let c = fromMaybe M.empty $ M.lookup cu m - generate t g u c gc rs >>= \case + pc <- either (const mempty) id . parseRemoteConfig c <$> configParser t c + generate t g u pc gc rs >>= \case Nothing -> return Nothing Just r -> Just <$> adjustExportImport (adjustReadOnly (addHooks r)) rs @@ -126,8 +128,8 @@ updateRemote remote = do | otherwise = return r {- Checks if a remote is syncable using git. -} -gitSyncableRemote :: Remote -> Bool -gitSyncableRemote r = remotetype r `elem` +gitSyncableRemoteType :: RemoteType -> Bool +gitSyncableRemoteType t = t `elem` [ Remote.Git.remote , Remote.GCrypt.remote , Remote.P2P.remote diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 755de1631e..be90ce55f6 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -36,12 +36,13 @@ remote = RemoteType -- and will call chainGen on them. 
, enumerate = const (return []) , generate = \_ _ _ _ _ -> return Nothing + , configParser = mkRemoteConfigParser [] , setup = error "P2P remotes are set up using git-annex p2p" , exportSupported = exportUnsupported , importSupported = importUnsupported } -chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +chainGen :: P2PAddress -> Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) chainGen addr r u c gc rs = do connpool <- mkConnectionPool cst <- remoteCost gc veryExpensiveRemoteCost diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 1847514002..fadc816912 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -1,6 +1,6 @@ {- A remote that is only accessible by rsync. - - - Copyright 2011-2018 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -14,6 +14,7 @@ module Remote.Rsync ( remove, checkKey, withRsyncScratchDir, + rsyncRemoteConfigs, genRsyncOpts, RsyncOpts ) where @@ -30,6 +31,7 @@ import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.ExportImport import Types.Export +import Types.ProposedAccepted import Remote.Rsync.RsyncUrl import Crypto import Utility.Rsync @@ -41,20 +43,31 @@ import Types.Creds import Annex.DirHashes import Utility.Tmp.Dir import Utility.SshHost +import Annex.SpecialRemote.Config import qualified Data.Map as M remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "rsync" , enumerate = const (findSpecialRemotes "rsyncurl") , generate = gen + , configParser = mkRemoteConfigParser $ rsyncRemoteConfigs ++ + [ optionalStringParser rsyncUrlField + (FieldDesc "(required) url or hostname:/directory for rsync to use") + ] , setup = rsyncSetup , exportSupported = exportIsSupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +shellEscapeField :: RemoteConfigField +shellEscapeField = Accepted "shellescape" + +rsyncUrlField :: RemoteConfigField +rsyncUrlField = Accepted "rsyncurl" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do cst <- remoteCost gc expensiveRemoteCost (transport, url) <- rsyncTransport gc $ @@ -111,7 +124,14 @@ gen r u c gc rs = do -- Rsync displays its own progress. 
 { displayProgress = False }
 
-genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> Annex [CommandParam] -> RsyncUrl -> RsyncOpts
+-- Things used by genRsyncOpts
+rsyncRemoteConfigs :: [RemoteConfigFieldParser]
+rsyncRemoteConfigs =
+ [ yesNoParser shellEscapeField True
+ (FieldDesc "avoid usual shell escaping (not recommended)")
+ ]
+
+genRsyncOpts :: ParsedRemoteConfig -> RemoteGitConfig -> Annex [CommandParam] -> RsyncUrl -> RsyncOpts
 genRsyncOpts c gc transport url = RsyncOpts
 { rsyncUrl = url
 , rsyncOptions = appendtransport $ opts []
@@ -119,7 +139,7 @@ genRsyncOpts c gc transport url = RsyncOpts
 opts (remoteAnnexRsyncUploadOptions gc)
 , rsyncDownloadOptions = appendtransport $
 opts (remoteAnnexRsyncDownloadOptions gc)
- , rsyncShellEscape = (yesNo =<< M.lookup "shellescape" c) /= Just False
+ , rsyncShellEscape = fromMaybe True (getRemoteConfigValue shellEscapeField c)
 }
 where
 appendtransport l = (++ l) <$> transport
@@ -161,8 +181,8 @@ rsyncSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> Remo
 rsyncSetup _ mu _ c gc = do
 u <- maybe (liftIO genUUID) return mu
 -- verify configuration is sane
- let url = fromMaybe (giveup "Specify rsyncurl=") $
- M.lookup "rsyncurl" c
+ let url = maybe (giveup "Specify rsyncurl=") fromProposedAccepted $
+ M.lookup rsyncUrlField c
 (c', _encsetup) <- encryptionSetup c gc
 
 -- The rsyncurl is stored in git config, not only in this remote's
diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs
index dc810dea4d..affa924ef9 100644
--- a/Remote/Rsync/RsyncUrl.hs
+++ b/Remote/Rsync/RsyncUrl.hs
@@ -43,10 +43,10 @@ mkRsyncUrl o f = rsyncUrl o </> rsyncEscape o f
 rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
 rsyncUrls o k = map use dirHashes
 where
- use h = rsyncUrl o </> fromRawFilePath (hash h) </> rsyncEscape o (f f)
+ use h = rsyncUrl o </> hash h </> rsyncEscape o (f f)
 f = fromRawFilePath (keyFile k)
 #ifndef mingw32_HOST_OS
- hash h = h def k
+ hash h = fromRawFilePath $ h def k
 #else
- hash h = replace "\\" "/" (h def k)
+ hash h = replace "\\" "/" $ fromRawFilePath $ h def k
 #endif
diff --git a/Remote/S3.hs b/Remote/S3.hs
index 55d0b85fde..ff5484464d 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -1,6 +1,6 @@
 {- S3 remotes
 -
- - Copyright 2011-2019 Joey Hess
+ - Copyright 2011-2020 Joey Hess
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@@ -57,6 +57,7 @@ import Annex.Magic
 import Logs.Web
 import Logs.MetaData
 import Types.MetaData
+import Types.ProposedAccepted
 import Utility.Metered
 import Utility.DataUnits
 import Annex.Content
@@ -68,16 +69,92 @@ type BucketName = String
 type BucketObject = String
 
 remote :: RemoteType
-remote = RemoteType
+remote = specialRemoteType $ RemoteType
 { typename = "S3"
 , enumerate = const (findSpecialRemotes "s3")
 , generate = gen
+ , configParser = const $ pure $ RemoteConfigParser
+ { remoteConfigFieldParsers =
+ [ optionalStringParser bucketField
+ (FieldDesc "name of bucket to store content in")
+ , optionalStringParser hostField
+ (FieldDesc "S3 server hostname (default is Amazon S3)")
+ , optionalStringParser datacenterField
+ (FieldDesc "S3 datacenter to use (US, EU, us-west-1, ..)")
+ , optionalStringParser partsizeField
+ (FieldDesc "part size for multipart upload (eg 1GiB)")
+ , optionalStringParser storageclassField
+ (FieldDesc "storage class, eg STANDARD or REDUCED_REDUNDANCY")
+ , optionalStringParser fileprefixField
+ (FieldDesc "prefix to add to filenames in the bucket")
+ , yesNoParser versioningField False
+ (FieldDesc "enable versioning of bucket content")
+ , yesNoParser publicField False
+ (FieldDesc "allow public read access to the bucket")
+ , optionalStringParser publicurlField
+ (FieldDesc "url that can be used by public to download files")
+ , optionalStringParser protocolField
+ (FieldDesc "http or https")
+ , optionalStringParser portField
+ (FieldDesc "port to connect to")
+ , optionalStringParser requeststyleField
+ (FieldDesc "for path-style requests, set to \"path\"")
+ , optionalStringParser mungekeysField HiddenField
+ , optionalStringParser AWS.s3credsField HiddenField
+ ]
+ , remoteConfigRestPassthrough = Just
+ ( \f -> isMetaHeader f || isArchiveMetaHeader f
+ ,
+ [ ("x-amz-meta-*", FieldDesc "http headers to add when storing on S3")
+ , ("x-archive-meta-*", FieldDesc "http headers to add when storing on Internet Archive")
+ ]
+ )
+ }
 , setup = s3Setup
 , exportSupported = exportIsSupported
 , importSupported = importIsSupported
 }
 
-gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
+bucketField :: RemoteConfigField
+bucketField = Accepted "bucket"
+
+hostField :: RemoteConfigField
+hostField = Accepted "host"
+
+datacenterField :: RemoteConfigField
+datacenterField = Accepted "datacenter"
+
+partsizeField :: RemoteConfigField
+partsizeField = Accepted "partsize"
+
+storageclassField :: RemoteConfigField
+storageclassField = Accepted "storageclass"
+
+fileprefixField :: RemoteConfigField
+fileprefixField = Accepted "fileprefix"
+
+versioningField :: RemoteConfigField
+versioningField = Accepted "versioning"
+
+publicField :: RemoteConfigField
+publicField = Accepted "public"
+
+publicurlField :: RemoteConfigField
+publicurlField = Accepted "publicurl"
+
+protocolField :: RemoteConfigField
+protocolField = Accepted "protocol"
+
+requeststyleField :: RemoteConfigField
+requeststyleField = Accepted "requeststyle"
+
+portField :: RemoteConfigField
+portField = Accepted "port"
+
+mungekeysField :: RemoteConfigField
+mungekeysField = Accepted "mungekeys"
+
+gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
 gen r u c gc rs = do
 cst <- remoteCost gc expensiveRemoteCost
 info <- extractS3Info c
@@ -134,7 +211,7 @@ gen r u c gc rs = do
 , appendonly = versioning info
 , availability = GloballyAvailable
 , remotetype = remote
- , mkUnavailable = gen r u
(M.insert "host" "!dne!" c) gc rs + , mkUnavailable = gen r u (M.insert hostField (RemoteConfigValue ("!dne!" :: String)) c) gc rs , getInfo = includeCredsInfo c (AWS.creds u) (s3Info c info) , claimUrl = Nothing , checkUrl = Nothing @@ -148,21 +225,21 @@ s3Setup ss mu mcreds c gc = do s3Setup' :: SetupStage -> UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) s3Setup' ss u mcreds c gc - | configIA c = archiveorg + | maybe False (isIAHost . fromProposedAccepted) (M.lookup hostField c) = archiveorg | otherwise = defaulthost where remotename = fromJust (lookupName c) defbucket = remotename ++ "-" ++ fromUUID u defaults = M.fromList - [ ("datacenter", T.unpack $ AWS.defaultRegion AWS.S3) - , ("storageclass", "STANDARD") - , ("host", AWS.s3DefaultHost) - , ("port", "80") - , ("bucket", defbucket) + [ (datacenterField, Proposed $ T.unpack $ AWS.defaultRegion AWS.S3) + , (storageclassField, Proposed "STANDARD") + , (hostField, Proposed AWS.s3DefaultHost) + , (portField, Proposed "80") + , (bucketField, Proposed defbucket) ] - - use fullconfig info = do - enableBucketVersioning ss info fullconfig gc u + + use fullconfig pc info = do + enableBucketVersioning ss info pc gc u gitConfigSpecialRemote u fullconfig [("s3", "true")] return (fullconfig, u) @@ -170,36 +247,40 @@ s3Setup' ss u mcreds c gc (c', encsetup) <- encryptionSetup c gc c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds let fullconfig = c'' `M.union` defaults - info <- extractS3Info fullconfig - checkexportimportsafe fullconfig info + pc <- either giveup return . parseRemoteConfig fullconfig + =<< configParser remote fullconfig + info <- extractS3Info pc + checkexportimportsafe pc info case ss of - Init -> genBucket fullconfig gc u + Init -> genBucket pc gc u _ -> return () - use fullconfig info + use fullconfig pc info archiveorg = do showNote "Internet Archive mode" c' <- setRemoteCredPair noEncryptionUsed c gc (AWS.creds u) mcreds -- Ensure user enters a valid bucket name, since -- this determines the name of the archive.org item. - let validbucket = replace " " "-" $ - fromMaybe (giveup "specify bucket=") $ - getBucketName c' + let validbucket = replace " " "-" $ map toLower $ + maybe (giveup "specify bucket=") fromProposedAccepted + (M.lookup bucketField c') let archiveconfig = -- IA acdepts x-amz-* as an alias for x-archive-* - M.mapKeys (replace "x-archive-" "x-amz-") $ + M.mapKeys (Proposed . replace "x-archive-" "x-amz-" . fromProposedAccepted) $ -- encryption does not make sense here - M.insert encryptionField "none" $ - M.insert "bucket" validbucket $ + M.insert encryptionField (Proposed "none") $ + M.insert bucketField (Proposed validbucket) $ M.union c' $ -- special constraints on key names - M.insert "mungekeys" "ia" defaults - info <- extractS3Info archiveconfig - checkexportimportsafe archiveconfig info - hdl <- mkS3HandleVar archiveconfig gc u + M.insert mungekeysField (Proposed "ia") defaults + pc <- either giveup return . 
parseRemoteConfig archiveconfig + =<< configParser remote archiveconfig + info <- extractS3Info pc + checkexportimportsafe pc info + hdl <- mkS3HandleVar pc gc u withS3HandleOrFail u hdl $ - writeUUIDFile archiveconfig u info - use archiveconfig info + writeUUIDFile pc u info + use archiveconfig pc info checkexportimportsafe c' info = unlessM (Annex.getState Annex.force) $ @@ -293,7 +374,7 @@ storeHelper info h magic f object p = liftIO $ case partSize info of {- Implemented as a fileRetriever, that uses conduit to stream the chunks - out to the file. Would be better to implement a byteRetriever, but - that is difficult. -} -retrieve :: S3HandleVar -> Remote -> RemoteStateHandle -> RemoteConfig -> S3Info -> Retriever +retrieve :: S3HandleVar -> Remote -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> Retriever retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case (Just h) -> eitherS3VersionID info rs c k (T.pack $ bucketObject info k) >>= \case @@ -306,7 +387,7 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case Left failreason -> do warning failreason giveup "cannot download content" - Right us -> unlessM (downloadUrl k p us f) $ + Right us -> unlessM (withUrlOptions $ downloadUrl k p us f) $ giveup "failed to download content" retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex () @@ -330,7 +411,7 @@ remove hv r info k = withS3HandleOrFail (uuid r) hv $ \h -> liftIO $ runResource S3.DeleteObject (T.pack $ bucketObject info k) (bucket info) return $ either (const False) (const True) res -checkKey :: S3HandleVar -> Remote -> RemoteStateHandle -> RemoteConfig -> S3Info -> CheckPresent +checkKey :: S3HandleVar -> Remote -> RemoteStateHandle -> ParsedRemoteConfig -> S3Info -> CheckPresent checkKey hv r rs c info k = withS3Handle hv $ \case Just h -> do showChecking r @@ -627,7 +708,7 @@ checkPresentExportWithContentIdentifierS3 hv r info _k loc knowncids = - so first check if the UUID file already exists and we can skip creating - it. -} -genBucket :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex () +genBucket :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex () genBucket c gc u = do showAction "checking bucket" info <- extractS3Info c @@ -652,7 +733,7 @@ genBucket c gc u = do writeUUIDFile c u info h locconstraint = mkLocationConstraint $ T.pack datacenter - datacenter = fromJust $ M.lookup "datacenter" c + datacenter = fromJust $ getRemoteConfigValue datacenterField c -- "NEARLINE" as a storage class when creating a bucket is a -- nonstandard extension of Google Cloud Storage. storageclass = case getStorageClass c of @@ -667,7 +748,7 @@ genBucket c gc u = do - Note that IA buckets can only created by having a file - stored in them. So this also takes care of that. -} -writeUUIDFile :: RemoteConfig -> UUID -> S3Info -> S3Handle -> Annex () +writeUUIDFile :: ParsedRemoteConfig -> UUID -> S3Info -> S3Handle -> Annex () writeUUIDFile c u info h = do v <- checkUUIDFile c u info h case v of @@ -684,7 +765,7 @@ writeUUIDFile c u info h = do {- Checks if the UUID file exists in the bucket - and has the specified UUID already. 
-} -checkUUIDFile :: RemoteConfig -> UUID -> S3Info -> S3Handle -> Annex (Either SomeException Bool) +checkUUIDFile :: ParsedRemoteConfig -> UUID -> S3Info -> S3Handle -> Annex (Either SomeException Bool) checkUUIDFile c u info h = tryNonAsync $ liftIO $ runResourceT $ do resp <- tryS3 $ sendS3Handle h (S3.getObject (bucket info) file) case resp of @@ -700,7 +781,7 @@ checkUUIDFile c u info h = tryNonAsync $ liftIO $ runResourceT $ do file = T.pack $ uuidFile c uuidb = L.fromChunks [T.encodeUtf8 $ T.pack $ fromUUID u] -uuidFile :: RemoteConfig -> FilePath +uuidFile :: ParsedRemoteConfig -> FilePath uuidFile c = getFilePrefix c ++ "annex-uuid" tryS3 :: ResourceT IO a -> ResourceT IO (Either S3.S3Error a) @@ -724,7 +805,7 @@ type S3HandleVar = TVar (Either (Annex (Maybe S3Handle)) (Maybe S3Handle)) {- Prepares a S3Handle for later use. Does not connect to S3 or do anything - else expensive. -} -mkS3HandleVar :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex S3HandleVar +mkS3HandleVar :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex S3HandleVar mkS3HandleVar c gc u = liftIO $ newTVarIO $ Left $ do mcreds <- getRemoteCredPair c gc (AWS.creds u) case mcreds of @@ -755,24 +836,24 @@ withS3HandleOrFail u hv a = withS3Handle hv $ \case needS3Creds :: UUID -> String needS3Creds u = missingCredPairFor "S3" (AWS.creds u) -s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery +s3Configuration :: ParsedRemoteConfig -> S3.S3Configuration AWS.NormalQuery s3Configuration c = cfg { S3.s3Port = port - , S3.s3RequestStyle = case M.lookup "requeststyle" c of + , S3.s3RequestStyle = case getRemoteConfigValue requeststyleField c of Just "path" -> S3.PathStyle Just s -> giveup $ "bad S3 requeststyle value: " ++ s Nothing -> S3.s3RequestStyle cfg } where - h = fromJust $ M.lookup "host" c - datacenter = fromJust $ M.lookup "datacenter" c + h = fromJust $ getRemoteConfigValue hostField c + datacenter = fromJust $ getRemoteConfigValue datacenterField c -- When the default S3 host is configured, connect directly to -- the S3 endpoint for the configured datacenter. -- When another host is configured, it's used as-is. 
endpoint | h == AWS.s3DefaultHost = AWS.s3HostName $ T.pack datacenter | otherwise = T.encodeUtf8 $ T.pack h - port = case M.lookup "port" c of + port = case getRemoteConfigValue portField c of Just s -> case reads s of [(p, _)] @@ -787,7 +868,7 @@ s3Configuration c = cfg Just AWS.HTTPS -> 443 Just AWS.HTTP -> 80 Nothing -> 80 - cfgproto = case M.lookup "protocol" c of + cfgproto = case getRemoteConfigValue protocolField c of Just "https" -> Just AWS.HTTPS Just "http" -> Just AWS.HTTP Just s -> giveup $ "bad S3 protocol value: " ++ s @@ -814,7 +895,7 @@ data S3Info = S3Info , host :: Maybe String } -extractS3Info :: RemoteConfig -> Annex S3Info +extractS3Info :: ParsedRemoteConfig -> Annex S3Info extractS3Info c = do b <- maybe (giveup "S3 bucket not configured") @@ -829,13 +910,13 @@ extractS3Info c = do , metaHeaders = getMetaHeaders c , partSize = getPartSize c , isIA = configIA c - , versioning = boolcfg "versioning" - , public = boolcfg "public" - , publicurl = M.lookup "publicurl" c - , host = M.lookup "host" c + , versioning = fromMaybe False $ + getRemoteConfigValue versioningField c + , public = fromMaybe False $ + getRemoteConfigValue publicField c + , publicurl = getRemoteConfigValue publicurlField c + , host = getRemoteConfigValue hostField c } - where - boolcfg k = fromMaybe False $ yesNo =<< M.lookup k c putObject :: S3Info -> T.Text -> RequestBody -> S3.PutObject putObject info file rbody = (S3.putObject (bucket info) file rbody) @@ -850,41 +931,51 @@ acl info | public info = Just S3.AclPublicRead | otherwise = Nothing -getBucketName :: RemoteConfig -> Maybe BucketName -getBucketName = map toLower <$$> M.lookup "bucket" +getBucketName :: ParsedRemoteConfig -> Maybe BucketName +getBucketName = map toLower <$$> getRemoteConfigValue bucketField -getStorageClass :: RemoteConfig -> S3.StorageClass -getStorageClass c = case M.lookup "storageclass" c of +getStorageClass :: ParsedRemoteConfig -> S3.StorageClass +getStorageClass c = case getRemoteConfigValue storageclassField c of Just "REDUCED_REDUNDANCY" -> S3.ReducedRedundancy Just s -> S3.OtherStorageClass (T.pack s) _ -> S3.Standard -getPartSize :: RemoteConfig -> Maybe Integer -getPartSize c = readSize dataUnits =<< M.lookup "partsize" c +getPartSize :: ParsedRemoteConfig -> Maybe Integer +getPartSize c = readSize dataUnits =<< getRemoteConfigValue partsizeField c -getMetaHeaders :: RemoteConfig -> [(T.Text, T.Text)] -getMetaHeaders = map munge . filter ismetaheader . M.assocs +getMetaHeaders :: ParsedRemoteConfig -> [(T.Text, T.Text)] +getMetaHeaders = map munge + . filter (isMetaHeader . fst) + . M.assocs + . getRemoteConfigPassedThrough where - ismetaheader (h, _) = metaprefix `isPrefixOf` h - metaprefix = "x-amz-meta-" - metaprefixlen = length metaprefix - munge (k, v) = (T.pack $ drop metaprefixlen k, T.pack v) + metaprefixlen = length metaPrefix + munge (k, v) = (T.pack $ drop metaprefixlen (fromProposedAccepted k), T.pack v) -getFilePrefix :: RemoteConfig -> String -getFilePrefix = M.findWithDefault "" "fileprefix" +isMetaHeader :: RemoteConfigField -> Bool +isMetaHeader h = metaPrefix `isPrefixOf` fromProposedAccepted h -getBucketObject :: RemoteConfig -> Key -> BucketObject +isArchiveMetaHeader :: RemoteConfigField -> Bool +isArchiveMetaHeader h = "x-archive-" `isPrefixOf` fromProposedAccepted h + +metaPrefix :: String +metaPrefix = "x-amz-meta-" + +getFilePrefix :: ParsedRemoteConfig -> String +getFilePrefix = fromMaybe "" . 
getRemoteConfigValue fileprefixField + +getBucketObject :: ParsedRemoteConfig -> Key -> BucketObject getBucketObject c = munge . serializeKey where - munge s = case M.lookup "mungekeys" c of + munge s = case getRemoteConfigValue mungekeysField c :: Maybe String of Just "ia" -> iaMunge $ getFilePrefix c ++ s _ -> getFilePrefix c ++ s -getBucketExportLocation :: RemoteConfig -> ExportLocation -> BucketObject +getBucketExportLocation :: ParsedRemoteConfig -> ExportLocation -> BucketObject getBucketExportLocation c loc = getFilePrefix c ++ fromRawFilePath (fromExportLocation loc) -getBucketImportLocation :: RemoteConfig -> BucketObject -> Maybe ImportLocation +getBucketImportLocation :: ParsedRemoteConfig -> BucketObject -> Maybe ImportLocation getBucketImportLocation c obj -- The uuidFile should not be imported. | obj == uuidfile = Nothing @@ -910,8 +1001,8 @@ iaMunge = (>>= munge) | isSpace c = [] | otherwise = "&" ++ show (ord c) ++ ";" -configIA :: RemoteConfig -> Bool -configIA = maybe False isIAHost . M.lookup "host" +configIA :: ParsedRemoteConfig -> Bool +configIA = maybe False isIAHost . getRemoteConfigValue hostField {- Hostname to use for archive.org S3. -} iaHost :: HostName @@ -963,7 +1054,7 @@ debugMapper level t = forward "S3" (T.unpack t) AWS.Warning -> warningM AWS.Error -> errorM -s3Info :: RemoteConfig -> S3Info -> [(String, String)] +s3Info :: ParsedRemoteConfig -> S3Info -> [(String, String)] s3Info c info = catMaybes [ Just ("bucket", fromMaybe "unknown" (getBucketName c)) , Just ("endpoint", w82s (BS.unpack (S3.s3Endpoint s3c))) @@ -982,10 +1073,10 @@ s3Info c info = catMaybes showstorageclass (S3.OtherStorageClass t) = T.unpack t showstorageclass sc = show sc -getPublicWebUrls :: UUID -> RemoteStateHandle -> S3Info -> RemoteConfig -> Key -> Annex [URLString] +getPublicWebUrls :: UUID -> RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex [URLString] getPublicWebUrls u rs info c k = either (const []) id <$> getPublicWebUrls' u rs info c k -getPublicWebUrls' :: UUID -> RemoteStateHandle -> S3Info -> RemoteConfig -> Key -> Annex (Either String [URLString]) +getPublicWebUrls' :: UUID -> RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex (Either String [URLString]) getPublicWebUrls' u rs info c k | not (public info) = return $ Left $ "S3 bucket does not allow public access; " ++ needS3Creds u @@ -1125,7 +1216,7 @@ getS3VersionID rs k = do s3VersionField :: MetaField s3VersionField = mkMetaFieldUnchecked "V" -eitherS3VersionID :: S3Info -> RemoteStateHandle -> RemoteConfig -> Key -> S3.Object -> Annex (Either String (Either S3.Object S3VersionID)) +eitherS3VersionID :: S3Info -> RemoteStateHandle -> ParsedRemoteConfig -> Key -> S3.Object -> Annex (Either String (Either S3.Object S3VersionID)) eitherS3VersionID info rs c k fallback | versioning info = getS3VersionID rs k >>= return . \case [] -> if exportTree c @@ -1150,7 +1241,7 @@ getS3VersionIDPublicUrls mk info rs k = -- Enable versioning on the bucket can only be done at init time; -- setting versioning in a bucket that git-annex has already exported -- files to risks losing the content of those un-versioned files. 
-enableBucketVersioning :: SetupStage -> S3Info -> RemoteConfig -> RemoteGitConfig -> UUID -> Annex () +enableBucketVersioning :: SetupStage -> S3Info -> ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex () #if MIN_VERSION_aws(0,21,1) enableBucketVersioning ss info c gc u = do #else @@ -1160,7 +1251,10 @@ enableBucketVersioning ss info _ _ _ = do Init -> when (versioning info) $ enableversioning (bucket info) Enable oldc -> do - oldinfo <- extractS3Info oldc + oldpc <- either (const mempty) id + . parseRemoteConfig oldc + <$> configParser remote oldc + oldinfo <- extractS3Info oldpc when (versioning info /= versioning oldinfo) $ giveup "Cannot change versioning= of existing S3 remote." where diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index b169a380f4..29d57d0461 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -13,7 +13,7 @@ - - Tahoe has its own encryption, so git-annex's encryption is not used. - - - Copyright 2014 Joey Hess + - Copyright 2014-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -30,9 +30,11 @@ import Control.Concurrent.STM import Annex.Common import Types.Remote import Types.Creds +import Types.ProposedAccepted import qualified Git import Config import Config.Cost +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.ExportImport import Annex.UUID @@ -52,16 +54,27 @@ type IntroducerFurl = String type Capability = String remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "tahoe" , enumerate = const (findSpecialRemotes "tahoe") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser scsField + (FieldDesc "optional, normally a unique one is generated") + , optionalStringParser furlField HiddenField + ] , setup = tahoeSetup , exportSupported = exportUnsupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +scsField :: RemoteConfigField +scsField = Accepted "shared-convergence-secret" + +furlField :: RemoteConfigField +furlField = Accepted "introducer-furl" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = do cst <- remoteCost gc expensiveRemoteCost hdl <- liftIO $ TahoeHandle @@ -102,22 +115,23 @@ gen r u c gc rs = do tahoeSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) tahoeSetup _ mu _ c _ = do - furl <- fromMaybe (fromMaybe missingfurl $ M.lookup furlk c) + furl <- maybe (fromMaybe missingfurl $ M.lookup furlField c) Proposed <$> liftIO (getEnv "TAHOE_FURL") u <- maybe (liftIO genUUID) return mu configdir <- liftIO $ defaultTahoeConfigDir u - scs <- liftIO $ tahoeConfigure configdir furl (M.lookup scsk c) - let c' = if (yesNo =<< M.lookup "embedcreds" c) == Just True + scs <- liftIO $ tahoeConfigure configdir + (fromProposedAccepted furl) + (fromProposedAccepted <$> (M.lookup scsField c)) + pc <- either giveup return . parseRemoteConfig c =<< configParser remote c + let c' = if embedCreds pc then flip M.union c $ M.fromList - [ (furlk, furl) - , (scsk, scs) + [ (furlField, furl) + , (scsField, Proposed scs) ] else c gitConfigSpecialRemote u c' [("tahoe", configdir)] return (c', u) where - scsk = "shared-convergence-secret" - furlk = "introducer-furl" missingfurl = giveup "Set TAHOE_FURL to the introducer furl to use." 
store :: RemoteStateHandle -> TahoeHandle -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool diff --git a/Remote/Web.hs b/Remote/Web.hs index 810c2f027e..a2c0cb6407 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -27,6 +27,7 @@ remote = RemoteType { typename = "web" , enumerate = list , generate = gen + , configParser = mkRemoteConfigParser [] , setup = error "not supported" , exportSupported = exportUnsupported , importSupported = importUnsupported @@ -40,7 +41,7 @@ list _autoinit = do r <- liftIO $ Git.Construct.remoteNamed "web" (pure Git.Construct.fromUnknown) return [r] -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r _ c gc rs = do cst <- remoteCost gc expensiveRemoteCost return $ Just Remote @@ -89,7 +90,7 @@ downloadKey key _af dest p = unVerified $ get =<< getWebUrls key YoutubeDownloader -> do showOutput youtubeDlTo key u' dest - _ -> downloadUrl key p [u'] dest + _ -> Url.withUrlOptions $ downloadUrl key p [u'] dest downloadKeyCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool downloadKeyCheap _ _ _ = return False diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 08c3d528cc..6879d96e03 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -1,6 +1,6 @@ {- WebDAV remotes. - - - Copyright 2012-2017 Joey Hess + - Copyright 2012-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -29,6 +29,7 @@ import Types.Export import qualified Git import Config import Config.Cost +import Annex.SpecialRemote.Config import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.Http @@ -39,18 +40,30 @@ import Utility.Metered import Utility.Url (URLString, matchStatusCodeException, matchHttpExceptionContent) import Annex.UUID import Remote.WebDAV.DavLocation +import Types.ProposedAccepted remote :: RemoteType -remote = RemoteType +remote = specialRemoteType $ RemoteType { typename = "webdav" , enumerate = const (findSpecialRemotes "webdav") , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser urlField + (FieldDesc "(required) url to the WebDAV directory") + , optionalStringParser davcredsField HiddenField + ] , setup = webdavSetup , exportSupported = exportIsSupported , importSupported = importUnsupported } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +urlField :: RemoteConfigField +urlField = Accepted "url" + +davcredsField :: RemoteConfigField +davcredsField = Accepted "davcreds" + +gen :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u c gc rs = new <$> remoteCost gc expensiveRemoteCost where new cst = Just $ specialRemote c @@ -95,9 +108,9 @@ gen r u c gc rs = new <$> remoteCost gc expensiveRemoteCost , appendonly = False , availability = GloballyAvailable , remotetype = remote - , mkUnavailable = gen r u (M.insert "url" "http://!dne!/" c) gc rs + , mkUnavailable = gen r u (M.insert urlField (RemoteConfigValue "http://!dne!/") c) gc rs , getInfo = includeCredsInfo c (davCreds u) $ - [("url", fromMaybe "unknown" (M.lookup "url" c))] + [("url", fromMaybe "unknown" $ getRemoteConfigValue urlField c)] , claimUrl = Nothing , checkUrl = Nothing , remoteStateHandle = rs @@ -107,11 +120,12 @@ gen r u c gc rs = new <$> remoteCost gc expensiveRemoteCost webdavSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> 
RemoteGitConfig -> Annex (RemoteConfig, UUID)
 webdavSetup _ mu mcreds c gc = do
 u <- maybe (liftIO genUUID) return mu
- url <- case M.lookup "url" c of
- Nothing -> giveup "Specify url="
- Just url -> return url
+ url <- maybe (giveup "Specify url=")
+ (return . fromProposedAccepted)
+ (M.lookup urlField c)
 (c', encsetup) <- encryptionSetup c gc
- creds <- maybe (getCreds c' gc u) (return . Just) mcreds
+ pc <- either giveup return . parseRemoteConfig c' =<< configParser remote c'
+ creds <- maybe (getCreds pc gc u) (return . Just) mcreds
 testDav url creds
 gitConfigSpecialRemote u c' [("webdav", "true")]
 c'' <- setRemoteCredPair encsetup c' gc (davCreds u) creds
@@ -255,7 +269,7 @@ runExport Nothing _ = return False
 runExport (Just h) a = fromMaybe False <$> liftIO (goDAV h $ safely (a h))
 
 configUrl :: Remote -> Maybe URLString
-configUrl r = fixup <$> M.lookup "url" (config r)
+configUrl r = fixup <$> getRemoteConfigValue urlField (config r)
 where
 -- box.com DAV url changed
 fixup = replace "https://www.box.com/dav/" boxComUrl
@@ -335,14 +349,14 @@ mkColRecursive d = go =<< existsDAV d
 inLocation d mkCol
 )
 
-getCreds :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex (Maybe CredPair)
+getCreds :: ParsedRemoteConfig -> RemoteGitConfig -> UUID -> Annex (Maybe CredPair)
 getCreds c gc u = getRemoteCredPairFor "webdav" c gc (davCreds u)
 
 davCreds :: UUID -> CredPairStorage
 davCreds u = CredPairStorage
 { credPairFile = fromUUID u
 , credPairEnvironment = ("WEBDAV_USERNAME", "WEBDAV_PASSWORD")
- , credPairRemoteField = "davcreds"
+ , credPairRemoteField = davcredsField
 }
 
 {- Content-Type to use for files uploaded to WebDAV. -}
diff --git a/Test.hs b/Test.hs
index 9d9e0e7554..5c09d6d537 100644
--- a/Test.hs
+++ b/Test.hs
@@ -1614,7 +1614,7 @@ test_crypto = do
 annexed_present annexedfile
 {- Ensure the configuration complies with the encryption scheme, and
 - that all keys are encrypted properly for the given directory remote. -}
- testEncryptedRemote scheme ks c keys = case Remote.Helper.Encryptable.extractCipher c of
+ testEncryptedRemote scheme ks c keys = case Remote.Helper.Encryptable.extractCipher pc of
 Just cip@Crypto.SharedCipher{}
 | scheme == "shared" && isNothing ks ->
 checkKeys cip Nothing
 Just cip@(Crypto.EncryptedCipher encipher v ks')
@@ -1622,6 +1622,8 @@ test_crypto = do
 checkKeys cip (Just v) <&&> checkCipher encipher ks'
 _ -> return False
 where
+ pc = either mempty id $
+ Remote.Helper.Encryptable.parseEncryptionConfig c
 keysMatch (Utility.Gpg.KeyIds ks') =
 maybe False (\(Utility.Gpg.KeyIds ks2) ->
 sort (nub ks2) == sort (nub ks')) ks
@@ -1630,7 +1632,7 @@ test_crypto = do
 checkScheme Types.Crypto.PubKey = scheme == "pubkey"
 checkKeys cip mvariant = do
 dummycfg <- Types.GitConfig.dummyRemoteGitConfig
- let encparams = (mempty :: Types.Remote.RemoteConfig, dummycfg)
+ let encparams = (mempty :: Types.Remote.ParsedRemoteConfig, dummycfg)
 cipher <- Crypto.decryptCipher gpgcmd encparams cip
 files <- filterM doesFileExist $ map ("dir" </>) $
 concatMap (serializeKeys cipher) keys
diff --git a/Types/Crypto.hs b/Types/Crypto.hs
index 7dd860934a..48a6ad0cc8 100644
--- a/Types/Crypto.hs
+++ b/Types/Crypto.hs
@@ -1,11 +1,12 @@
 {- git-annex crypto types
 -
- - Copyright 2011-2015 Joey Hess
+ - Copyright 2011-2020 Joey Hess
 -
 - Licensed under the GNU AGPL version 3 or higher.
-} module Types.Crypto ( + EncryptionMethod(..), Cipher(..), StorableCipher(..), EncryptedCipherVariant(..), @@ -14,6 +15,7 @@ module Types.Crypto ( Mac(..), readMac, showMac, + macMap, defaultMac, calcMac, ) where @@ -21,6 +23,17 @@ module Types.Crypto ( import Utility.Hash import Utility.Gpg (KeyIds(..)) +import Data.Typeable +import qualified Data.Map as M + +data EncryptionMethod + = NoneEncryption + | SharedEncryption + | PubKeyEncryption + | SharedPubKeyEncryption + | HybridEncryption + deriving (Typeable, Eq) + -- XXX ideally, this would be a locked memory region data Cipher = Cipher String | MacOnlyCipher String @@ -50,9 +63,13 @@ showMac HmacSha512 = "HMACSHA512" -- Read the MAC algorithm from the remote config. readMac :: String -> Maybe Mac -readMac "HMACSHA1" = Just HmacSha1 -readMac "HMACSHA224" = Just HmacSha224 -readMac "HMACSHA256" = Just HmacSha256 -readMac "HMACSHA384" = Just HmacSha384 -readMac "HMACSHA512" = Just HmacSha512 -readMac _ = Nothing +readMac n = M.lookup n macMap + +macMap :: M.Map String Mac +macMap = M.fromList + [ ("HMACSHA1", HmacSha1) + , ("HMACSHA224", HmacSha224) + , ("HMACSHA256", HmacSha256) + , ("HMACSHA384", HmacSha384) + , ("HMACSHA512", HmacSha512) + ] diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 30ae5c0df3..f7d0da2d34 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -80,6 +80,7 @@ data GitConfig = GitConfig , annexAutoCommit :: Configurable Bool , annexResolveMerge :: Configurable Bool , annexSyncContent :: Configurable Bool + , annexSyncOnlyAnnex :: Configurable Bool , annexDebug :: Bool , annexWebOptions :: [String] , annexYoutubeDlOptions :: [String] @@ -152,6 +153,8 @@ extractGitConfig configsource r = GitConfig getmaybebool (annex "resolvemerge") , annexSyncContent = configurable False $ getmaybebool (annex "synccontent") + , annexSyncOnlyAnnex = configurable False $ + getmaybebool (annex "synconlyannex") , annexDebug = getbool (annex "debug") False , annexWebOptions = getwords (annex "web-options") , annexYoutubeDlOptions = getwords (annex "youtube-dl-options") @@ -232,6 +235,7 @@ mergeGitConfig :: GitConfig -> GitConfig -> GitConfig mergeGitConfig gitconfig repoglobals = gitconfig { annexAutoCommit = merge annexAutoCommit , annexSyncContent = merge annexSyncContent + , annexSyncOnlyAnnex = merge annexSyncOnlyAnnex , annexResolveMerge = merge annexResolveMerge , annexLargeFiles = merge annexLargeFiles , annexDotFiles = merge annexDotFiles diff --git a/Types/ProposedAccepted.hs b/Types/ProposedAccepted.hs new file mode 100644 index 0000000000..a48c94546a --- /dev/null +++ b/Types/ProposedAccepted.hs @@ -0,0 +1,38 @@ +{- proposed and accepted values + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Types.ProposedAccepted where + +import Test.QuickCheck + +-- | A value that may be proposed, or accepted. +-- +-- When parsing/validating the value, may want to error out on invalid +-- input. But if a previous version of git-annex accepted an invalid value, +-- it's too late to error out, and instead the bad value may be ignored. +data ProposedAccepted t = Proposed t | Accepted t + deriving (Show) + +fromProposedAccepted :: ProposedAccepted t -> t +fromProposedAccepted (Proposed t) = t +fromProposedAccepted (Accepted t) = t + +-- | Whether a value is proposed or accepted does not matter when checking +-- equality. 
+instance Eq t => Eq (ProposedAccepted t) where + a == b = fromProposedAccepted a == fromProposedAccepted b + +-- | Order by the contained value, not by whether it's proposed or +-- accepted. +instance Ord t => Ord (ProposedAccepted t) where + compare a b = compare (fromProposedAccepted a) (fromProposedAccepted b) + +instance Arbitrary t => Arbitrary (ProposedAccepted t) where + arbitrary = oneof + [ Proposed <$> arbitrary + , Accepted <$> arbitrary + ] diff --git a/Types/Remote.hs b/Types/Remote.hs index 0604228f86..12a37a618c 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -2,7 +2,7 @@ - - Most things should not need this, using Types instead - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -10,8 +10,7 @@ {-# LANGUAGE RankNTypes #-} module Types.Remote - ( RemoteConfigField - , RemoteConfig + ( module Types.RemoteConfig , RemoteTypeA(..) , RemoteA(..) , RemoteStateHandle @@ -28,7 +27,6 @@ module Types.Remote ) where -import qualified Data.Map as M import Data.Ord import qualified Git @@ -42,6 +40,7 @@ import Types.UrlContents import Types.NumCopies import Types.Export import Types.Import +import Types.RemoteConfig import Config.Cost import Utility.Metered import Git.Types (RemoteName) @@ -49,10 +48,6 @@ import Utility.SafeCommand import Utility.Url import Utility.DataUnits -type RemoteConfigField = String - -type RemoteConfig = M.Map RemoteConfigField String - data SetupStage = Init | Enable RemoteConfig {- There are different types of remotes. -} @@ -63,13 +58,15 @@ data RemoteTypeA a = RemoteType -- The Bool is True if automatic initialization of remotes is desired , enumerate :: Bool -> a [Git.Repo] -- generates a remote of this type - , generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> a (Maybe (RemoteA a)) + , generate :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> a (Maybe (RemoteA a)) + -- parse configs of remotes of this type + , configParser :: RemoteConfig -> a RemoteConfigParser -- initializes or enables a remote , setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID) -- check if a remote of this type is able to support export - , exportSupported :: RemoteConfig -> RemoteGitConfig -> a Bool + , exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool -- check if a remote of this type is able to support import - , importSupported :: RemoteConfig -> RemoteGitConfig -> a Bool + , importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool } instance Eq (RemoteTypeA a) where @@ -124,7 +121,7 @@ data RemoteA a = Remote -- Runs an action to repair the remote's git repository. , repairRepo :: Maybe (a Bool -> a (IO Bool)) -- a Remote has a persistent configuration store - , config :: RemoteConfig + , config :: ParsedRemoteConfig -- Get the git repo for the Remote. , getRepo :: a Git.Repo -- a Remote's configuration from git diff --git a/Types/RemoteConfig.hs b/Types/RemoteConfig.hs new file mode 100644 index 0000000000..f0df89a10c --- /dev/null +++ b/Types/RemoteConfig.hs @@ -0,0 +1,69 @@ +{- git-annex remote config types + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +{-# LANGUAGE GADTs #-} + +module Types.RemoteConfig where + +import qualified Data.Map as M +import qualified Data.Set as S +import Data.Typeable + +import Types.ProposedAccepted + +type RemoteConfigField = ProposedAccepted String + +{- What the user provides to configure the remote, and what is stored for + - later; a bunch of fields and values. -} +type RemoteConfig = M.Map RemoteConfigField (ProposedAccepted String) + +{- Before being used a RemoteConfig has to be parsed. -} +type ParsedRemoteConfig = M.Map RemoteConfigField RemoteConfigValue + +{- Remotes can have configuration values of many types, so use Typeable + - to let them all be stored in here. -} +data RemoteConfigValue where + RemoteConfigValue :: Typeable v => v -> RemoteConfigValue + +{- Parse a field's value provided by the user into a RemoteConfigValue. + - + - The RemoteConfig is provided to the parser function for cases + - where multiple fields need to be looked at. However, it's important + - that, when a parser looks at an additional field in that way, the + - parser list contains a dedicated parser for that field as well. + - Presence of fields that are not included in this list will cause + - a parse failure. + -} +data RemoteConfigFieldParser = RemoteConfigFieldParser + { parserForField :: RemoteConfigField + , valueParser :: Maybe (ProposedAccepted String) -> RemoteConfig -> Either String (Maybe RemoteConfigValue) + , fieldDesc :: FieldDesc + , valueDesc :: Maybe ValueDesc + } + +data FieldDesc + = FieldDesc String + | HiddenField + +newtype ValueDesc = ValueDesc String + +data RemoteConfigParser = RemoteConfigParser + { remoteConfigFieldParsers :: [RemoteConfigFieldParser] + , remoteConfigRestPassthrough :: Maybe (RemoteConfigField -> Bool, [(String, FieldDesc)]) + } + +mkRemoteConfigParser :: Monad m => [RemoteConfigFieldParser] -> RemoteConfig -> m RemoteConfigParser +mkRemoteConfigParser l _ = pure (RemoteConfigParser l Nothing) + +addRemoteConfigParser :: [RemoteConfigFieldParser] -> RemoteConfigParser -> RemoteConfigParser +addRemoteConfigParser l rpc = rpc + { remoteConfigFieldParsers = + remoteConfigFieldParsers rpc ++ filter isnew l + } + where + s = S.fromList (map parserForField (remoteConfigFieldParsers rpc)) + isnew p = not (S.member (parserForField p) s) diff --git a/Types/RemoteState.hs b/Types/RemoteState.hs index 6e3cd23b74..aef102a7d4 100644 --- a/Types/RemoteState.hs +++ b/Types/RemoteState.hs @@ -9,7 +9,7 @@ module Types.RemoteState where import Types.UUID -{- When per-remote state, its UUID is used to identify it. +{- When there is per-remote state, remotes are identified by UUID. - - However, sameas remotes mean that two different Remote implementations - can be used for the same underlying data store. 
To avoid them using diff --git a/Types/StandardGroups.hs b/Types/StandardGroups.hs index 5f00903570..7f83fc3c9d 100644 --- a/Types/StandardGroups.hs +++ b/Types/StandardGroups.hs @@ -11,9 +11,10 @@ module Types.StandardGroups where import Types.Remote (RemoteConfig) import Types.Group +import Types.ProposedAccepted +import Annex.SpecialRemote.Config (preferreddirField) import qualified Data.Map as M -import Data.Maybe type PreferredContentExpression = String @@ -71,7 +72,8 @@ associatedDirectory :: Maybe RemoteConfig -> StandardGroup -> Maybe FilePath associatedDirectory _ SmallArchiveGroup = Just "archive" associatedDirectory _ FullArchiveGroup = Just "archive" associatedDirectory (Just c) PublicGroup = Just $ - fromMaybe "public" $ M.lookup "preferreddir" c + maybe "public" fromProposedAccepted $ + M.lookup preferreddirField c associatedDirectory Nothing PublicGroup = Just "public" associatedDirectory _ _ = Nothing diff --git a/Upgrade/V2.hs b/Upgrade/V2.hs index e255403d58..4a70a05d1a 100644 --- a/Upgrade/V2.hs +++ b/Upgrade/V2.hs @@ -67,16 +67,17 @@ upgrade = do locationLogs :: Annex [(Key, FilePath)] locationLogs = do + config <- Annex.getGitConfig dir <- fromRepo gitStateDir liftIO $ do levela <- dirContents dir levelb <- mapM tryDirContents levela files <- mapM tryDirContents (concat levelb) - return $ mapMaybe islogfile (concat files) + return $ mapMaybe (islogfile config) (concat files) where tryDirContents d = catchDefaultIO [] $ dirContents d - islogfile f = maybe Nothing (\k -> Just (k, f)) $ - locationLogFileKey (toRawFilePath f) + islogfile config f = maybe Nothing (\k -> Just (k, f)) $ + locationLogFileKey config (toRawFilePath f) inject :: FilePath -> FilePath -> Annex () inject source dest = do diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index f9e98145a7..4c099ff3a4 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -43,6 +43,7 @@ import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.UTF8 as S8 import qualified Data.ByteString.Lazy.UTF8 as L8 #endif +import System.FilePath.ByteString (RawFilePath, encodeFilePath, decodeFilePath) import Utility.Exception import Utility.Split @@ -171,21 +172,11 @@ encodeBL' = L.pack . decodeW8 encodeBL' = L8.fromString #endif -{- Recent versions of the unix package have this alias; defined here - - for backwards compatibility. -} -type RawFilePath = S.ByteString - -{- Note that the RawFilePath is assumed to never contain NUL, - - since filename's don't. This should only be used with actual - - RawFilePaths not arbitrary ByteString that may contain NUL. -} fromRawFilePath :: RawFilePath -> FilePath -fromRawFilePath = decodeBS' +fromRawFilePath = decodeFilePath -{- Note that the FilePath is assumed to never contain NUL, - - since filename's don't. This should only be used with actual FilePaths - - not arbitrary String that may contain NUL. -} toRawFilePath :: FilePath -> RawFilePath -toRawFilePath = encodeBS' +toRawFilePath = encodeFilePath {- Converts a [Word8] to a FilePath, encoding using the filesystem encoding. - diff --git a/Utility/Url.hs b/Utility/Url.hs index 2aa4e6a589..7ef0f75ec6 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -1,6 +1,6 @@ {- Url downloading. 
- - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - License: BSD-2-clause -} @@ -8,6 +8,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE BangPatterns #-} module Utility.Url ( newManager, @@ -35,6 +36,10 @@ module Utility.Url ( parseURIRelaxed, matchStatusCodeException, matchHttpExceptionContent, + BasicAuth(..), + GetBasicAuth, + noBasicAuth, + applyBasicAuth', ) where import Common @@ -84,6 +89,7 @@ data UrlOptions = UrlOptions , applyRequest :: Request -> Request , httpManager :: Manager , allowedSchemes :: S.Set Scheme + , getBasicAuth :: GetBasicAuth } data UrlDownloader @@ -101,10 +107,11 @@ defUrlOptions = UrlOptions <*> pure id <*> newManager tlsManagerSettings <*> pure (S.fromList $ map mkScheme ["http", "https", "ftp"]) + <*> pure noBasicAuth -mkUrlOptions :: Maybe UserAgent -> Headers -> UrlDownloader -> Manager -> S.Set Scheme -> UrlOptions -mkUrlOptions defuseragent reqheaders urldownloader manager = - UrlOptions useragent reqheaders urldownloader applyrequest manager +mkUrlOptions :: Maybe UserAgent -> Headers -> UrlDownloader -> Manager -> S.Set Scheme -> GetBasicAuth -> UrlOptions +mkUrlOptions defuseragent reqheaders urldownloader manager getbasicauth = + UrlOptions useragent reqheaders urldownloader applyrequest manager getbasicauth where applyrequest = \r -> r { requestHeaders = requestHeaders r ++ addedheaders } addedheaders = uaheader ++ otherheaders @@ -197,14 +204,14 @@ getUrlInfo url uo = case parseURIRelaxed url of Nothing -> return (Right dne) where go :: URI -> IO (Either String UrlInfo) - go u = case (urlDownloader uo, parseUrlRequest (show u)) of + go u = case (urlDownloader uo, parseRequest (show u)) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust -- When http redirects to a protocol which -- conduit does not support, it will throw -- a StatusCodeException with found302 -- and a Response with the redir Location. (matchStatusCodeException (== found302)) - (Right <$> existsconduit req) + (Right <$> existsconduit req uo) (followredir r) `catchNonAsync` (const $ return $ Right dne) (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) @@ -243,18 +250,28 @@ getUrlInfo url uo = case parseURIRelaxed url of extractfilename = contentDispositionFilename . B8.toString <=< lookup hContentDisposition . 
responseHeaders - existsconduit req = do + existsconduit req uo' = do let req' = headRequest (applyRequest uo req) debugM "url" (show req') - runResourceT $ do + join $ runResourceT $ do resp <- http req' (httpManager uo) -- forces processing the response while -- within the runResourceT liftIO $ if responseStatus resp == ok200 - then found - (extractlen resp) - (extractfilename resp) - else return dne + then do + let !len = extractlen resp + let !fn = extractfilename resp + return $ found len fn + else if responseStatus resp == unauthorized401 + then return $ getBasicAuth uo' (show (getUri req)) >>= \case + Nothing -> return dne + Just (ba, signalsuccess) -> do + ui <- existsconduit + (applyBasicAuth' ba req) + (uo' { getBasicAuth = noBasicAuth }) + signalsuccess (urlExists ui) + return ui + else return $ return dne existscurl u curlparams = do output <- catchDefaultIO "" $ @@ -284,6 +301,7 @@ getUrlInfo url uo = case parseURIRelaxed url of sz <- getFileSize' f stat found (Just sz) Nothing Nothing -> return dne + followredir r (HttpExceptionRequest _ (StatusCodeException resp _)) = case headMaybe $ map decodeBS $ getResponseHeader hLocation resp of Just url' -> case parseURIRelaxed url' of @@ -334,7 +352,7 @@ download' nocurlerror meterupdate url file uo = where go = case parseURIRelaxed url of Just u -> checkPolicy uo u $ - case (urlDownloader uo, parseUrlRequest (show u)) of + case (urlDownloader uo, parseRequest (show u)) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust (matchStatusCodeException (== found302)) (downloadConduit meterupdate req file uo >> return (Right ())) @@ -416,12 +434,18 @@ downloadConduit :: MeterUpdate -> Request -> FilePath -> UrlOptions -> IO () downloadConduit meterupdate req file uo = catchMaybeIO (getFileSize file) >>= \case Just sz | sz > 0 -> resumedownload sz - _ -> runResourceT $ do + _ -> join $ runResourceT $ do liftIO $ debugM "url" (show req') resp <- http req' (httpManager uo) if responseStatus resp == ok200 - then store zeroBytesProcessed WriteMode resp - else respfailure resp + then do + store zeroBytesProcessed WriteMode resp + return (return ()) + else if responseStatus resp == unauthorized401 + then return $ getBasicAuth uo (show (getUri req')) >>= \case + Nothing -> respfailure resp + Just ba -> retryauthed ba + else return $ respfailure resp where req' = applyRequest uo $ req -- Override http-client's default decompression of gzip @@ -440,15 +464,23 @@ downloadConduit meterupdate req file uo = dl (const noop) where - dl = runResourceT $ do + dl = join $ runResourceT $ do let req'' = req' { requestHeaders = resumeFromHeader sz : requestHeaders req } liftIO $ debugM "url" (show req'') resp <- http req'' (httpManager uo) if responseStatus resp == partialContent206 - then store (toBytesProcessed sz) AppendMode resp + then do + store (toBytesProcessed sz) AppendMode resp + return (return ()) else if responseStatus resp == ok200 - then store zeroBytesProcessed WriteMode resp - else respfailure resp + then do + store zeroBytesProcessed WriteMode resp + return (return ()) + else if responseStatus resp == unauthorized401 + then return $ getBasicAuth uo (show (getUri req'')) >>= \case + Nothing -> respfailure resp + Just ba -> retryauthed ba + else return $ respfailure resp alreadydownloaded sz s h = s == requestedRangeNotSatisfiable416 && case lookup hContentRange h of @@ -469,6 +501,18 @@ downloadConduit meterupdate req file uo = respfailure = giveup . B8.toString . statusMessage . 
responseStatus + retryauthed (ba, signalsuccess) = do + r <- tryNonAsync $ downloadConduit + meterupdate + (applyBasicAuth' ba req) + file + (uo { getBasicAuth = noBasicAuth }) + case r of + Right () -> signalsuccess True + Left e -> do + () <- signalsuccess False + throwM e + {- Sinks a Response's body to a file. The file can either be opened in - WriteMode or AppendMode. Updates the meter as data is received. - @@ -502,7 +546,7 @@ downloadPartial url uo n = case parseURIRelaxed url of Nothing -> return Nothing Just u -> go u `catchNonAsync` const (return Nothing) where - go u = case parseUrlRequest (show u) of + go u = case parseRequest (show u) of Nothing -> return Nothing Just req -> do let req' = applyRequest uo req @@ -517,9 +561,6 @@ parseURIRelaxed :: URLString -> Maybe URI parseURIRelaxed s = maybe (parseURIRelaxed' s) Just $ parseURI $ escapeURIString isAllowedInURI s -parseUrlRequest :: URLString -> Maybe Request -parseUrlRequest = parseUrlThrow - {- Some characters like '[' are allowed in eg, the address of - an uri, but cannot appear unescaped further along in the uri. - This handles that, expensively, by successively escaping each character @@ -628,3 +669,22 @@ curlRestrictedParams r u defport ps = case uriAuthority u of , Param "--max-redirs", Param "0" ] bracketaddr a = "[" ++ a ++ "]" + +data BasicAuth = BasicAuth + { basicAuthUser :: String + , basicAuthPassword :: String + } + +-- Note that this is only used when using conduit, not curl. +-- +-- The returned IO action is run after trying to use the BasicAuth, +-- indicating if the password worked. +type GetBasicAuth = URLString -> IO (Maybe (BasicAuth, Bool -> IO ())) + +noBasicAuth :: GetBasicAuth +noBasicAuth = const $ pure Nothing + +applyBasicAuth' :: BasicAuth -> Request -> Request +applyBasicAuth' ba = applyBasicAuth + (encodeBS (basicAuthUser ba)) + (encodeBS (basicAuthPassword ba)) diff --git a/doc/bugs/2_ssh_connection_prompts_for_password.mdwn b/doc/bugs/2_ssh_connection_prompts_for_password.mdwn index 7d718fbc94..633c8a3b62 100644 --- a/doc/bugs/2_ssh_connection_prompts_for_password.mdwn +++ b/doc/bugs/2_ssh_connection_prompts_for_password.mdwn @@ -63,3 +63,4 @@ get R042/R042-2013-08-16/R042-2013-08-16-CSC01a.ncs get R042/R042-2013-08-16/R04 [[!meta author=yoh]] [[!tag projects/datalad]] +> warning added; [[done]] --[[Joey]] diff --git a/doc/bugs/2_ssh_connection_prompts_for_password/comment_4_43e5a90d6215b414f4e3357ea91af663._comment b/doc/bugs/2_ssh_connection_prompts_for_password/comment_4_43e5a90d6215b414f4e3357ea91af663._comment new file mode 100644 index 0000000000..7ae63772c4 --- /dev/null +++ b/doc/bugs/2_ssh_connection_prompts_for_password/comment_4_43e5a90d6215b414f4e3357ea91af663._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-23T15:51:46Z" + content=""" +I notice that debug output has no BatchMode=true in any ssh call. But +the version of git-annex you show always runs ssh with that when +-J is used, unless sshcaching is disabled. + +More evidence that sshcaching is disabled in your transcript is that when +it does run ssh, it does not pass -S. + +I think the repository must be on a crippled filesystem, on which +git-annex can't do ssh connection caching, because the filesystem +does not support unix sockets. (Or it potentially could be crippled in some +other way.) So it ignores the annex.sshcaching setting. 
+You could work around this by setting the (undocumented) +GIT_ANNEX_TMP_DIR to some temporary directory on a non-crippled filesystem. + +I'm going to add a warning message in this situation. +"""]] diff --git a/doc/bugs/2_ssh_connection_prompts_for_password/comment_5_27b5669cecba4863ce296049804e82af._comment b/doc/bugs/2_ssh_connection_prompts_for_password/comment_5_27b5669cecba4863ce296049804e82af._comment new file mode 100644 index 0000000000..10f405fd1f --- /dev/null +++ b/doc/bugs/2_ssh_connection_prompts_for_password/comment_5_27b5669cecba4863ce296049804e82af._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 5" + date="2020-01-23T17:51:58Z" + content=""" +Thank you Joey! I can only confirm that the file system was likely a crippled/NFS one... So we would likely need to do some sensing on DataLad side and instruct git-annex. Will continue on our end at https://github.com/datalad/datalad/issues/4075 +"""]] diff --git a/doc/bugs/A_case_where_file_tracked_by_git_unexpectedly_becomes_annex_pointer_file/comment_6_b713bf610c4f4d9c2463b61721bce406._comment b/doc/bugs/A_case_where_file_tracked_by_git_unexpectedly_becomes_annex_pointer_file/comment_6_b713bf610c4f4d9c2463b61721bce406._comment new file mode 100644 index 0000000000..9dba03060f --- /dev/null +++ b/doc/bugs/A_case_where_file_tracked_by_git_unexpectedly_becomes_annex_pointer_file/comment_6_b713bf610c4f4d9c2463b61721bce406._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2020-01-01T17:41:13Z" + content=""" +I've added git-annex add --force-large and --force-small, which would be +good to use to avoid this kind of too-broad overriding problem in the future. +"""]] diff --git a/doc/bugs/Doesn__39__t_build_with_hinotify__44__0.3.10___47___fsnotify__44__0.2.1.2.mdwn b/doc/bugs/Doesn__39__t_build_with_hinotify__44__0.3.10___47___fsnotify__44__0.2.1.2.mdwn index 9c81ffe172..0d91c4a440 100644 --- a/doc/bugs/Doesn__39__t_build_with_hinotify__44__0.3.10___47___fsnotify__44__0.2.1.2.mdwn +++ b/doc/bugs/Doesn__39__t_build_with_hinotify__44__0.3.10___47___fsnotify__44__0.2.1.2.mdwn @@ -194,3 +194,5 @@ Utility/DirWatcher/INotify.hs:130:50: error: ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) Yes! + +> [[fixed|done]] a long time ago --[[Joey]] diff --git a/doc/bugs/Incorrect_install_dir_for_fish_completion.mdwn b/doc/bugs/Incorrect_install_dir_for_fish_completion.mdwn new file mode 100644 index 0000000000..d400715579 --- /dev/null +++ b/doc/bugs/Incorrect_install_dir_for_fish_completion.mdwn @@ -0,0 +1,39 @@ +### Please describe the problem. + +[git-annex Makefile: install-completions](http://source.git-annex.branchable.com/?p=source.git;a=blob;f=Makefile;h=965f53e1fc4a8f6d69041eabaccd759268f6490f;hb=HEAD#l87) + +git-annex installs fish completions to the wrong directory. `$(SHAREDIR)/fish/completions` is the directory documented as being exclusive to completions which are shipped in the fish source code; third-party applications installing their own completions are intended to use `$(SHAREDIR)/fish/vendor_completions.d` instead. 
+
+See [https://fishshell.com/docs/current/index.html#completion-path](https://fishshell.com/docs/current/index.html#completion-path)
+
+Note that this location can also be obtained in a similar manner to bash-completion:
+
+```
+$ pkg-config bash-completion --variable=completionsdir
+/usr/share/bash-completion/completions
+```
+
+```
+$ pkg-config fish --variable=completionsdir
+/usr/share/fish/vendor_completions.d
+```
+
+### What steps will reproduce the problem?
+
+Run "make install-completions", or install a linux distribution package of git-annex that builds with the current Makefile (Arch Linux or Debian will both show the same issue).
+
+### What version of git-annex are you using? On what operating system?
+
+Arch Linux
+
+git-annex 7.20191230-7
+
+### Please provide any additional information below.
+
+Apparently this is a very common mistake :/ so far I've seen many more projects do this wrong than do it right.
+
+### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
+
+Not a user, just here to help improve cross-distro packaging. :)
+
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_1_c5601e6da07e39a0c23d29ee0a728970._comment b/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_1_c5601e6da07e39a0c23d29ee0a728970._comment
new file mode 100644
index 0000000000..6de076e590
--- /dev/null
+++ b/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_1_c5601e6da07e39a0c23d29ee0a728970._comment
@@ -0,0 +1,32 @@
+[[!comment format=mdwn
+ username="Chel"
+ avatar="http://cdn.libravatar.org/avatar/a42feb5169f70b3edf7f7611f7e3640c"
+ subject="comment 1"
+ date="2020-02-03T23:34:02Z"
+ content="""
+I've got an error from `make install` on version 7.20200202.7:
+
+~~~
+./git-annex --fish-completion-script git-annex 2>/dev/null \
+	> dest/usr/local/share/fish/completions/git-annex.fish
+make: *** [Makefile:87: install-completions] Error 2
+~~~
+
+I think there should be:
+
+[[!format diff \"\"\"
+diff --git a/Makefile b/Makefile
+index eb3a34e6a..722921e00 100644
+--- a/Makefile
++++ b/Makefile
+@@ -86,7 +86,7 @@ install-completions: build
+ 		> $(DESTDIR)$(ZSH_COMPLETIONS_PATH)/_git-annex
+ 	install -d $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/vendor_completions.d
+ 	./git-annex --fish-completion-script git-annex 2>/dev/null \
+-		> $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/completions/git-annex.fish
++		> $(DESTDIR)$(PREFIX)/$(SHAREDIR)/fish/vendor_completions.d/git-annex.fish
+ 
+ test: git-annex git-annex-shell
+ 	./git-annex test
+\"\"\"]]
+"""]]
diff --git a/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_2_7607d3170194f8ad46e0c0178b3ea317._comment b/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_2_7607d3170194f8ad46e0c0178b3ea317._comment
new file mode 100644
index 0000000000..87dfd3d2f5
--- /dev/null
+++ b/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_2_7607d3170194f8ad46e0c0178b3ea317._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="eschwartz@5abb721e66990e478c7d1caf96beb4f9794eb168"
+ nickname="eschwartz"
+ avatar="http://cdn.libravatar.org/avatar/16ec8475b4e3507f8d1a71101c16b208"
+ subject="Partial fix only."
+ date="2020-02-03T23:47:35Z"
+ content="""
+Looks like the install -d was changed to create the new directory, but the actual file write got left out of the commit.
+"""]] diff --git a/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_3_020854d8b66be6b17f850075fa8d95ac._comment b/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_3_020854d8b66be6b17f850075fa8d95ac._comment new file mode 100644 index 0000000000..3e18d4c192 --- /dev/null +++ b/doc/bugs/Incorrect_install_dir_for_fish_completion/comment_3_020854d8b66be6b17f850075fa8d95ac._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-02-04T16:09:04Z" + content=""" +So it did. Fixed now. Thanks for checking. +"""]] diff --git a/doc/bugs/Parallel_fsck_on_files_with_same_content_in_bup_remote_can_fail.mdwn b/doc/bugs/Parallel_fsck_on_files_with_same_content_in_bup_remote_can_fail.mdwn new file mode 100644 index 0000000000..d631ae5a4f --- /dev/null +++ b/doc/bugs/Parallel_fsck_on_files_with_same_content_in_bup_remote_can_fail.mdwn @@ -0,0 +1,149 @@ +### Please describe the problem. + +If there are multiple files with the same keys in the repository and they are copied to bup special remote, +then `git annex fsck --from=bup` with `--jobs=N` option (N >= 2) can show an error and remove these keys from bup. + +Based on the error message (about locked .git/annex/tmp/ file), this problem is probably not specific to bup, +but I tested it with bup only. + +### What steps will reproduce the problem? + +1. Configure a bup special remote. +2. Add files with the same content to annex (and with the same backend). +3. Copy these files to bup. +4. Run `git annex fsck --from=bup -JN` several times, until it removes these keys from bup. + +### What version of git-annex are you using? On what operating system? + +git-annex 7.20191230-g985373f8e, build from source, on Debian GNU/Linux buster. + +bup 0.29.3-2 from Debian sid. Also tried with bup 0.30, build from source. + +### Please provide any additional information below. + +[[!format txt """ +~ $ mkdir testdir +~ $ cd testdir +~/testdir $ +~/testdir $ git init +Initialized empty Git repository in /home/test/testdir/.git/ +~/testdir $ +~/testdir $ git annex init testrepo +init testrepo (scanning for unlocked files...) +ok +(recording state in git...) +~/testdir $ +~/testdir $ ls ~/.bup/index-cache/ +~/testdir $ +~/testdir $ git annex initremote bup type=bup buprepo=~/testdir/.bup encryption=none +initremote bup (bup init...) +Reinitialized existing Git repository in /home/test/.bup/ +Initialized empty Git repository in /home/test/testdir/.bup/ +ok +(recording state in git...) +~/testdir $ +~/testdir $ ls ~/.bup/index-cache/ +None__home_test_testdir__bup +~/testdir $ +~/testdir $ echo aaa >file1 +~/testdir $ echo aaa >file2 +~/testdir $ +~/testdir $ git annex add . +add file1 +ok +add file2 +ok +(recording state in git...) +~/testdir $ +~/testdir $ git commit -m files +[master (root-commit) 7a03b66] files + 2 files changed, 2 insertions(+) + create mode 120000 file1 + create mode 120000 file2 +~/testdir $ +~/testdir $ git -C .bup show-ref +~/testdir $ +~/testdir $ git annex whereis +whereis file1 (1 copy) + 5d9b0df2-000b-4273-bc4a-fb3b9d8319bd -- testrepo [here] +ok +whereis file2 (1 copy) + 5d9b0df2-000b-4273-bc4a-fb3b9d8319bd -- testrepo [here] +ok +~/testdir $ +~/testdir $ git annex copy --to=bup . +copy file1 (to bup...) + +bloom: creating from 1 file (3 objects).ing: 0 kbytes +Receiving index from server: 1156/1156, done. +bloom: creating from 1 file (3 objects). +ok +copy file2 ok +(recording state in git...) 
+~/testdir $ +~/testdir $ git annex lookupkey file1 file2 +SHA256E-s4--17e682f060b5f8e47ea04c5c4855908b0a5ad612022260fe50e11ecb0cc0ab76 +SHA256E-s4--17e682f060b5f8e47ea04c5c4855908b0a5ad612022260fe50e11ecb0cc0ab76 +~/testdir $ +~/testdir $ git -C .bup show-ref +2076647ee23ad632c8cf96caf51febbd0604452c refs/heads/SHA256E-s4--17e682f060b5f8e47ea04c5c4855908b0a5ad612022260fe50e11ecb0cc0ab76 +~/testdir $ +~/testdir $ git annex fsck --from=bup +fsck file1 +(checksum...) ok +fsck file2 +(checksum...) ok +(recording state in git...) +~/testdir $ +~/testdir $ git -C .bup show-ref +2076647ee23ad632c8cf96caf51febbd0604452c refs/heads/SHA256E-s4--17e682f060b5f8e47ea04c5c4855908b0a5ad612022260fe50e11ecb0cc0ab76 +"""]] + +Now run `git annex fsck --from=bup -J2` multiple times, until it drops the key from bup... + +[[!format txt """ +~/testdir $ git annex fsck --from=bup -J2 +fsck file1 fsck file2 + +100% 4 B 5 B/s 0s + content cannot be completely removed from bup remote + + file2: Bad file size (4 B smaller); dropped from bup +(checksum...) +git-annex: .git/annex/tmp/fsck14654.SHA256E-s4--17e682f060b5f8e47ea04c5c4855908b0a5ad612022260fe50e11ecb0cc0ab76: openBinaryFile: resource busy (file is locked) +failed +(fixing location log) (checksum...) ok +(recording state in git...) +git-annex: fsck: 1 failed +~/testdir $ +~/testdir $ git -C .bup show-ref +~/testdir $ +~/testdir $ git annex whereis +whereis file1 (2 copies) + 5d9b0df2-000b-4273-bc4a-fb3b9d8319bd -- testrepo [here] + 88cc362a-f87a-43c7-b194-e79b2ee91828 -- [bup] +ok +whereis file2 (2 copies) + 5d9b0df2-000b-4273-bc4a-fb3b9d8319bd -- testrepo [here] + 88cc362a-f87a-43c7-b194-e79b2ee91828 -- [bup] +ok +~/testdir $ +~/testdir $ git annex fsck --from=bup +fsck file1 (fixing location log) + ** Based on the location log, file1 + ** was expected to be present, but its content is missing. +failed +fsck file2 ok +(recording state in git...) +git-annex: fsck: 1 failed +~/testdir $ +~/testdir $ git annex whereis +whereis file1 (1 copy) + 5d9b0df2-000b-4273-bc4a-fb3b9d8319bd -- testrepo [here] +ok +whereis file2 (1 copy) + 5d9b0df2-000b-4273-bc4a-fb3b9d8319bd -- testrepo [here] +ok +"""]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/Parallel_fsck_on_files_with_same_content_in_bup_remote_can_fail/comment_1_8cc0d742cd59046b038879f4823c9639._comment b/doc/bugs/Parallel_fsck_on_files_with_same_content_in_bup_remote_can_fail/comment_1_8cc0d742cd59046b038879f4823c9639._comment new file mode 100644 index 0000000000..755f2bbf5f --- /dev/null +++ b/doc/bugs/Parallel_fsck_on_files_with_same_content_in_bup_remote_can_fail/comment_1_8cc0d742cd59046b038879f4823c9639._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-02-14T18:49:07Z" + content=""" +Ugh, I think this could potentially result in data loss. Not when using bup, +but other special remotes. + +I've fixed it in git and will think about moving the date of the next +release up. +"""]] diff --git a/doc/bugs/Remote_Tests__58___storeKey_fails_for_http_remotes___40__skip_instead__63____41__.mdwn b/doc/bugs/Remote_Tests__58___storeKey_fails_for_http_remotes___40__skip_instead__63____41__.mdwn new file mode 100644 index 0000000000..217385e652 --- /dev/null +++ b/doc/bugs/Remote_Tests__58___storeKey_fails_for_http_remotes___40__skip_instead__63____41__.mdwn @@ -0,0 +1,85 @@ +### Please describe the problem. 
+When running `git annex testremote origin --test-readonly=filename` on a git http remote that supports git-annex, the `storeKey` test fails with error: + +``` +storeKey: FAIL +./Command/TestRemote.hs:277: +(got: Left "copying to non-ssh repo not supported") +``` + +(Full example output below) + + +### What steps will reproduce the problem? + +Using https://downloads.kitenet.net/.git/ as an example of a public repository with https git annex support: + +``` +$ git clone https://downloads.kitenet.net/.git/ +Cloning into 'downloads.kitenet.net'... +$ cd downloads.kitenet.net/debug-me/linux/current/ +$ git annex get debug-me-standalone-amd64.tar.gz +get debug-me-standalone-amd64.tar.gz (from origin...) +(checksum...) ok +(recording state in git...) +$ git annex testremote origin --test-readonly debug-me-standalone-amd64.tar.gz +testremote origin Remote Tests + unavailable remote + removeKey: dropping from http remote not supported +OK + storeKey: FAIL + ./Command/TestRemote.hs:277: + (got: Left "copying to non-ssh repo not supported") + checkPresent: OK (0.02s) + retrieveKeyFile: download failed: ConnectionFailure Network.Socket.getAddrInfo (called with preferred socket type/protocol: AddrInfo {addrFlags = [AI_ADDRCONFIG], addrFamily = AF_UNSPEC, addrSocketType = Stream, addrProtocol = 6, addrAddress = , addrCanonName = }, host name: Just "!dne!", service name: Just "443"): does not exist (Name or service not known) + download failed: ConnectionFailure Network.Socket.getAddrInfo (called with preferred socket type/protocol: AddrInfo {addrFlags = [AI_ADDRCONFIG], addrFamily = AF_UNSPEC, addrSocketType = Stream, addrProtocol = 6, addrAddress = , addrCanonName = }, host name: Just "!dne!", service name: Just "443"): does not exist (Name or service not known) +OK (0.01s) + retrieveKeyFileCheap: OK + key size Just 13600699; NoChunks; encryption none + present True: OK (0.61s) + retrieveKeyFile: OK (2.39s) + fsck downloaded object: OK + retrieveKeyFile resume from 33%: OK (1.95s) + fsck downloaded object: OK + retrieveKeyFile resume from 0: OK (1.94s) + fsck downloaded object: OK + retrieveKeyFile resume from end: OK (0.68s) + fsck downloaded object: OK + +1 out of 14 tests failed (7.61s) +failed +git-annex: testremote: 1 failed +``` + +### What version of git-annex are you using? On what operating system? + +``` +git-annex version: 7.20191230-g985373f8e +build flags: Assistant Webapp Pairing S3 WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite +dependency versions: aws-0.21.1 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.2.0.1 ghc-8.6.5 http-client-0.6.4 persistent-sqlite-2.10.5 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external +operating system: linux x86_64 +supported repository versions: 7 +upgrade supported from repository versions: 0 1 2 3 4 5 6 +``` + +OS: Arch Linux (5.5.1-arch1-1) + +### Please provide any additional information below. 
+
+I came across this while trying to make our hosted git and git-annex service (gin.g-node.org) open to public https git annex downloads. I was using the `testremote` command to make sure everything works as intended and saw the error thinking, at first, that the server was serving something incorrectly. The error message does clearly state that `copying to non-ssh repo not supported`, so I thought I'd try with kitenet to see if the same happens and it does.
+
+I don't know if this is a bug or intentional—perhaps the test should be failing to indicate that the remote doesn't support `storeKey`? On the other hand, the `removeKey` test displays the "not supported" message and then passes, so maybe the `storeKey` test should behave the same way.
+
+It's possible there's another issue here I'm not entirely aware of.
+
+
+### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
+
+Of course! I use and rely on it daily :)
+
+> This is not a bug in the test suite, it turns out, but in
+> git-annex's handling of a http remote. It was throwing fatal errors
+> rather than the correct behavior of displaying a warning. [[fixed|done]]
+> --[[Joey]]
diff --git a/doc/todo/S3_special_remote_support_for_DigitalOcean_Spaces.mdwn b/doc/bugs/S3_special_remote_support_for_DigitalOcean_Spaces.mdwn
similarity index 100%
rename from doc/todo/S3_special_remote_support_for_DigitalOcean_Spaces.mdwn
rename to doc/bugs/S3_special_remote_support_for_DigitalOcean_Spaces.mdwn
diff --git a/doc/bugs/S3_special_remote_support_for_DigitalOcean_Spaces/comment_1_12fd9dfb47e157e1d38c5e88a543498b._comment b/doc/bugs/S3_special_remote_support_for_DigitalOcean_Spaces/comment_1_12fd9dfb47e157e1d38c5e88a543498b._comment
new file mode 100644
index 0000000000..56718b48e5
--- /dev/null
+++ b/doc/bugs/S3_special_remote_support_for_DigitalOcean_Spaces/comment_1_12fd9dfb47e157e1d38c5e88a543498b._comment
@@ -0,0 +1,20 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2020-01-06T17:41:21Z"
+ content="""
+This is either a bug in the aws library or in
+Digital Ocean's implementation of the S3 protocol. I don't know which,
+but I do know it has to be fixed in one of those two places, and not
+in git-annex.
+
+The fact that the aws library can't even parse an error message out of
+their response kind of says something.
+
+But the real problem seems to be with their signature support.
+While they claim to support V2 signatures,
+my guess is there's a problem with their support for V2, since they're
+mostly dealing with V4. The aws library's support for V4 is experimental
+and [apparently buggy](https://github.com/aristidb/aws/issues/262), but
+as git-annex uses it, it will only use V2.
+"""]]
diff --git a/doc/bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem/comment_1_6b54cc0a268885570170620222b774d7._comment b/doc/bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem/comment_1_6b54cc0a268885570170620222b774d7._comment
new file mode 100644
index 0000000000..3677a6ec55
--- /dev/null
+++ b/doc/bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem/comment_1_6b54cc0a268885570170620222b774d7._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="annex2384@290036d126d86bcec28ee2f2ead549de1f59e90e"
+ nickname="annex2384"
+ avatar="http://cdn.libravatar.org/avatar/ad36fdc55abd8b9913b774fcd0177709"
+ subject="Using -o metadata?"
+ date="2020-02-04T03:22:45Z"
+ content="""
+I'm having the same issue, though I can add files if I set annex.addunlocked=true.
+"""]] diff --git a/doc/bugs/annex.genmetadata_should_default_to_true.mdwn b/doc/bugs/annex.genmetadata_should_default_to_true.mdwn index afc004ed0a..5aef50d45b 100644 --- a/doc/bugs/annex.genmetadata_should_default_to_true.mdwn +++ b/doc/bugs/annex.genmetadata_should_default_to_true.mdwn @@ -9,3 +9,5 @@ Add a file to a fresh annex, observe it has no metadata. ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) Yes I love it! (Except for its spotty timestamp support) + +> [[notabug|done]] --[[Joey]] diff --git a/doc/bugs/annex.genmetadata_should_default_to_true/comment_4_615bb36fb8e2958c001fc6df997b2928._comment b/doc/bugs/annex.genmetadata_should_default_to_true/comment_4_615bb36fb8e2958c001fc6df997b2928._comment new file mode 100644 index 0000000000..2e6f134778 --- /dev/null +++ b/doc/bugs/annex.genmetadata_should_default_to_true/comment_4_615bb36fb8e2958c001fc6df997b2928._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="reasons not to have annex.genmetadata default to true" + date="2020-02-10T18:40:52Z" + content=""" +Some reasons `annex.genmetadata` should *not* default to true: (1) ordinary git does not preserve file modtimes, probably on purpose: if you have some kind of `make` process, you want `git update` to cause updated files to have updated modtimes, not the modtimes from when the files were added to git, so that `make` can detect the change and update downstream files; (2) as @joey noted, potentially wasteful bloat, especially for repos with many files; (3) two different copies of a file may have different modtimes, but all copies must have the same git-annex metadata, because metadata is attached to the [[key|backends]], which for most backends is computed from file contents. + +The WOM backend stores the modtime in the key, but then does not store checksums. If [[todo/external_backends]] are implemented, you could make one that includes both the checksum and the modtime in the key. +"""]] diff --git a/doc/bugs/annex.genmetadata_should_default_to_true/comment_5_8cd47851ca76005e0011da7c2f62d77d._comment b/doc/bugs/annex.genmetadata_should_default_to_true/comment_5_8cd47851ca76005e0011da7c2f62d77d._comment new file mode 100644 index 0000000000..69f6444e69 --- /dev/null +++ b/doc/bugs/annex.genmetadata_should_default_to_true/comment_5_8cd47851ca76005e0011da7c2f62d77d._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-02-17T16:47:33Z" + content=""" +I agree with Ilya's points. + +And it's unresonable to characterize this as data loss, because git itself +does not store file timestamp data. Making such mischaracterizations does, +however, cause me, as the maintainer, to wonder if this is a feature that +is worth keeping, since I have no interest in descending that rabbit hole +or fighting such accusations. Generally, going to the strongest possible +argument, when requesting a change, is not actually your best move. + +Closing this bug report. 
+"""]] diff --git a/doc/bugs/annex_doesn__39__t_fixup_symlinks_when___34__git_commit_path__95__to__95__repo__34___is_used.mdwn b/doc/bugs/annex_doesn__39__t_fixup_symlinks_when___34__git_commit_path__95__to__95__repo__34___is_used.mdwn index 4a5ead8e86..e7cdc074c9 100644 --- a/doc/bugs/annex_doesn__39__t_fixup_symlinks_when___34__git_commit_path__95__to__95__repo__34___is_used.mdwn +++ b/doc/bugs/annex_doesn__39__t_fixup_symlinks_when___34__git_commit_path__95__to__95__repo__34___is_used.mdwn @@ -78,3 +78,7 @@ note that there is no "(recording state in git...) ..." portion in the output! [[!meta author=yoh]] [[!tag projects/datalad]] + +> This was fixed by v7 mode, which no longer needs to deal with old-style +> unlocked files and so is not impacted by git false indexes any longer. +> I've verified the test case no longer reproduces. [[done]] --[[Joey]] diff --git a/doc/bugs/assistant_doesn__39__t_notice_changes_to_gitignore_files.mdwn b/doc/bugs/assistant_doesn__39__t_notice_changes_to_gitignore_files.mdwn new file mode 100644 index 0000000000..805aaf00f2 --- /dev/null +++ b/doc/bugs/assistant_doesn__39__t_notice_changes_to_gitignore_files.mdwn @@ -0,0 +1,41 @@ +### Please describe the problem. + +Once the assistant daemon is started, it won't notice updates to any gitignore files. This is probably because it spawns a long-lived `git check-ignore -z --stdin --verbose --non-matching` process which only reads gitignore files on startup. + +### What steps will reproduce the problem? + +- Start the assistant +- Add a pattern to a relevant `.gitignore` or `.git/info/exclude` +- (Just to be safe) wait for the assistant to commit and sync that change +- Create a file matching the new ignore pattern +- Observe the assistant committing the file to the repository despite it matching the new ignore pattern + +### What version of git-annex are you using? On what operating system? + +7.20191230-gef6d1e327 on openSUSE Leap 15.1 + +### Please provide any additional information below. + +I assume that the obvious fix would be to watch for changes to gitignore files, and restart the `check-ignore` process accordingly. + +A workaround is to restart the assistant. I've tested this and it works fine. + +Unsurprisingly the daemon log just shows the normal behaviour so it's probably not very useful, but including anyway just in case: + +[[!format sh """ +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log +Everything up-to-date +[2020-01-06 15:40:21.863988735] Committer: Committing changes to git +(recording state in git...) +[2020-01-06 15:40:22.024703938] Pusher: Syncing with peer1 +[2020-01-06 15:40:23.025650786] Committer: Committing changes to git +(recording state in git...) +To tor-annex::.onion:5227 + 841db50..d47d59e master -> synced/master + +# End of transcript or log. +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +Yes, worked great for many years, but struggling to get the assistant to a point I can use it. Main blockers are [[assistant_sometimes_removes_and_re-adds_whole_file]] and [[todo/wishlist__58___disable_automatic_commits]]. 
diff --git a/doc/bugs/awkward_error_from_annex_whenever_operating_on_a_clone_with_submodules/comment_2_f5bf66e8494b4ee2c6da7f3a6de8da18._comment b/doc/bugs/awkward_error_from_annex_whenever_operating_on_a_clone_with_submodules/comment_2_f5bf66e8494b4ee2c6da7f3a6de8da18._comment new file mode 100644 index 0000000000..d39502ab24 --- /dev/null +++ b/doc/bugs/awkward_error_from_annex_whenever_operating_on_a_clone_with_submodules/comment_2_f5bf66e8494b4ee2c6da7f3a6de8da18._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-23T19:20:47Z" + content=""" +The bug I earlier hypothesized might be the same as this one has since been +fixed. So it would be good to know if this bug is fixed. I do not think +enough information was provided for me to be able to reproduce it. +"""]] diff --git a/doc/bugs/brew_install_git-annex_failed.mdwn b/doc/bugs/brew_install_git-annex_failed.mdwn new file mode 100644 index 0000000000..e75a66cdcd --- /dev/null +++ b/doc/bugs/brew_install_git-annex_failed.mdwn @@ -0,0 +1,50 @@ +### Please describe the problem. + +brew install git-annex failed. + +### What steps will reproduce the problem? + + +### What version of git-annex are you using? On what operating system? +Mac OS X 10.11 El Capitan + +### Please provide any additional information below. + +[[!format sh """ +95b3d57ce7--git-annex-7.20191230.tar.gz +==> cabal v1-sandbox init +==> cabal v1-update +==> cabal v1-install --jobs=4 --max-backjumps=100000 alex +==> cabal v1-install --jobs=4 --max-backjumps=100000 happy +==> cabal v1-install --jobs=4 --max-backjumps=100000 c2hs +==> cabal v1-install --jobs=4 --max-backjumps=100000 --only-dependencies --constraint http-conduit>=2.3 --constraint net +==> cabal v1-configure --flags=s3 webapp +==> cabal v1-install --jobs=4 --max-backjumps=100000 --prefix=/usr/local/Cellar/git-annex/7.20191230 --constraint http-c +Last 15 lines from /Users/choi/Library/Logs/Homebrew/git-annex/08.cabal: +StandaloneDeriving + +Please enable the extensions by copy/pasting these lines into the top of your file: + +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE StandaloneDeriving #-} + | +31 | share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^... +cabal: Leaving directory '.' +cabal: Error: some packages failed to install: +git-annex-7.20191230-JGm7b2Gk5I8w0hi2BDCiw failed during the building phase. +The exception was: +ExitFailure 1 + + +Do not report this issue to Homebrew/brew or Homebrew/core! + +These open issues may also help: +git-annex-remote-rclone 0.6 (new formula) https://github.com/Homebrew/homebrew-core/pull/49468 +git-annex: add OBJC_DISABLE_INITIALIZE_FORK_SAFETY environment variable https://github.com/Homebrew/homebrew-core/pull/48411 +"""]] + +### Have you had any luck using git-annex before? 
(Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + + +> [[fixed|done]] in git-annex master --[[Joey]] diff --git a/doc/bugs/brew_install_git-annex_failed/comment_1_57b2c2c989311b4db01aca8d07802207._comment b/doc/bugs/brew_install_git-annex_failed/comment_1_57b2c2c989311b4db01aca8d07802207._comment new file mode 100644 index 0000000000..f8e923f11b --- /dev/null +++ b/doc/bugs/brew_install_git-annex_failed/comment_1_57b2c2c989311b4db01aca8d07802207._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="nrg@bd619d1ebf16e6324c546adea8be8fe1cc2b4325" + nickname="nrg" + avatar="http://cdn.libravatar.org/avatar/428b6c95b52769cf9eecdd351018eacb" + subject="Confirmed with macOS 10.14.6 building git-annex-7.20200202.7" + date="2020-02-03T23:10:22Z" + content=""" +The issue is also seen [here](https://github.com/Homebrew/homebrew-core/pull/49731). +"""]] diff --git a/doc/bugs/brew_install_git-annex_failed/comment_2_a398db56168954d43620620104215b14._comment b/doc/bugs/brew_install_git-annex_failed/comment_2_a398db56168954d43620620104215b14._comment new file mode 100644 index 0000000000..d6931cda0f --- /dev/null +++ b/doc/bugs/brew_install_git-annex_failed/comment_2_a398db56168954d43620620104215b14._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="nrg@bd619d1ebf16e6324c546adea8be8fe1cc2b4325" + nickname="nrg" + avatar="http://cdn.libravatar.org/avatar/428b6c95b52769cf9eecdd351018eacb" + subject="Change introduced by persistent-sqlite and persistent-template" + date="2020-02-04T15:09:59Z" + content=""" +The change was introduced [here](https://github.com/yesodweb/persistent/commit/6ca1c2401f228293c64ae05e6109d4936b98c4b9). +"""]] diff --git a/doc/bugs/brew_install_git-annex_failed/comment_3_627876a558898bb9f622b80dbacee051._comment b/doc/bugs/brew_install_git-annex_failed/comment_3_627876a558898bb9f622b80dbacee051._comment new file mode 100644 index 0000000000..373cb3e662 --- /dev/null +++ b/doc/bugs/brew_install_git-annex_failed/comment_3_627876a558898bb9f622b80dbacee051._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-02-04T15:56:00Z" + content=""" +I think I've fixed this in master now, but have not been able to test the fix +yet since I don't have the bandwidth to upgrade. +"""]] diff --git a/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn new file mode 100644 index 0000000000..9ff3429c8f --- /dev/null +++ b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn @@ -0,0 +1,22 @@ +### Please describe the problem. + +Full build logs are at http://neuro.debian.net/_files/_buildlogs/git-annex/7.20191230+git152-gefb981388 + + + +[[!format sh """ +... + prop_read_write_transferinfo: FAIL + *** Failed! Exception: 'recoverEncode: invalid argument (invalid character)' (after 1 test): + Exception thrown while showing test case: 'recoverEncode: invalid argument (invalid character)' + Use --quickcheck-replay=507010 to reproduce. 
+
+
+"""]]
+
+
+[[!meta author=yoh]]
+[[!tag projects/datalad]]
+
+> [[fixed|done]] --[[Joey]]
diff --git a/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment
new file mode 100644
index 0000000000..86f92e591b
--- /dev/null
+++ b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment
@@ -0,0 +1,31 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 1"""
+ date="2020-02-02T19:41:34Z"
+ content="""
+Minimal reproducer:
+
+	bash$ LANG=C ghci Utility/FileSystemEncoding.hs
+	ghci> useFileSystemEncoding
+	ghci> toRawFilePath "\611584"
+	"*** Exception: recoverEncode: invalid argument (invalid character)
+
+No such problem in a unicode locale.
+
+The problem does not, though, affect actually using git-annex in LANG=C
+with a filename with that in its name.
+
+Odd because the filesystem encoding is supposed to round-trip anything
+well, but here encoding a string with it is failing internally.
+Maybe the thing is, it's not really round-tripping? QuickCheck arbitrary
+magics up a FilePath that contains that, so it's starting in the middle and
+trying to convert it out.
+
+[[!commit 70395659db9f662e61009d984fc9b0b2f24fdece]] introduced this while
+fixing another intermittent encoding test case failure.
+
+	ghci> Data.Char.generalCategory '\611584'
+	NotAssigned
+
+I think it would make sense to filter out NotAssigned and PrivateUse.
+"""]]
diff --git a/doc/bugs/could_webdav_be_more_resilient_to_timeouts__63__.mdwn b/doc/bugs/could_webdav_be_more_resilient_to_timeouts__63__.mdwn
index 083f1351e6..3c34b3f9bf 100644
--- a/doc/bugs/could_webdav_be_more_resilient_to_timeouts__63__.mdwn
+++ b/doc/bugs/could_webdav_be_more_resilient_to_timeouts__63__.mdwn
@@ -42,3 +42,6 @@ apparently it is actually timing out on checking (I guess after chunk completion
 
 [[!meta author="yoh"]]
+
+> I see that the bug in the DAV library has been fixed (in 2018),
+> so hopefully nothing more needs to be done.
[[done]] --[[Joey]] diff --git a/doc/bugs/fails_to___96__get__96___in_parallel_for_a_freshly_clone_from_public_s3_bucket_where_versioning_info_was_forgotten/comment_1_917b46d9ca69c4e8181d041d5414d373._comment b/doc/bugs/fails_to___96__get__96___in_parallel_for_a_freshly_clone_from_public_s3_bucket_where_versioning_info_was_forgotten/comment_1_917b46d9ca69c4e8181d041d5414d373._comment new file mode 100644 index 0000000000..0c2e7c1dea --- /dev/null +++ b/doc/bugs/fails_to___96__get__96___in_parallel_for_a_freshly_clone_from_public_s3_bucket_where_versioning_info_was_forgotten/comment_1_917b46d9ca69c4e8181d041d5414d373._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-23T19:26:20Z" + content=""" +Looking in the git-annex branch of this repository, for information about +a key such as MD5E-s10555001--f8bc87e8841634b3d2f9ac0ba85d0a83.nii.gz.log +which is one of the files that fails to download, all there is is this: + + joey@darkstar:/tmp/ds000248#git-annex>cat ./f0b/978/MD5E-s10555001--f8bc87e8841634b3d2f9ac0ba85d0a83.nii.gz.log + 1531766688.020213886s 1 82a4b182-753f-4d93-a59e-20cfdd4d4237 + 1531766681.703832952s 1 e3612a8a-0c48-4374-9bfb-888f4010be54 + +So location log says it's in s3-PUBLIC, but in fact no S3 version id has been recorded. + +And there were old bugs that prevented the recording of the S3 version id. +Notably, it used to be possible to set versioning=yes on an existing S3 +remote, and the files already stored in it necessarily lacked version ids +then. That has been fixed. + +So, is it still possible to reproduce creating a repository with this problem? +"""]] diff --git a/doc/todo/git-annex-repair_claims_success_then_failure.mdwn b/doc/bugs/git-annex-repair_claims_success_then_failure.mdwn similarity index 100% rename from doc/todo/git-annex-repair_claims_success_then_failure.mdwn rename to doc/bugs/git-annex-repair_claims_success_then_failure.mdwn diff --git a/doc/bugs/git-annex_does_not_operate_on_all_keys_in_tuned_repository_with_annex.tune.branchhash1__61__true.mdwn b/doc/bugs/git-annex_does_not_operate_on_all_keys_in_tuned_repository_with_annex.tune.branchhash1__61__true.mdwn new file mode 100644 index 0000000000..7c10599b43 --- /dev/null +++ b/doc/bugs/git-annex_does_not_operate_on_all_keys_in_tuned_repository_with_annex.tune.branchhash1__61__true.mdwn @@ -0,0 +1,91 @@ +### Please describe the problem. + +git annex commands with `--all` option in tuned repository (with `annex.tune.branchhash1=true`) do not do anything. + +### What steps will reproduce the problem? + +1. Initialize a tuned annex repository with `git annex init -c annex.tune.branchhash1=true`. +2. Add some files to annex. +3. Now `git annex whereis --all` and `git annex fsck --all` (and maybe other commands) don't show/do anything. + +### What version of git-annex are you using? On what operating system? + +Version 7.20191230-g985373f8e, compiled from sources, on Debian buster 10.2. + +### Please provide any additional information below. + +[[!format txt """ +~ $ mkdir testdir +~ $ cd testdir +~/testdir $ +~/testdir $ git init +Initialized empty Git repository in /home/test/testdir/.git/ +~/testdir $ +~/testdir $ git annex init -c annex.tune.branchhash1=true testrepo +init testrepo (scanning for unlocked files...) +ok +(recording state in git...) +~/testdir $ +~/testdir $ echo abcabc >file +~/testdir $ +~/testdir $ git annex add file +add file +ok +(recording state in git...) 
+~/testdir $ +~/testdir $ git commit -m file +[master (root-commit) b910684] file + 1 file changed, 1 insertion(+) + create mode 120000 file +~/testdir $ +~/testdir $ git annex whereis +whereis file (1 copy) + 67d9c35f-e206-404f-a9da-6c94894a4f9f -- testrepo [here] +ok +~/testdir $ +~/testdir $ git annex whereis --all +~/testdir $ +~/testdir $ git annex fsck +fsck file (checksum...) ok +(recording state in git...) +~/testdir $ +~/testdir $ git annex fsck --all +(recording state in git...) +"""]] + +But `--key` option works: + +[[!format txt """ +~/testdir $ git annex lookupkey file +SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470 +~/testdir $ +~/testdir $ git annex whereis --key SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470 +whereis SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470 (1 copy) + 67d9c35f-e206-404f-a9da-6c94894a4f9f -- testrepo [here] +ok +~/testdir $ +~/testdir $ git annex fsck --key SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470 +fsck SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470 (checksum...) ok +(recording state in git...) +"""]] + +Repository status: + +[[!format txt """ +~/testdir $ find .git/annex/objects/ -type f +.git/annex/objects/J3/3f/SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470/SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470 +~/testdir $ +~/testdir $ git ls-tree -r git-annex +100644 blob 20f9faf7ca569d23da5f106a445609d018fa221d activity.log +100644 blob 71f3551b7119daa3c4679d2b790d72b6bc06cbb8 c34/SHA256E-s7--2ed91d820157c0530ffbae54122d998e0de6d958f266b682f7c528942f770470.log +100644 blob d475e423f6fb4863559e8cca981ae8a433f68516 difference.log +100644 blob bf91bd54df30e28f40b49670cf9c9c26ff600a22 uuid.log +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +Of course, I love it! Great project, thanks, Joey! + +However, /me always wants more features from it. It's great that git-annex continues to develop. + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/git-annex_does_not_operate_on_all_keys_in_tuned_repository_with_annex.tune.branchhash1__61__true/comment_1_0e3da1c92dd46e4fc7facd10050fb590._comment b/doc/bugs/git-annex_does_not_operate_on_all_keys_in_tuned_repository_with_annex.tune.branchhash1__61__true/comment_1_0e3da1c92dd46e4fc7facd10050fb590._comment new file mode 100644 index 0000000000..9a9dd2fe56 --- /dev/null +++ b/doc/bugs/git-annex_does_not_operate_on_all_keys_in_tuned_repository_with_annex.tune.branchhash1__61__true/comment_1_0e3da1c92dd46e4fc7facd10050fb590._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-02-14T19:01:50Z" + content=""" +Indeed this is a bug. Easy to see why, note that "3": + + locationLogFileKey path + -- Want only xx/yy/foo.log, not .log files in other places. + | length (splitDirectories (fromRawFilePath path)) /= 3 = Nothing + +So this also affected some other things that use that. Including `git-annex log`, +potentially something to do with v2 upgrade (if a v2 repo could be tuned this way?), +and handling transitions set up by `git annex forget`. + +(I also checked if annex.tune.objecthash1 similarly broke stuff that +enumerated .git/annex/objects, but that was handled ok already.) 
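As an illustration of why that `/= 3` check misfires: with the default branch layout a per-key log lives under two levels of hash directories (for example `f0b/978/KEY.log`), while with annex.tune.branchhash1=true it lives under a single level (for example `c34/KEY.log`, as in the `git ls-tree` output in this report). A tiny standalone sketch, not git-annex code, with `KEY.log` standing in for a real key's log file:

```
import System.FilePath (splitDirectories)

main :: IO ()
main = do
    -- default layout: two hash directories plus the log file = 3 components
    print (length (splitDirectories "f0b/978/KEY.log"))  -- 3, accepted
    -- branchhash1 layout: one hash directory plus the log file = 2 components
    print (length (splitDirectories "c34/KEY.log"))      -- 2, wrongly rejected
```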
+"""]] diff --git a/doc/bugs/git_annex_copy_fails_without_error_message_on_macOS___40__but_works_on_Linux__41__.mdwn b/doc/bugs/git_annex_copy_fails_without_error_message_on_macOS___40__but_works_on_Linux__41__.mdwn new file mode 100644 index 0000000000..c971feb821 --- /dev/null +++ b/doc/bugs/git_annex_copy_fails_without_error_message_on_macOS___40__but_works_on_Linux__41__.mdwn @@ -0,0 +1,21 @@ +I have a git repository with a webdav annex remote. I have used this remote to store a few large files, and transfer them to other computers. + +I have done this on three Linux computers on different networks, and it works beautifully! Truly a life-saver! + +However, I today tried setting up a macOS computer, and + + git annex copy --from=myremote + +fails with the simple error message "failed". It downloads about ten megabytes without issue before failing. It then continues with the next file and again downloads a bit (ca. 10 Mb), then fails. The JSON error message has an empty error message field, and the JSON progress messages only contain business-as-usual before the error. + +On the computer that has this problem, I am running macOS 10.15.2 and git-annex 7.20191230. + +I tried WiFi and wired network connections, which doesn't make a difference. The hard drive has enough space for the files. I tried manually selecting various annexed files, but all fail after 10 Mb. + +The same command worked on the linux boxes I set up a few days ago. + +The problem really is that I am getting no meaningful error message for triaging this problem. + +Any help would be appreciated! + +[[!tag moreinfo]] diff --git a/doc/bugs/git_annex_copy_fails_without_error_message_on_macOS___40__but_works_on_Linux__41__/comment_1_6c044f007e66b951cf08b86f9be49ed3._comment b/doc/bugs/git_annex_copy_fails_without_error_message_on_macOS___40__but_works_on_Linux__41__/comment_1_6c044f007e66b951cf08b86f9be49ed3._comment new file mode 100644 index 0000000000..eeecd6a2d7 --- /dev/null +++ b/doc/bugs/git_annex_copy_fails_without_error_message_on_macOS___40__but_works_on_Linux__41__/comment_1_6c044f007e66b951cf08b86f9be49ed3._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-23T20:28:20Z" + content=""" +Adding --debug might provide more information. + +`git annex version` will certianly provide useful information, including +the version of the DAV library git-annex was built with. +"""]] diff --git a/doc/bugs/git_config_merge.ff__61__only_breaks_sync.mdwn b/doc/bugs/git_config_merge.ff__61__only_breaks_sync.mdwn new file mode 100644 index 0000000000..0d0e488ef4 --- /dev/null +++ b/doc/bugs/git_config_merge.ff__61__only_breaks_sync.mdwn @@ -0,0 +1,447 @@ +### Please describe the problem. + +Having globally configured git-merge to only allow fast forward merges breaks git-annex's sync command. + +### What steps will reproduce the problem? 
+ +Run the following script, beware it will change the global git config for `merge.ff`: + +``` +#!/bin/sh +repro() { + rm -rf repo-a repo-b + mkdir repo-a repo-b + ( cd repo-a; git init; git annex init; git remote add repo-b ../repo-b ) + ( cd repo-b; git init; git annex init; git remote add repo-a ../repo-a ) + + # Setup an initial commit a0 in repo-a + ( cd repo-a + touch a0 + git add a0 + git commit -m a0 + git annex sync + ) + + # Pull a0 into repo-b and create commit 'b' on top of it + ( cd repo-b + git annex sync + touch b + git add b + git commit -m b + ) + + # Back in repo-a create a diverging commit 'a1' and try to sync + ( cd repo-a + touch a1 + git add a1 + git commit -m a1 + git annex sync -d + ) +} + +# First try without merge.ff=only +git config --global --unset merge.ff +repro; rv=$? + +# Now with +git config --global merge.ff only +repro || echo "===== Breaks with merge.ff=only =====" +[ $rv -eq 0 ] && echo "===== Works without merge.ff=only =====" +``` + +### What version of git-annex are you using? On what operating system? + +git-annex version 7.20190129-3 from Debian buster + +``` +$ git annex version +git-annex version: 7.20190129 +build flags: Assistant Webapp Pairing S3(multipartupload)(storageclasses) WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite +dependency versions: aws-0.20 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.0.0 ghc-8.4.4 http-client-0.5.13.1 persistent-sqlite-2.8.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar hook external +operating system: linux x86_64 +supported repository versions: 5 7 +upgrade supported from repository versions: 0 1 2 3 4 5 6 +``` + +### Please provide any additional information below. + +A run of the repro script above with `sh -x`: + +[[!format sh """ ++ git config --global --unset merge.ff ++ repro ++ rm -rf repo-a repo-b ++ mkdir repo-a repo-b ++ cd repo-a ++ git init +Initialized empty Git repository in /tmp/repo-a/.git/ ++ git annex init +init ok +(recording state in git...) ++ git remote add repo-b ../repo-b ++ cd repo-b ++ git init +Initialized empty Git repository in /tmp/repo-b/.git/ ++ git annex init +init ok +(recording state in git...) ++ git remote add repo-a ../repo-a ++ cd repo-a ++ touch a0 ++ git add a0 ++ git commit -m a0 +[master (root-commit) 6c82d47] a0 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 a0 ++ git annex sync +commit +On branch master +nothing to commit, working tree clean +ok +pull repo-b +remote: Enumerating objects: 4, done. +remote: Counting objects: 100% (4/4), done. +remote: Compressing objects: 100% (2/2), done. +remote: Total 3 (delta 0), reused 0 (delta 0) +Unpacking objects: 100% (3/3), done. +From ../repo-b + * [new branch] git-annex -> repo-b/git-annex +ok +(merging repo-b/git-annex into git-annex...) +(recording state in git...) +push repo-b +Enumerating objects: 12, done. +Counting objects: 100% (12/12), done. 
+Delta compression using up to 8 threads +Compressing objects: 100% (5/5), done. +Writing objects: 100% (9/9), 805 bytes | 805.00 KiB/s, done. +Total 9 (delta 1), reused 0 (delta 0) +To ../repo-b + * [new branch] git-annex -> synced/git-annex + * [new branch] master -> synced/master +ok ++ cd repo-b ++ git annex sync +commit +On branch master + +Initial commit + +nothing to commit +ok +fatal: ambiguous argument 'refs/heads/master..refs/heads/synced/master': unknown revision or path not in the working tree. +Use '--' to separate paths from revisions, like this: +'git [...] -- [...]' +pull repo-a +From ../repo-a + * [new branch] git-annex -> repo-a/git-annex + * [new branch] master -> repo-a/master + * [new branch] synced/master -> repo-a/synced/master + + +Already up to date. +ok ++ touch b ++ git add b ++ git commit -m b +[master d3ecbbf] b + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 b ++ cd repo-a ++ touch a1 ++ git add a1 ++ git commit -m a1 +[master 7c4054b] a1 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 a1 ++ git annex sync -d +[2020-01-21 12:41:12.808810342] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","git-annex"] +[2020-01-21 12:41:12.810537111] process done ExitSuccess +[2020-01-21 12:41:12.810623305] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--hash","refs/heads/git-annex"] +[2020-01-21 12:41:12.812254921] process done ExitSuccess +[2020-01-21 12:41:12.812457308] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/git-annex..a2ceb59a5879cdf88bd33104c0ee8ac390b3f62e","--pretty=%H","-n1"] +[2020-01-21 12:41:12.813939135] process done ExitSuccess +[2020-01-21 12:41:12.814207234] chat: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","cat-file","--batch"] +[2020-01-21 12:41:12.814533549] chat: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","cat-file","--batch-check=%(objectname) %(objecttype) %(objectsize)"] +[2020-01-21 12:41:12.820975511] read: git ["config","--null","--list"] +[2020-01-21 12:41:12.822586611] process done ExitSuccess +commit +[2020-01-21 12:41:12.823337737] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","commit","-a","-m","git-annex in dxld@Eli:/tmp/repo-a"] +On branch master +nothing to commit, working tree clean +[2020-01-21 12:41:12.851712964] process done ExitFailure 1 +ok +[2020-01-21 12:41:12.851809058] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","symbolic-ref","-q","HEAD"] +[2020-01-21 12:41:12.852844432] process done ExitSuccess +[2020-01-21 12:41:12.85289997] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","refs/heads/master"] +[2020-01-21 12:41:12.853966619] process done ExitSuccess +[2020-01-21 12:41:12.854022004] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/heads/synced/master"] +[2020-01-21 12:41:12.85488393] process done ExitSuccess +[2020-01-21 12:41:12.854935336] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/master..refs/heads/synced/master","--pretty=%H","-n1"] +[2020-01-21 12:41:12.85636602] process done ExitSuccess +pull repo-b +[2020-01-21 12:41:12.85658479] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","fetch","repo-b"] +remote: Enumerating objects: 3, done. +remote: Counting objects: 100% (3/3), done. +remote: Compressing objects: 100% (2/2), done. 
+remote: Total 2 (delta 0), reused 0 (delta 0) +Unpacking objects: 100% (2/2), done. +From ../repo-b + * [new branch] master -> repo-b/master +[2020-01-21 12:41:12.869425035] process done ExitSuccess +[2020-01-21 12:41:12.86951948] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","branch","-f","synced/master","refs/heads/master"] +[2020-01-21 12:41:12.870907097] process done ExitSuccess +[2020-01-21 12:41:12.870950213] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/remotes/repo-b/master"] +[2020-01-21 12:41:12.871908557] process done ExitSuccess +[2020-01-21 12:41:12.871965108] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/master..refs/remotes/repo-b/master","--pretty=%H","-n1"] +[2020-01-21 12:41:12.873517012] process done ExitSuccess +[2020-01-21 12:41:12.873627395] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/remotes/repo-b/synced/master"] +[2020-01-21 12:41:12.874965525] process done ExitSuccess +[2020-01-21 12:41:12.875039117] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/synced/master..refs/remotes/repo-b/synced/master","--pretty=%H","-n1"] +[2020-01-21 12:41:12.876601605] process done ExitSuccess + +[2020-01-21 12:41:12.876699748] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--hash","refs/heads/master"] +[2020-01-21 12:41:12.878506453] process done ExitSuccess +[2020-01-21 12:41:12.878622705] read: git ["--version"] +[2020-01-21 12:41:12.879681841] process done ExitSuccess +[2020-01-21 12:41:12.879787682] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","merge","--no-edit","refs/remotes/repo-b/master","--allow-unrelated-histories"] +Merge made by the 'recursive' strategy. + b | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 b +[2020-01-21 12:41:12.884914215] process done ExitSuccess +ok +[2020-01-21 12:41:12.885012011] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","git-annex"] +[2020-01-21 12:41:12.886558493] process done ExitSuccess +[2020-01-21 12:41:12.88678805] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--hash","refs/heads/git-annex"] +[2020-01-21 12:41:12.888312105] process done ExitSuccess +[2020-01-21 12:41:12.888486534] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/git-annex..a2ceb59a5879cdf88bd33104c0ee8ac390b3f62e","--pretty=%H","-n1"] +[2020-01-21 12:41:12.890100289] process done ExitSuccess +[2020-01-21 12:41:12.890310425] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","branch","-f","synced/master","refs/heads/master"] +[2020-01-21 12:41:12.892087109] process done ExitSuccess +[2020-01-21 12:41:12.892162993] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/remotes/repo-b/synced/master"] +[2020-01-21 12:41:12.893271619] process done ExitSuccess +[2020-01-21 12:41:12.893323578] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/remotes/repo-b/synced/master..refs/heads/synced/master","--pretty=%H","-n1"] +[2020-01-21 12:41:12.894429364] process done ExitSuccess +push repo-b +[2020-01-21 12:41:12.894494807] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","push","repo-b","+git-annex:synced/git-annex","master:synced/master"] +Enumerating objects: 6, done. +Counting objects: 100% (6/6), done. 
+Delta compression using up to 8 threads +Compressing objects: 100% (4/4), done. +Writing objects: 100% (4/4), 472 bytes | 472.00 KiB/s, done. +Total 4 (delta 1), reused 0 (delta 0) +To ../repo-b + 6c82d47..40d7fc6 master -> synced/master +[2020-01-21 12:41:12.939565851] process done ExitSuccess +[2020-01-21 12:41:12.939790389] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","push","repo-b","git-annex"] +[2020-01-21 12:41:12.945667784] process done ExitSuccess +[2020-01-21 12:41:12.945804791] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","push","repo-b","master"] +[2020-01-21 12:41:12.960382215] process done ExitFailure 1 +ok +[2020-01-21 12:41:12.961199639] process done ExitSuccess +[2020-01-21 12:41:12.961575886] process done ExitSuccess ++ rv=0 ++ git config --global merge.ff only ++ repro ++ rm -rf repo-a repo-b ++ mkdir repo-a repo-b ++ cd repo-a ++ git init +Initialized empty Git repository in /tmp/repo-a/.git/ ++ git annex init +init ok +(recording state in git...) ++ git remote add repo-b ../repo-b ++ cd repo-b ++ git init +Initialized empty Git repository in /tmp/repo-b/.git/ ++ git annex init +init ok +(recording state in git...) ++ git remote add repo-a ../repo-a ++ cd repo-a ++ touch a0 ++ git add a0 ++ git commit -m a0 +[master (root-commit) 250bdc3] a0 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 a0 ++ git annex sync +commit +On branch master +nothing to commit, working tree clean +ok +pull repo-b +warning: no common commits +remote: Enumerating objects: 5, done. +remote: Counting objects: 100% (5/5), done. +remote: Compressing objects: 100% (3/3), done. +remote: Total 5 (delta 1), reused 0 (delta 0) +Unpacking objects: 100% (5/5), done. +From ../repo-b + * [new branch] git-annex -> repo-b/git-annex +ok +(merging repo-b/git-annex into git-annex...) +(recording state in git...) +push repo-b +Enumerating objects: 13, done. +Counting objects: 100% (13/13), done. +Delta compression using up to 8 threads +Compressing objects: 100% (6/6), done. +Writing objects: 100% (11/11), 1023 bytes | 1023.00 KiB/s, done. +Total 11 (delta 0), reused 0 (delta 0) +To ../repo-b + * [new branch] git-annex -> synced/git-annex + * [new branch] master -> synced/master +ok ++ cd repo-b ++ git annex sync +commit +On branch master + +Initial commit + +nothing to commit +ok +fatal: ambiguous argument 'refs/heads/master..refs/heads/synced/master': unknown revision or path not in the working tree. +Use '--' to separate paths from revisions, like this: +'git [...] -- [...]' +pull repo-a +From ../repo-a + * [new branch] git-annex -> repo-a/git-annex + * [new branch] master -> repo-a/master + * [new branch] synced/master -> repo-a/synced/master + + +Already up to date. 
+ok ++ touch b ++ git add b ++ git commit -m b +[master af18b51] b + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 b ++ cd repo-a ++ touch a1 ++ git add a1 ++ git commit -m a1 +[master 91e5f6f] a1 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 a1 ++ git annex sync -d +[2020-01-21 12:41:13.526664743] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","git-annex"] +[2020-01-21 12:41:13.52932842] process done ExitSuccess +[2020-01-21 12:41:13.529400247] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--hash","refs/heads/git-annex"] +[2020-01-21 12:41:13.530793109] process done ExitSuccess +[2020-01-21 12:41:13.530950751] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/git-annex..4b03666eac77ff7f709a83289267cc1f28f80391","--pretty=%H","-n1"] +[2020-01-21 12:41:13.532177334] process done ExitSuccess +[2020-01-21 12:41:13.532400895] chat: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","cat-file","--batch"] +[2020-01-21 12:41:13.532647616] chat: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","cat-file","--batch-check=%(objectname) %(objecttype) %(objectsize)"] +[2020-01-21 12:41:13.538048182] read: git ["config","--null","--list"] +[2020-01-21 12:41:13.539367779] process done ExitSuccess +commit +[2020-01-21 12:41:13.540002284] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","commit","-a","-m","git-annex in dxld@Eli:/tmp/repo-a"] +On branch master +nothing to commit, working tree clean +[2020-01-21 12:41:13.568174475] process done ExitFailure 1 +ok +[2020-01-21 12:41:13.568319083] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","symbolic-ref","-q","HEAD"] +[2020-01-21 12:41:13.57005936] process done ExitSuccess +[2020-01-21 12:41:13.5701439] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","refs/heads/master"] +[2020-01-21 12:41:13.571885769] process done ExitSuccess +[2020-01-21 12:41:13.571979878] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/heads/synced/master"] +[2020-01-21 12:41:13.573389866] process done ExitSuccess +[2020-01-21 12:41:13.573486391] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/master..refs/heads/synced/master","--pretty=%H","-n1"] +[2020-01-21 12:41:13.575824245] process done ExitSuccess +pull repo-b +[2020-01-21 12:41:13.575950332] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","fetch","repo-b"] +remote: Enumerating objects: 3, done. +remote: Counting objects: 100% (3/3), done. +remote: Compressing objects: 100% (2/2), done. +remote: Total 2 (delta 0), reused 0 (delta 0) +Unpacking objects: 100% (2/2), done. 
+From ../repo-b + * [new branch] master -> repo-b/master +[2020-01-21 12:41:13.586295016] process done ExitSuccess +[2020-01-21 12:41:13.586392073] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","branch","-f","synced/master","refs/heads/master"] +[2020-01-21 12:41:13.587730993] process done ExitSuccess +[2020-01-21 12:41:13.5877987] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/remotes/repo-b/master"] +[2020-01-21 12:41:13.588901362] process done ExitSuccess +[2020-01-21 12:41:13.588969352] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/master..refs/remotes/repo-b/master","--pretty=%H","-n1"] +[2020-01-21 12:41:13.5904904] process done ExitSuccess +[2020-01-21 12:41:13.590563378] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/remotes/repo-b/synced/master"] +[2020-01-21 12:41:13.591686652] process done ExitSuccess +[2020-01-21 12:41:13.591740872] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/synced/master..refs/remotes/repo-b/synced/master","--pretty=%H","-n1"] +[2020-01-21 12:41:13.593110565] process done ExitSuccess + +[2020-01-21 12:41:13.593182929] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--hash","refs/heads/master"] +[2020-01-21 12:41:13.594562053] process done ExitSuccess +[2020-01-21 12:41:13.594660841] read: git ["--version"] +[2020-01-21 12:41:13.595595012] process done ExitSuccess +[2020-01-21 12:41:13.59568225] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","merge","--no-edit","refs/remotes/repo-b/master","--allow-unrelated-histories"] +fatal: Not possible to fast-forward, aborting. +[2020-01-21 12:41:13.597312517] process done ExitFailure 128 +[2020-01-21 12:41:13.597552001] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","ls-files","--unmerged","-z","--","."] +[2020-01-21 12:41:13.598903873] process done ExitSuccess +[2020-01-21 12:41:13.598967865] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","ls-files","--deleted","-z","--","."] +[2020-01-21 12:41:13.600107434] process done ExitSuccess +failed +[2020-01-21 12:41:13.600185848] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","git-annex"] +[2020-01-21 12:41:13.601485474] process done ExitSuccess +[2020-01-21 12:41:13.601548875] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--hash","refs/heads/git-annex"] +[2020-01-21 12:41:13.602954053] process done ExitSuccess +[2020-01-21 12:41:13.603111747] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/heads/git-annex..4b03666eac77ff7f709a83289267cc1f28f80391","--pretty=%H","-n1"] +[2020-01-21 12:41:13.604578405] process done ExitSuccess +[2020-01-21 12:41:13.60470379] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","branch","-f","synced/master","refs/heads/master"] +[2020-01-21 12:41:13.606371207] process done ExitSuccess +[2020-01-21 12:41:13.606457892] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","show-ref","--verify","-q","refs/remotes/repo-b/synced/master"] +[2020-01-21 12:41:13.607658403] process done ExitSuccess +[2020-01-21 12:41:13.607726481] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","log","refs/remotes/repo-b/synced/master..refs/heads/synced/master","--pretty=%H","-n1"] +[2020-01-21 12:41:13.609097845] process done ExitSuccess +push repo-b 
+[2020-01-21 12:41:13.60918811] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","push","repo-b","+git-annex:synced/git-annex","master:synced/master"] +Enumerating objects: 3, done. +Counting objects: 100% (3/3), done. +Delta compression using up to 8 threads +Compressing objects: 100% (2/2), done. +Writing objects: 100% (2/2), 231 bytes | 231.00 KiB/s, done. +Total 2 (delta 0), reused 0 (delta 0) +To ../repo-b + 250bdc3..91e5f6f master -> synced/master +[2020-01-21 12:41:13.651508284] process done ExitSuccess +[2020-01-21 12:41:13.651669133] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","push","repo-b","git-annex"] +[2020-01-21 12:41:13.657342122] process done ExitSuccess +[2020-01-21 12:41:13.657603861] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","push","repo-b","master"] +[2020-01-21 12:41:13.663212647] process done ExitFailure 1 +To ../repo-b + ! [rejected] master -> master (non-fast-forward) +error: failed to push some refs to '../repo-b' +hint: Updates were rejected because the tip of your current branch is behind +hint: its remote counterpart. Integrate the remote changes (e.g. +hint: 'git pull ...') before pushing again. +hint: See the 'Note about fast-forwards' in 'git push --help' for details. +ok +[2020-01-21 12:41:13.663856234] process done ExitSuccess +[2020-01-21 12:41:13.664249322] process done ExitSuccess +git-annex: sync: 1 failed ++ echo ===== Breaks with merge.ff=only ===== +===== Breaks with merge.ff=only ===== ++ [ 0 -eq 0 ] ++ echo ===== Works without merge.ff=only ===== +===== Works without merge.ff=only ===== +# End of transcript or log. +"""]] + +Notice the call to git-merge, failing: + +``` +[2020-01-21 12:41:13.59568225] call: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","merge","--no-edit","refs/remotes/repo-b/master","--allow-unrelated-histories"] +fatal: Not possible to fast-forward, aborting. +``` + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +Sure! Using git-annex to keep artifacts in development repos works great usually :) diff --git a/doc/bugs/git_config_merge.ff__61__only_breaks_sync/comment_1_53fd805e1afcd958134b5774550e7644._comment b/doc/bugs/git_config_merge.ff__61__only_breaks_sync/comment_1_53fd805e1afcd958134b5774550e7644._comment new file mode 100644 index 0000000000..a5c94ec3a6 --- /dev/null +++ b/doc/bugs/git_config_merge.ff__61__only_breaks_sync/comment_1_53fd805e1afcd958134b5774550e7644._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-21T18:31:22Z" + content=""" +I feel it's right for git-annex sync to honor git configs, so it's right +for it to not merge origin/master. And, without that merge, it's right for +it to fail to push master to origin. Since it does push synced/master, this +does not prevent other clones of the repo, where git-annex sync is later +ran, from getting the changes made by this sync. + +That leaves only this ugly thing: + +fatal: ambiguous argument 'refs/heads/master..refs/heads/synced/master': unknown revision or path not in the working tree. + +Which comes from Git.Branch.changed, but I'm not clear how the fast forward +configuration would prevent either of those refs from existing. 
+"""]] diff --git a/doc/bugs/git_config_merge.ff__61__only_breaks_sync/comment_2_db82f6c4a27ac8ee862b5aaa45670085._comment b/doc/bugs/git_config_merge.ff__61__only_breaks_sync/comment_2_db82f6c4a27ac8ee862b5aaa45670085._comment new file mode 100644 index 0000000000..337e648c09 --- /dev/null +++ b/doc/bugs/git_config_merge.ff__61__only_breaks_sync/comment_2_db82f6c4a27ac8ee862b5aaa45670085._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="dxld" + avatar="http://cdn.libravatar.org/avatar/742547a848e15c9f7fb381191c239141" + subject="comment 2" + date="2020-01-21T19:28:29Z" + content=""" +Honestly I feel like the (perceived) semantics of sync are broken by this behaviour. I would expect git-annex to do what it has to to make what I asked for happen. + +I agree that in general it's a good thing not to needlessly override git settings but for the sync command I really don't see any way that not merging can be considered sensible behaviour. To me as a user it just feels like I changed a setting completely unrelated to git-annex-sync and suddenly sync broke. + +Consider this: the git-annex-sync(1) man page never actually mentions that it will run git-merge. On the other hand git-pull(1) is very forthcoming with the fact that it's just a shorthand for `git fetch; git merge` so it's obvious to me that settings affecting merge will affect git-pull, not so for sync. + +I've been unable to sync my git-annex repos for a couple of months now because of this issue so firmly believe this is a serious usabiliy issue. + +At the very least we have a documentation issue here. Though I would still argue the behaviour is bonkers :) + +"""]] diff --git a/doc/bugs/git_keeps_refreshing_index/comment_6_e0bfde0d53042ce8d310f356f88c610b._comment b/doc/bugs/git_keeps_refreshing_index/comment_6_e0bfde0d53042ce8d310f356f88c610b._comment new file mode 100644 index 0000000000..b561288e73 --- /dev/null +++ b/doc/bugs/git_keeps_refreshing_index/comment_6_e0bfde0d53042ce8d310f356f88c610b._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="t+gitannex@1d62779e8b54f30a854739f61542a6885167b01f" + nickname="t+gitannex" + avatar="http://cdn.libravatar.org/avatar/87c7f62c00e4a744aa500423e421120f" + subject="comment 6" + date="2020-02-06T11:07:34Z" + content=""" +I'm able to reproduce this with git annex 7.20191230 and git 2.25.0 on Arch Linux, but I've had it on OSX in the past as well. The annex uses a v7 repository. I don't need to do anything besides unlocking some files and running git status. Unlocking 10 files, git status takes 3s and with 85 files it takes 20s, so it seems to scale linearly with the no of files. Happy to share more details about the repository if it's useful. +"""]] diff --git a/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file.mdwn b/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file.mdwn index d93c3623a5..9457d4b8bf 100644 --- a/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file.mdwn +++ b/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file.mdwn @@ -1,3 +1,5 @@ +[[!meta title="http remotes that require authentication are not yet supported"]] + It is not a ground shaking issue, but probably would be best to handle it more gracefully. Initially mentioned while doing install using datalad. 
Account/permission is required to access this particular repo, ask Canadians for access if you don't have it yet Joey. credentials I guess got asked for and cached by git upon initial invocation, so upon subsequent calls didn't ask for any: @@ -143,4 +145,6 @@ git annex 7.20190819+git2-g908476a9b-1~ndall+1 and the same with bleeding edge 7 [[!meta author=yoh]] [[!tag projects/dandi]] - +> [[done]]; the error message is improved and also git remotes that need +> http basic auth to access will get password from `git credential`. +> --[[Joey]] diff --git a/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file/comment_3_3df07b6c8ef25ad96ff4f198931347ad._comment b/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file/comment_3_3df07b6c8ef25ad96ff4f198931347ad._comment new file mode 100644 index 0000000000..7be8cac1f4 --- /dev/null +++ b/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file/comment_3_3df07b6c8ef25ad96ff4f198931347ad._comment @@ -0,0 +1,31 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-22T16:04:37Z" + content=""" +git-annex could use `git credential` if the config download fails with +401 unauthorized and then retry with the credentials. (The git-lfs special +remote already does this.) And it would also need to do the same thing +when getting a key from the remote. + +But that would not help with the https://git.bic.mni.mcgill.ca example, +apparently, because there's no 401, but a 302 redirect to a 200, +that is indistingishable from a successful download. + +Yeah, when git-annex expects a git config, if it doesn't parse as one, +it could retry, asking for credentials. +But that seems asking for trouble: what if it fails to parse for +another reason, maybe the web server served up something other than the +expected config, maybe a captive portal got in the way. There would be a +username/password prompt that doesn't make sense to the user at all. + +And if this happens in a key download, git-annex certianly has no way to +tell that what it downloaded is not intended as the content of a key, +short of verifying the content, and failure to verify certainly doesn't +justify prompting for a username/password. + +So, I am not comfortable with falling back to ask for credentials unless +I've seen a http status code that indicates they are necessary. +And IMHO gitlab's use of a 302 redirect to a login page is a bug in +gitlab, and will need to be fixed there, or a better http server used. +"""]] diff --git a/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file/comment_4_051d22f0bd0eb601476cd3b74a9b98b8._comment b/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file/comment_4_051d22f0bd0eb601476cd3b74a9b98b8._comment new file mode 100644 index 0000000000..956baeb592 --- /dev/null +++ b/doc/bugs/leaks_git_config_error_message_upon_inability_to_read_downloaded___34__config__34___file/comment_4_051d22f0bd0eb601476cd3b74a9b98b8._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""re: related: shouldn't git annex try external remotes to download config?""" + date="2020-01-22T16:31:16Z" + content=""" +No, the external special remote protocol is not aimed at downloading git +config files. 
Anyway, this code path is never involved with using +special remotes; the uuid of a special remote is known and so there is no +need to ever download a git config file to discover it. +"""]] diff --git a/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages.mdwn b/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages.mdwn index 4c4fca407a..57bb645ee0 100644 --- a/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages.mdwn +++ b/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages.mdwn @@ -54,3 +54,7 @@ PS ignore "ignores --json-error-messages" part of the subject -- was detected in [[!meta author=yoh]] [[!tag projects/datalad]] + +> [[fixed|done]] what I consider to be the core issue in this bug report. +> --[[Joey]] + diff --git a/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages/comment_1_a7ad806ceb76f13ff8fdcce62bc0ed8a._comment b/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages/comment_1_a7ad806ceb76f13ff8fdcce62bc0ed8a._comment new file mode 100644 index 0000000000..666d6a54e8 --- /dev/null +++ b/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages/comment_1_a7ad806ceb76f13ff8fdcce62bc0ed8a._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-22T15:11:05Z" + content=""" +This error message is not specific to a particular file in the repository, so if +git-annex get outputs it, it doesn't help for the error message to be +wrapped up in json. The actual purpose of --json-error messages is being +able to correlate a failure to eg, get a particular file with an error +message related to that action. Not in avoiding all possible stderr. + +The actual bug here is that it dumps git config to stderr at all. + +---- + +The extra newlines are output to stdout, so not a problem WRT stderr. +"""]] diff --git a/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages/comment_2_495820d777c889f88466d725c11895f9._comment b/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages/comment_2_495820d777c889f88466d725c11895f9._comment new file mode 100644 index 0000000000..b68dcc0b2c --- /dev/null +++ b/doc/bugs/on_some_remotes_failing_to_detect_annex_spits_out_message_to_stderr_and_empty_lines_to_stderr__44___ignores_--json-error-messages/comment_2_495820d777c889f88466d725c11895f9._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-22T17:04:09Z" + content=""" +Error message has been improved. +"""]] diff --git a/doc/bugs/signal_weirdness.mdwn b/doc/bugs/signal_weirdness.mdwn index 5a00963343..890767386c 100644 --- a/doc/bugs/signal_weirdness.mdwn +++ b/doc/bugs/signal_weirdness.mdwn @@ -48,3 +48,7 @@ as a less bad alternative. Still, I'd like to find a better one. 
--[[Joey]] [[!tag confirmed]] + +> [[done]], I think this was fixed long ago, git-annex no longer installs a +> sigint handler and I interrupt it all the time and it behaves as I would +> expect and not as shown here --[[Joey]] diff --git a/doc/bugs/standalone_build_is_slightly_out_of_date.mdwn b/doc/bugs/standalone_build_is_slightly_out_of_date.mdwn index 97b2e0116d..939d5b0251 100644 --- a/doc/bugs/standalone_build_is_slightly_out_of_date.mdwn +++ b/doc/bugs/standalone_build_is_slightly_out_of_date.mdwn @@ -44,3 +44,5 @@ $> git -C ~/proj/git-annex describe --contains 6.20180808-ga1327779a [[!meta author=yoh]] [[!tag projects/datalad]] +> I see no reason to have this open half a year and many many releases later, so +> [[done]] --[[Joey]] diff --git a/doc/bugs/unable_to_get_from_public_S3_remote_without_clear_reasoning_why/comment_1_87983a598cacaa8800db4a01ec23ba1b._comment b/doc/bugs/unable_to_get_from_public_S3_remote_without_clear_reasoning_why/comment_1_87983a598cacaa8800db4a01ec23ba1b._comment new file mode 100644 index 0000000000..d88c251786 --- /dev/null +++ b/doc/bugs/unable_to_get_from_public_S3_remote_without_clear_reasoning_why/comment_1_87983a598cacaa8800db4a01ec23ba1b._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-23T18:59:53Z" + content=""" +It kind of looks like this S3 remote is being used without login +credentials. That's not clear, because you could have environment variables +set or creds cached, or not. + +Since it's "public: yes", it would fall back to the public url code path to +get the file when there are no creds. + +There are a lot of ways that could fail. Eg, perhaps no publicurl has been +configured for the remote. But in that case (and many other cases), +an exception should be thrown and displayed. + +I think I need to know how to reproduce this to get any further. +"""]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing.mdwn b/doc/bugs/warning_about_ssh_caching_keeps_showing.mdwn new file mode 100644 index 0000000000..987a853cd1 --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing.mdwn @@ -0,0 +1,59 @@ +### Please describe the problem. +I keep getting the warning about ssh caching being disabled, even when I explicitly enable it. + +### What steps will reproduce the problem? +See log below + +### What version of git-annex are you using? On what operating system? +7.20200204 on Amazon Linux 2 + +### Please provide any additional information below. + +[[!format sh """ +# If you can, paste a complete transcript of the problem occurring here. +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log + +(just-git-annex-env) 13:00 [viral-ngs-benchmarks] $ git annex sync -c annex.sshcaching=true +On branch is-devel +Your branch is up to date with 'origin/is-devel'. + + +It took 8.50 seconds to enumerate untracked files. 'status -uno' +may speed it up, but you have to be careful not to forget to add +new files yourself (see 'git help status'). +nothing to commit, working tree clean +commit ok +pull origin + You have enabled concurrency, but ssh connection caching is not enabled. This may result in multiple ssh processes prompting for pas\ +swords at the same time. 
+ok + +(just-git-annex-env) 13:00 [viral-ngs-benchmarks] $ uname -a +Linux ip-172-31-86-201.ec2.internal 4.14.165-131.185.amzn2.x86_64 #1 SMP Wed Jan 15 14:19:56 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux +(just-git-annex-env) 13:02 [viral-ngs-benchmarks] $ git annex version +git-annex version: 7.20200204-g4db801d +build flags: Assistant Webapp Pairing S3 WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite +dependency versions: aws-0.21.1 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.1.0 ghc-8.6.5 http-client-0.5.14 persistent-sql\ +ite-2.9.3 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_2\ +24 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE\ +2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224\ + BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external +operating system: linux x86_64 +supported repository versions: 7 +upgrade supported from repository versions: 0 1 2 3 4 5 6 +local repository version: 7 + + + +# End of transcript or log. +"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +I've been using git-annex for 1.5 years to manage bioinformatics analyses. It's a very versatile and well-designed tool. I've been able to adapt it to many use cases; +the ability to easily write your own external backends has been especially helpful for that. The amount of work and thought that has gone into designing/building git-annex is +enormous, and very much appreciated. + +> [[done]]; see comment --[[Joey]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_1_711a8e784ba8fde0164469fa08176687._comment b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_1_711a8e784ba8fde0164469fa08176687._comment new file mode 100644 index 0000000000..b09d81c481 --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_1_711a8e784ba8fde0164469fa08176687._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-02-14T17:48:41Z" + content=""" +It seems you must have annex.jobs set to something, since concurrency +is enabled without any -J option, so the easy fix is just to unset that. + +It kind of looks like your build of git-annex may have been made without +ssh connection caching support, which would happen if its configure program +detected at build time that ssh doesn't support it. + +That would be unusual if so, all the builds of git-annex that I'm aware of +are made with ssh that does support it. + +There are a couple of even less likely scenarios, like +`GIT_ANNEX_SSH_SOCKET_DIR` being set to a directory you can't write to. + +I've changed the code to always say explicitly why ssh caching can't be +enabled. I also let annex.sshcaching override the build-time detection. + +I guess that's enough to close this, unless it turns out its +reasons for not enabling it are not one of those I mentioned above, but +something entirely bogus. 
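+
+Concretely, the precedence is now: an explicit annex.sshcaching setting
+wins, and only when it is unset does the build-time detection decide.
+A hypothetical sketch of that logic (not the real code):
+
+    -- annex.sshcaching, when set, overrides build-time ssh detection
+    data BuildTimeSsh = CachingSupported | CachingUnsupported
+
+    useSshCaching :: Maybe Bool -> BuildTimeSsh -> Bool
+    useSshCaching (Just b) _                  = b
+    useSshCaching Nothing  CachingSupported   = True
+    useSshCaching Nothing  CachingUnsupported = False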
+"""]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_2_43b651b09b91986d5cb4ae14c83017c7._comment b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_2_43b651b09b91986d5cb4ae14c83017c7._comment new file mode 100644 index 0000000000..c2335f2b14 --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_2_43b651b09b91986d5cb4ae14c83017c7._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="ssh caching" + date="2020-02-14T20:53:27Z" + content=""" +\"your build of git-annex may have been made without ssh connection caching support, which would happen if its configure program detected at build time that ssh doesn't support it\" -- yes, according to the [build log](https://dev.azure.com/conda-forge/84710dde-1620-425b-80d0-4cf5baca359d/_apis/build/builds/117561/logs/8). I can add an ssh dependency to the conda-forge git-annex recipe. It would be more flexible to not have that dependency and instead to have git-annex's behavior depend on the ssh available at runtime; but, I guess there's a reason it's a compile-time option? + +Also, I don't have ssh prompting for passwords since I use ssh-agent, and having the warning shown every time is distracting. Maybe, a config option could be added to disable the warning? +"""]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_3_49182db51bd86f0c60fadde5e4643f77._comment b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_3_49182db51bd86f0c60fadde5e4643f77._comment new file mode 100644 index 0000000000..0b8e66f301 --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_3_49182db51bd86f0c60fadde5e4643f77._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 3" + date="2020-02-15T04:45:01Z" + content=""" +Ilya, you wrote \"the ability to easily write your own external **backends** has been especially helpful\". Did you mean \"external **remotes**\"? since \"external backends\" are yet [TODO AFAIK](https://git-annex.branchable.com/todo/external_backends/) +"""]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_4_ab823204f02dd26ebfe0613f711e0f9d._comment b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_4_ab823204f02dd26ebfe0613f711e0f9d._comment new file mode 100644 index 0000000000..9a029fefbf --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_4_ab823204f02dd26ebfe0613f711e0f9d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="comment 4" + date="2020-02-16T04:05:11Z" + content=""" +Yes, I meant external remotes. 
+"""]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_5_24f7717029ef520f932123130088d76d._comment b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_5_24f7717029ef520f932123130088d76d._comment new file mode 100644 index 0000000000..5dfc6d355f --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_5_24f7717029ef520f932123130088d76d._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-02-17T16:29:58Z" + content=""" +I think the idea with detecting at build time is that if git-annex is being +built on a platform where ssh doesn't support it, eg because it's not +openssh but some other ssh implementation, it might as well compile out +support rather than fail obscurely when it tries to use it. And it's +uncommon for the systems where a program is built and used to have +different ssh implementations, so runtime probing would only slow it +down. (git-annex makes similar assumptions about eg, `cp --reflink` being +supported or not, and I don't think it's very unusual to probe OS features +at compile time.) + +The warning seems useful, because here we've discovered that you have been +building git-annex without support for ssh caching all along! + +The way to disable the warning is to set annex.sshcaching=true +(after [[!commit a4909470688287fc0009eaf82dab2e108bd214f1]]). +"""]] diff --git a/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_6_0b316eb8b0436350ce7fbec5093975d1._comment b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_6_0b316eb8b0436350ce7fbec5093975d1._comment new file mode 100644 index 0000000000..ea6b1c9082 --- /dev/null +++ b/doc/bugs/warning_about_ssh_caching_keeps_showing/comment_6_0b316eb8b0436350ce7fbec5093975d1._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="git-annex build-time dependencies" + date="2020-02-18T19:33:32Z" + content=""" +\"git-annex makes similar assumptions about eg, `cp --reflink` being supported or not, and I don't think it's very unusual to probe OS features at compile time\" -- this works well for package managers tied to specific distros. But consider something like [[install/conda]] that creates packages meant to be installed on a variety of systems. I can add a run-time dependency on `coreutils` to ensure that `cp --reflink` works, but I'm a bit wary about requiring git-annex users to replace all core utils with conda-forge ones. For one, these may be slower, being compiled for a generic architecture. For two, if they're not fully backwards-compatible, they make break some assumptions relied on by other parts of the distro. +"""]] diff --git a/doc/bugs/windows_autostart.mdwn b/doc/bugs/windows_autostart.mdwn new file mode 100644 index 0000000000..f5b7a10f75 --- /dev/null +++ b/doc/bugs/windows_autostart.mdwn @@ -0,0 +1,4 @@ +I installed git-annex in windows using the file git-annex-installer.exe, and now each time I'm starting my computer I get a message telling me that "C:\Program Files\Git\cmd\git-annex-autostart.vbs" cannot be found. + +This is very annoying and I don't need git-annex to be started at startup. I looked in msconfig.exe and I didn't find any entry for git-annex. Is there a way to disable this? 
+ diff --git a/doc/bugs/windows_autostart/comment_1_80a2561901554ddb45db719c086629b2._comment b/doc/bugs/windows_autostart/comment_1_80a2561901554ddb45db719c086629b2._comment new file mode 100644 index 0000000000..f529a22c84 --- /dev/null +++ b/doc/bugs/windows_autostart/comment_1_80a2561901554ddb45db719c086629b2._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="jeanpmbox-456@7222359de8d1f37a7cf25a519e8faf90a9517b50" + nickname="jeanpmbox-456" + avatar="http://cdn.libravatar.org/avatar/164eb4254c5f83d95d3e0b810ff7aab9" + subject="comment 1" + date="2020-02-01T11:35:37Z" + content=""" +I finally saw thanks to the file `Build/NullSoftInstaller.hs` and to NirSoft Program WhatInStartup that the startup script is located in `%APPDATA%\Microsoft\Windows\Start Menu\Programs\Startup`. + +It would be nice to have an option to activate this or not in the installation. +"""]] diff --git a/doc/contribute.mdwn b/doc/contribute.mdwn index 6441f497d0..1a31a3e533 100644 --- a/doc/contribute.mdwn +++ b/doc/contribute.mdwn @@ -69,6 +69,6 @@ As Haskell programs go, git-annex does not use too many advanced features. [Learn You A Haskell](http://learnyouahaskell.com/) will teach you enough to get started. -Of course git-annex does use monads, and particularly the `Annex``monad +Of course git-annex does use monads, and particularly the `Annex` monad which gives access to its state about the git repository as well as lower-level IO. diff --git a/doc/contribute/comment_3_e7219514f52207fcfb97aeec03241e8d._comment b/doc/contribute/comment_3_e7219514f52207fcfb97aeec03241e8d._comment new file mode 100644 index 0000000000..513f6e5ae5 --- /dev/null +++ b/doc/contribute/comment_3_e7219514f52207fcfb97aeec03241e8d._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Dan" + avatar="http://cdn.libravatar.org/avatar/986de9e060699ae70ff7c31342393adc" + subject="Editing Comments?" + date="2020-02-14T01:22:46Z" + content=""" +Is it possible to edit comments on the branchable wiki? I realized there was a sentence I failed to finish when posting [this comment](https://git-annex.branchable.com/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/#comment-19feab1afb2e0b33315a8368a7cdebf7) and I'd love to go back and finish the thought. The \"Edit\" button at the top of the page lets me edit the content of the page, but not any of the comments. + +I tried cloning the wiki, editing the file corresponding to my comment, and then pushing, but the push was rejected (the changes were in doc tree so I expected it to be accepted, but perhaps comments are more locked down). +"""]] diff --git a/doc/contribute/comment_4_21a75c774757f27911a9249f9b42368d._comment b/doc/contribute/comment_4_21a75c774757f27911a9249f9b42368d._comment new file mode 100644 index 0000000000..fe091d91db --- /dev/null +++ b/doc/contribute/comment_4_21a75c774757f27911a9249f9b42368d._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""@Dan""" + date="2020-02-17T17:05:03Z" + content=""" +That might be an ikiwiki issue, because you're supposed to be able to git +push any change that a user could make on the web, and I think anyone can +delete a comment and then make a new comment in its place. Probably +two pushes would work ... or just copy and then delete the comment and paste +into a new comment. 
+"""]] diff --git a/doc/contribute/comment_5_c5f7f809da5829f4c259d0a630f0f722._comment b/doc/contribute/comment_5_c5f7f809da5829f4c259d0a630f0f722._comment new file mode 100644 index 0000000000..72378f3e7c --- /dev/null +++ b/doc/contribute/comment_5_c5f7f809da5829f4c259d0a630f0f722._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Dan" + avatar="http://cdn.libravatar.org/avatar/986de9e060699ae70ff7c31342393adc" + subject="Thanks" + date="2020-02-17T17:52:20Z" + content=""" +Got it! In this case, since the [conversation](https://git-annex.branchable.com/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/#comment-a232b074bb04a942903468cf0d7a13b1) has now moved on, I'll just complete my thought in reply to your most recent comment, but I'll use the workflow you suggested in the future if I need to edit a comment. +"""]] diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index ddaad9ae63..9b69300d8f 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -54,8 +54,9 @@ could have its own protocol extension details, but none are currently used. EXTENSIONS Next, git-annex will generally send a message telling the special -remote to start up. (Or it might send an INITREMOTE or EXPORTSUPPORTED, -or perhaps other things in the future, so don't hardcode this order.) +remote to start up. (Or it might send an INITREMOTE or EXPORTSUPPORTED or +LISTCONFIGS, or perhaps other things in the future, so don't hardcode this +order.) PREPARE @@ -116,9 +117,9 @@ The following requests *must* all be supported by the special remote. Indicates that INITREMOTE failed. * `PREPARE` Tells the remote that it's time to prepare itself to be used. - Only a few requests for details about the remote can come before this. - Those include EXTENSIONS, INITREMOTE, and EXPORTSUPPORTED, but others - may be added later. + Only a few requests for details about the remote can come before this + (EXTENSIONS, INITREMOTE, EXPORTSUPPORTED, and LISTCONFIGS, + but others may be added later). * `PREPARE-SUCCESS` Sent as a response to PREPARE once the special remote is ready for use. * `PREPARE-FAILURE ErrorMsg` @@ -173,6 +174,19 @@ the special remote can reply with `UNSUPPORTED-REQUEST`. Sent in response to a EXTENSIONS request, the List could be used to indicate protocol extensions that the special remote uses, but there are currently no such extensions. +* `LISTCONFIGS` + Requests the remote to return a list of settings it uses (with + `GETCONFIG` and `SETCONFIG`). Providing a list makes `git annex initremote` + work better, because it can check the user's input, and can also display + a list of settings with descriptions. Note that the user is not required + to provided all the settings listed here. A block of responses + can be made to this, which must always end with `CONFIGSEND`. + * `CONFIG Name Description` + Indicates the name and description of a config setting. The description + should be reasonably short. Example: + "CONFIG directory store data here" + * `CONFIGEND` + Indicates the end of the response block. * `GETCOST` Requests the remote to return a use cost. Higher costs are more expensive. (See Config/Cost.hs for some standard costs.) @@ -283,6 +297,8 @@ handling a request. Gets one of the special remote's configuration settings, which can have been passed by the user when running `git annex initremote`, or can have been set by a previous SETCONFIG. 
Can be run at any time. + It's recommended that special remotes that use this implement + LISTCONFIGS. (git-annex replies with VALUE followed by the value. If the setting is not set, the value will be empty.) * `SETCREDS Setting User Password` diff --git a/doc/devblog/day_615__new_year.mdwn b/doc/devblog/day_615__new_year.mdwn new file mode 100644 index 0000000000..f36668383b --- /dev/null +++ b/doc/devblog/day_615__new_year.mdwn @@ -0,0 +1,21 @@ +The release of git-annex with all the ByteString optimisations went out +earlier this week. The Windows autobuilder was down and I didn't try to get +it building on Windows, so fixed that today, luckily all those changes only +broke a few bits of Windows-specific code. + +Also today, I added git-annex add --force-annex/--force-git options. +These do the same thing as -c annex.largefiles=anthing etc, but +are easier to type and may avoid some tricky git behavior in some edge +cases. + +I'd kind of like to get back to v8 this month and perhaps release it. +There's a `v8` branch now, which as well as the sqlite changes adds a new +annex.dotfiles config setting, and cleans up the special cases around +adding dotfiles. Anyone not using git-annex to manage large dotfiles +(or files in dotdirs) won't be impacted, but those who do will need to +enable annex.dotfiles and configure annex.largefiles to match the dotfiles +they want annexed. There is a risk that someone who's in the habit of +running `git annex add .dotfile` to add them to the annex will be surprised +when the new version adds them to git because they've not done the necessary +configuration. I'm still mulling over whether this is an acceptable risk +to mostly de-uglify and de-special-case dotfiles. diff --git a/doc/devblog/day_616__remote_config_parsing.mdwn b/doc/devblog/day_616__remote_config_parsing.mdwn new file mode 100644 index 0000000000..858b53d206 --- /dev/null +++ b/doc/devblog/day_616__remote_config_parsing.mdwn @@ -0,0 +1,9 @@ +I'm in the middle of a big change to internals. Remotes have buried inside +them a string-based configuration, and those settings are only parsed when +they're used, so bad configuration is often ignored rather than being +detected when the user inputs it. The parsing is moving to happen upfront. + +This is something I could not have done when I first wrote git-annex, +because the values that get parsed have many different types, so how can a +single Remote data type contain those, whatever they are? Now I know how +to use the Typeable class to do such things. diff --git a/doc/devblog/day_617__remote_config_parsing_continued.mdwn b/doc/devblog/day_617__remote_config_parsing_continued.mdwn new file mode 100644 index 0000000000..48d41d0f81 --- /dev/null +++ b/doc/devblog/day_617__remote_config_parsing_continued.mdwn @@ -0,0 +1,20 @@ +This has been a big change, I'm now 3 days and a 3000 line diff in and I +finally got all the remote configuration settings converted to the new +up-front parsing. + +Seems like quite a lot of work, since the only user-visible improvement is +these error messages: + + # git annex initremote demo type=directory directory=../foo encryption=none foo=bar + initremote demo + git-annex: Unexpected fields: foo + + # git annex initremote demo type=directory directory=../foo encryption=none exporttree=true + initremote demo + git-annex: Bad value for exporttree (expected yes or no) + +But this involved paying down technical debt in a big code base, so of course it was +expensive. 
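+
+The trick that made this workable is the Typeable bit mentioned
+yesterday. A toy sketch of the idea (not the real git-annex types):
+
+    {-# LANGUAGE ExistentialQuantification #-}
+
+    import Data.Typeable (Typeable, cast)
+    import qualified Data.Map as M
+
+    -- each parsed value is hidden behind an existential wrapper
+    data ParsedValue = forall v. Typeable v => ParsedValue v
+
+    type ParsedConfig = M.Map String ParsedValue
+
+    -- callers ask for a field at the type they expect; cast checks it
+    getParsed :: Typeable v => String -> ParsedConfig -> Maybe v
+    getParsed field m = case M.lookup field m of
+        Just (ParsedValue v) -> cast v
+        Nothing -> Nothing
+
+So one map can hold a parsed Bool next to a parsed Integer, and a getter
+only succeeds when the stored value really has the type the caller asked
+for.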
+
+Anyway, it should now be relatively easy to implement
+`git annex initremote --list-params-for=S3`
diff --git a/doc/devblog/day_617__remote_config_parsing_continued/comment_1_d3ab2e85ea775cc6f39f12210374c35f._comment b/doc/devblog/day_617__remote_config_parsing_continued/comment_1_d3ab2e85ea775cc6f39f12210374c35f._comment
new file mode 100644
index 0000000000..541bb6487d
--- /dev/null
+++ b/doc/devblog/day_617__remote_config_parsing_continued/comment_1_d3ab2e85ea775cc6f39f12210374c35f._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="Ilya_Shlyakhter"
+ avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
+ subject="thanks"
+ date="2020-01-23T16:51:44Z"
+ content="""
+\"the only user-visible improvement is these error messages\" -- FWIW, I've been bitten by the lack of config param checking in the past (thought I had set a chunk size but didn't due to misspelled param name, had to re-create the remote.)
+"""]]
diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files.mdwn b/doc/forum/Annex_v7_repos_and_plain_git_files.mdwn
new file mode 100644
index 0000000000..16c3b00833
--- /dev/null
+++ b/doc/forum/Annex_v7_repos_and_plain_git_files.mdwn
@@ -0,0 +1,23 @@
+Hi,
+This is not an issue, more some questions related to my 'legacy' git-annex workflow, which is disturbed by v7 repos.
+
+## The context
+I've got an old repo (initially v5) which has both plain git txt files and annexed binary files. The plain git files don't go through git-annex, only git.
+This way, I have the classic git history of the txt files, with versioning, plus the management of the (big) binaries via git-annex.
+The best of both worlds.
+
+## The problem
+I converted it to v7, and it is now not possible to add plain git files through 'git add' anymore; they are managed by git-annex automatically, probably because of the smudge filter added in .git/info/attributes.
+I understand (and like) this new behaviour, because it avoids adding big binary files through 'git add' by accident. But in this repo, I would like to go back to the old behaviour.
+
+## What I did
+I disabled the git-annex smudge filter in the file .git/info/attributes, which is now:
+
+    #* filter=annex
+    .* !filter
+
+## The $100,000 question
+Is this safe?
+
+## Question 2
+Is there another way of achieving this (adding plain git files outside git-annex)?
diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files/comment_1_3bf39b5e6990d4343d9154465d951a77._comment b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_1_3bf39b5e6990d4343d9154465d951a77._comment
new file mode 100644
index 0000000000..64d442c0ad
--- /dev/null
+++ b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_1_3bf39b5e6990d4343d9154465d951a77._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="Ilya_Shlyakhter"
+ avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
+ subject="adding plain git files in v7 repos"
+ date="2020-02-09T18:08:30Z"
+ content="""
+What git-annex version are you using, and what are your git config settings? In the current version, if `annex.largefiles` is not set, or if `annex.gitaddtoannex` is set to `false`, `git add` should [[add all files to plain git|forum/lets_discuss_git_add_behavior/#comment-37e0ecaf8e0f763229fd7b8ee9b5a577]].
+"""]] diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files/comment_2_a0628a013b57c00ea449b40b93edd385._comment b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_2_a0628a013b57c00ea449b40b93edd385._comment new file mode 100644 index 0000000000..37efa8fa08 --- /dev/null +++ b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_2_a0628a013b57c00ea449b40b93edd385._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="oliv5" + avatar="http://cdn.libravatar.org/avatar/d7f0d33c51583bbd8578e4f1f9f8cf4b" + subject="comment 2" + date="2020-02-09T22:59:35Z" + content=""" +Thks for pointing me to the \"git add v7\" very interesting thread. It explains everything. + +I'm using git-annex version: 7.20190912 (the default in Ubuntu 19.10 packages). + +Neither annex.gitaddtoannex nor annex.largefiles were set in my repo. In this version, I have to set annex.largefiles to \"nothing\" to get the expected behaviour, annex.gitaddtoannex true/false does not change anything. Of course, I verified this behaviour in a fresh new repo without any config change/hack. + +But even if annex.gitaddtoannex does not change anything right now, I'll set it to false, in case later version of git-annex uses it. + +"""]] diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files/comment_3_6095eaa58aa298aca72237381f567ffb._comment b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_3_6095eaa58aa298aca72237381f567ffb._comment new file mode 100644 index 0000000000..04edbe35aa --- /dev/null +++ b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_3_6095eaa58aa298aca72237381f567ffb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="updating git-annex" + date="2020-02-10T00:05:00Z" + content=""" +You can use [[install/conda]] to install a recent git-annex version that supports `annex.gitaddtoannex`. +"""]] diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files/comment_4_fd1a3658ed6f3b6af2297c768b3f1ac2._comment b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_4_fd1a3658ed6f3b6af2297c768b3f1ac2._comment new file mode 100644 index 0000000000..4754643883 --- /dev/null +++ b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_4_fd1a3658ed6f3b6af2297c768b3f1ac2._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="git-annex ubuntu package" + date="2020-02-10T02:07:10Z" + content=""" +P.S. It's a bit concerning if 7.20190912 is the default git-annex version in a common distro: it's the version most likely to confuse users with the changed `git add` behavior. Can this be fixed? Is it definitely the default version? I don't know much about Ubuntu packaging, but from [https://packages.ubuntu.com/](https://packages.ubuntu.com/), 19.10 (`focal`) seems to have version [7.20191230](https://packages.ubuntu.com/focal/utils/git-annex)? 
+"""]] diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files/comment_5_f47fec91d4ceebe653bc3fb221e2f8df._comment b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_5_f47fec91d4ceebe653bc3fb221e2f8df._comment new file mode 100644 index 0000000000..34b1aa52ac --- /dev/null +++ b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_5_f47fec91d4ceebe653bc3fb221e2f8df._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="oliv5" + avatar="http://cdn.libravatar.org/avatar/d7f0d33c51583bbd8578e4f1f9f8cf4b" + subject="comment 5" + date="2020-02-10T10:10:06Z" + content=""" +Thks for pointing me to Conda, I didn't know it. + +About Ubuntu 19.10 old git-annex package, it seems I have the expected revision. I rarely mess with the package manager (only nvidia drivers!), so I don't expect any issue there. + + + + +Yes, I agree, my git-annex package is an old one. Ubuntu is supposed to be a stable main stream distro, so optional packages like git-annex are not the cutting edge ones. And 19.10 is not a LTS release, you can expect the situation to be worse on 18.04 LTS. Same on Debian, Raspbian etc... + + +"""]] diff --git a/doc/forum/Annex_v7_repos_and_plain_git_files/comment_6_21648eecfaf9673b25d40327d11f7b59._comment b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_6_21648eecfaf9673b25d40327d11f7b59._comment new file mode 100644 index 0000000000..7dc724bf5c --- /dev/null +++ b/doc/forum/Annex_v7_repos_and_plain_git_files/comment_6_21648eecfaf9673b25d40327d11f7b59._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="default git-annex version in distros" + date="2020-02-10T18:55:29Z" + content=""" +\"you can expect the situation to be worse on 18.04 LTS\" -- if the default version there is before 7.20190912 there is no chance of user confusion, only versions 7.20190912 through 7.20191017 can cause it. I take it it's not possible to change the default version included in existing distributions? Sorry, don't know how Ubuntu/Debian packaging works... +"""]] diff --git a/doc/forum/Export_files_that_aren__39__t_locally_present__63__.mdwn b/doc/forum/Export_files_that_aren__39__t_locally_present__63__.mdwn new file mode 100644 index 0000000000..86af3ca436 --- /dev/null +++ b/doc/forum/Export_files_that_aren__39__t_locally_present__63__.mdwn @@ -0,0 +1,7 @@ +I use git-annex to manage my Sansa Clip Zip running Rockbox as a directory special remote since it has a FAT filesystem and I don't want to waste half the storage. I'd like to avoid having a copy of all my music and podcasts on my laptop as well, but git-annex only seems to be able to export files that are locally present. Would it be possible to have git-annex try to copy non-present files directly from remotes where it believes the files are present, starting with the lowest cost remote? + +It would also be cool to be able to convert subdirectory information on the remote into metadata in the repository. For example, I delete podcasts after listening to them, so when git-annex detects that it could either move it into the archive dir or add a "listened" tag in the repo. 
+ +Speaking of metadata, even though special remotes don't support it, I think it would be reasonable to treat files that have never been imported as having no metadata or some configurable default metadata per directory (like tag=listened or status=new) and use the metadata in the repo for files that have been imported previously when evaluating the wanted expression. + +Thoughts? diff --git a/doc/forum/Export_files_that_aren__39__t_locally_present__63__/comment_1_278b70c7bd7397e634df4a257fb604f9._comment b/doc/forum/Export_files_that_aren__39__t_locally_present__63__/comment_1_278b70c7bd7397e634df4a257fb604f9._comment new file mode 100644 index 0000000000..8898895f10 --- /dev/null +++ b/doc/forum/Export_files_that_aren__39__t_locally_present__63__/comment_1_278b70c7bd7397e634df4a257fb604f9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="comment 1" + date="2020-01-26T18:43:55Z" + content=""" +It's best to post enhancement suggestions under [[todo]]. In this case there's already a similar item at [[todo/git-annex-export_--from_option]], so let's move this thread there. +"""]] diff --git a/doc/forum/Get_files_in_repository_A__44___that_do_not_exist_in_repository_B.mdwn b/doc/forum/Get_files_in_repository_A__44___that_do_not_exist_in_repository_B.mdwn new file mode 100644 index 0000000000..69d3c54fd4 --- /dev/null +++ b/doc/forum/Get_files_in_repository_A__44___that_do_not_exist_in_repository_B.mdwn @@ -0,0 +1,17 @@ +Hey folks. + +Repository B is an external 4TB HDD kept in cold storage in an offsite location. It was a full copy of everything in Git Annex about a year ago and serves as an offsite, offline backup. + +I'd like to update it. + +Repository A is my laptop, with about a 500GB HDD. It probably has enough free space to `git annex get` a copy of all files that have been created since 1 year ago in Repository B. I'd like to: + +1) Get those files to my laptop that need updating in Repository B +2) Head to the offsite location. +3) Mount the Repository B HDD on my laptop +4) From Repository B, add A (the laptop) as a remote, run a "get" and a "sync", effectively updating Repository B with a "delta" of new files +5) From Repository A, sync with B, getting an updated index of what exists on Repository B, for updating all the other online repositories once I am back home. + +The question is, how do I structure the command in Step 1 to get the "delta" of files to update Repository B with? + +Thanks! diff --git a/doc/forum/Get_files_in_repository_A__44___that_do_not_exist_in_repository_B/comment_1_c09c45c64f75bd30deaae9c8d8633236._comment b/doc/forum/Get_files_in_repository_A__44___that_do_not_exist_in_repository_B/comment_1_c09c45c64f75bd30deaae9c8d8633236._comment new file mode 100644 index 0000000000..b25e42fccb --- /dev/null +++ b/doc/forum/Get_files_in_repository_A__44___that_do_not_exist_in_repository_B/comment_1_c09c45c64f75bd30deaae9c8d8633236._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="comment 1" + date="2020-01-24T00:26:35Z" + content=""" +[[git-annex-get]] takes [[git-annex-matching-options]]; see `--in=repository`.
+"""]] diff --git a/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_4_51548724d72e2cc6cb2c6e8c3b82e3e4._comment b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_4_51548724d72e2cc6cb2c6e8c3b82e3e4._comment new file mode 100644 index 0000000000..b75702da37 --- /dev/null +++ b/doc/forum/How_to_prevent_copies_on_a_single_device_and_use_only_hardlinks/comment_4_51548724d72e2cc6cb2c6e8c3b82e3e4._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="satya.ortiz-gagne@a4c92de91eb4fd5ae8fc9893bb4fd674a19f2e59" + nickname="satya.ortiz-gagne" + avatar="http://cdn.libravatar.org/avatar/79c93025f174cd2aff98fbb952702c09" + subject="using hardlinks" + date="2020-01-10T16:11:47Z" + content=""" +Thanks for your help. Yes I believe that post-checkout hook could do the trick but I really like your idea of using a FUSE filesystem. Thanks a lot for sharing. I also believe this could be the base to progressively get the content of an indexed archive (like .zip) as it's getting needed. + +The worktree is a very interesting feature but I'm also using [DataLad 0.11.8](https://www.datalad.org/) which is unfortunately incompatible with it for the moment. + +As for my objective to not use locked files, I initially though that the script of a library I was using to preprocess some data was failing because of the fact the files were symlinks but I couldn't reproduce. Unfortunately, too many factor changed so I'm just going to assume I was doing something wrong. Still, it would sometimes be useful to work with unlocked files in the case I'm doing a multi-phases (with multi-commits) preprocessing of a big file. In that case, a phase would modify the file, trigger a copy by unlocking it and annex the modified file. I would be interested into skipping the copy to save a significant amount of time and space since the intermediate states of the file are only temporary. The checksums are still interesting to make sure the phase correctly executed. But that is very specific and will not happen too often so I'm fine with workarounds. +"""]] diff --git a/doc/forum/Manually_moving_annex_objects_to_new_repo.mdwn b/doc/forum/Manually_moving_annex_objects_to_new_repo.mdwn new file mode 100644 index 0000000000..0aba25fd49 --- /dev/null +++ b/doc/forum/Manually_moving_annex_objects_to_new_repo.mdwn @@ -0,0 +1,12 @@ +I want to create a repo with the contents of an old repo, but with a fresh commit history, etc. I do not care about preserving the old repo. + +However, I just want to make sure that the following steps will not result in the new repo being broken in some way: + +1. Create a new git annex repo. +2. Copy all of the symlinks from the old repo to the new one. +3. Move .git/annex/objects from the old repo to the new one. +4. Then "git annex add" everything in the new repo and commit. + +Please let me know also if there is a better way to achieve the same results. + +Thanks for your help. 
diff --git a/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_1_f9b894bafc5ab19ec7b0b500ddfe00ef._comment b/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_1_f9b894bafc5ab19ec7b0b500ddfe00ef._comment new file mode 100644 index 0000000000..b885ca1bde --- /dev/null +++ b/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_1_f9b894bafc5ab19ec7b0b500ddfe00ef._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="starting over with a new commit history" + date="2020-01-25T01:07:19Z" + content=""" +You could [squash all commits](https://stackoverflow.com/questions/25356810/git-how-to-squash-all-commits-on-branch) on all your branches to one commit. See also [[git-annex-forget]] . +"""]] diff --git a/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_2_6125e6a7d6b15dee7bf6a6bfe99bcf73._comment b/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_2_6125e6a7d6b15dee7bf6a6bfe99bcf73._comment new file mode 100644 index 0000000000..cae3575085 --- /dev/null +++ b/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_2_6125e6a7d6b15dee7bf6a6bfe99bcf73._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="CandyAngel" + avatar="http://cdn.libravatar.org/avatar/15c0aade8bec5bf004f939dd73cf9ed8" + subject="comment 2" + date="2020-01-25T01:11:42Z" + content=""" +Create the new repository and then add the \"source\" repository as a [local cache](/tips/local_caching_of_annexed_files). + +This will allow you to copy the symlinks to the new repository and `git annex get` the content, in any order you like, with all the safety precautions of git-annex. The fact that it came from the cache isn't stored either, so it is added cleanly! + +I use this method very heavily and it works really well. +"""]] diff --git a/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_3_8203d8e0444471b9360b679f49f67fac._comment b/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_3_8203d8e0444471b9360b679f49f67fac._comment new file mode 100644 index 0000000000..e13fc58680 --- /dev/null +++ b/doc/forum/Manually_moving_annex_objects_to_new_repo/comment_3_8203d8e0444471b9360b679f49f67fac._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="erewhon" + avatar="http://cdn.libravatar.org/avatar/b9bd5ad7176ebe149d0f051dcfe0a63e" + subject="Thank you" + date="2020-02-02T18:44:51Z" + content=""" +Thank you both for the suggestions. I am going to give the approach using the local cache a try. +"""]] diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects.mdwn b/doc/forum/Paths_changed_in_.git__47__annex__47__objects.mdwn new file mode 100644 index 0000000000..4d0a889894 --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects.mdwn @@ -0,0 +1,31 @@ +Hello, + +Has anyone encountered this issue: + +I have a repository in version 7. It has various remotes and clones from my frantic attempts to recover my data without really knowing what I'm doing. Anyways, the files in the repository are text files with hashes in them: + + + cat demo_beheer.gpkg + /annex/objects/SHA256E-s204800--a518c074bc22f673f0c73191a01426fef0a7d8b262a17d2729a4a3ac51da40ce.gpkg + + +But in `.git/annex/objects` there are two-letter directories. I can find this file in there, but its name is different than the above. 
All the .gpkg files under `.git/annex/objects` are intact (I can open them), and also appear to be versions of the same file (the `demo_beheer.gpkg` one I'm looking for). + + + find -name '*.gpkg' -type f + ./.git/annex/objects/Gz/v4/SHA256E-s204800--01120000361af90c29ee27a51ef7a6157bc413dc768d8ba495b7df8360c6dbfe.gpkg/SHA256E-s204800--01120000361af90c29ee27a51ef7a6157bc413dc768d8ba495b7df8360c6dbfe.gpkg + ./.git/annex/objects/mV/6j/SHA256E-s204800--222d8fe6975a07a6305b27a453c7db62df0518458d53252bee2f8bac16d1329c.gpkg/SHA256E-s204800--222d8fe6975a07a6305b27a453c7db62df0518458d53252bee2f8bac16d1329c.gpkg + ./.git/annex/objects/Q3/30/SHA256E-s204800--71d2d90cb98ea98806b6f9ae479ffae7d2d7f6b1fb6ea970c108ef0b7b0a52ec.gpkg/SHA256E-s204800--71d2d90cb98ea98806b6f9ae479ffae7d2d7f6b1fb6ea970c108ef0b7b0a52ec.gpkg + ./.git/annex/objects/X7/QJ/SHA256E-s204800--d3ce397eb2f1d5080641e15a8d28a5ebabf56ce03b756ed6ceb93fec0d390c72.gpkg/SHA256E-s204800--d3ce397eb2f1d5080641e15a8d28a5ebabf56ce03b756ed6ceb93fec0d390c72.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s3989504--78a7d01d5b7331cb867464c5787264292b39905533be58822a63c9f6d9ea8b3d.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s1929216--03c015bdd9ac6efadf5d855bed734fb20531938d93d70c075cfc47b5f3f3a64b.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s946176--49fe39ceb46d2518bfff4dfe2e1e83c043ad76f13871c4143a5fff68540d943c.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s204800--a518c074bc22f673f0c73191a01426fef0a7d8b262a17d2729a4a3ac51da40ce.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s51445760--a44651f781e5fce11bc498ba7fc30a0b79e5f7e282226852ef949f45888fb6eb.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s4091904--3eb3df304d9fab549dcc657198c88bfc300f8c11836ecc62ae68538a67e3d430.gpkg + ./.git/annex/transfer/failed/download/a3f8a46a-60fe-58e2-901b-2c093bcc22d3/SHA256E-s63684608--e5ff8eb805b96c7e231b7450514d101397a98a11f9320416d78084b9cad58e93.gpkg + ./demo_beheer.gpkg + +Is this a recognized result of some slip-up I've made? I can't remember what exactly I did to reach here other than try to clone the repo various times, and I might have messed up some paths because of struggling with the gcrypt url formats. I can of course provide more details if that would help. Alternatively, does someone have an idea of how I could recover these files simply, to start over? + +Help would be much appreciated. diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_1_e89aa61303d97bfd433e24173867723a._comment b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_1_e89aa61303d97bfd433e24173867723a._comment new file mode 100644 index 0000000000..e73ec845f7 --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_1_e89aa61303d97bfd433e24173867723a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="fasthans@87884c78197fe1a25ac25ae7fa1d930c147e9592" + avatar="http://cdn.libravatar.org/avatar/6d89ddec3053d8df7e57cf2f28c8cdcd" + subject="prior research" + date="2020-01-07T19:07:17Z" + content=""" +I forgot to mention, all I've been able to find that seemed related was this post: https://git-annex.branchable.com/bugs/fix_git-annex_paths___47___objects___40__repository_not_available__41__/ + +but I couldn't figure out if this was a similar problem to mine. 
+"""]] diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_2_3e467d256ef7c2f4d469073c00cacbca._comment b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_2_3e467d256ef7c2f4d469073c00cacbca._comment new file mode 100644 index 0000000000..f1ff0f671d --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_2_3e467d256ef7c2f4d469073c00cacbca._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-07T19:24:31Z" + content=""" +"/annex/objects/SHA256E-s204800--a518c074bc22f673f0c73191a01426fef0a7d8b262a17d2729a4a3ac51da40ce.gpkg" is +used when an annexed object in a v7 repository is unlocked. + +If you run `git annex lock` on it, it will be turned backed into a symlink to +the .git/annex/objects file. + +Normally unlocked files have that pointer replaced with the file content when +it's available, and only when the file content is not available would you see +that pointer. I guess you've done something to get your repository into this +state where the content is present but the unlocked file is not populated with +it. It's likely that running `git annex fsck` on the file would fix that problem. +"""]] diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_3_119c181afe351a412e8a13c59175ec45._comment b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_3_119c181afe351a412e8a13c59175ec45._comment new file mode 100644 index 0000000000..a08729f665 --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_3_119c181afe351a412e8a13c59175ec45._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-07T19:31:14Z" + content=""" +Looking more closely at your list of files, your repository does not +contain a copy of the current version of demo_beheer.gpkg, which is +"SHA256E-s204800--a518c074bc22f673f0c73191a01426fef0a7d8b262a17d2729a4a3ac51da40ce.gpkg" + +There's evidence you tried to download that key from somewhere, but the +download failed. + +So, it seems that your repository is not in any unsual state, you're just +confused about how an unlocked file that is not present looks. Probably +commands like these will be useful: + + git annex get demo_beheer.gpkg + git annex whereis demo_beheer.gpkg +"""]] diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_4_0a9cec8f4e26c2e2365b54b29c91ee3d._comment b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_4_0a9cec8f4e26c2e2365b54b29c91ee3d._comment new file mode 100644 index 0000000000..c5ebf224b9 --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_4_0a9cec8f4e26c2e2365b54b29c91ee3d._comment @@ -0,0 +1,41 @@ +[[!comment format=mdwn + username="fasthans@87884c78197fe1a25ac25ae7fa1d930c147e9592" + avatar="http://cdn.libravatar.org/avatar/6d89ddec3053d8df7e57cf2f28c8cdcd" + subject="comment 4" + date="2020-01-08T12:08:23Z" + content=""" +Thanks for your attention, Joey. + +Hmm. `git annex fsck` reports 'ok' for all files. 
After that, running get/whereis I get: + +```` +git annex get demo_beheer.gpkg +get demo_beheer.gpkg (not available) + Try making some of these repositories available: + a3f8a46a-60fe-58e2-901b-2c093bcc22d3 -- smdata_encrypted_remote_wd_elements_small +failed +git-annex: get: 1 failed +```` + +this is the remote in question, which is accessible (it's on the same drive as the current repo) + +```` +git remote -v +smdata_encrypted_remote_wd_elements_small gcrypt::/media/hans/Elements/smdata_encrypted_remote_wd_elements_small/ (fetch) +smdata_encrypted_remote_wd_elements_small gcrypt::/media/hans/Elements/smdata_encrypted_remote_wd_elements_small/ (push) +```` + +However, I can run `git annex sync smdata_remote_wd_elements_small`, which does complete successfully, so why is it not accessible with `get` or `copy`? + +If I `lock` demo_beheer.gpkg, it turns into this symlink: + +```` +lrwxrwxrwx 1 hans hans 198 sep 2 07:32 demo_beheer.gpkg -> .git/annex/objects/px/Mg/SHA256E-s204800--a518c074bc22f673f0c73191a01426fef0a7d8b262a17d2729a4a3ac51da40ce.gpkg/SHA256E-s204800--a518c074bc22f673f0c73191a01426fef0a7d8b262a17d2729a4a3ac51da40ce.gpkg +```` + +of which the target indeed does not exist (the directory exists, not the file). However, git annex get still fails after locking (and a subsequent sync) and I am at a loss to know why. + +This wouldn't be due to the nature of the remote, or my URL for it, or something? Decryption works fine when syncing. + + +"""]] diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_5_df986910016ce53a6155edf96f4c312d._comment b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_5_df986910016ce53a6155edf96f4c312d._comment new file mode 100644 index 0000000000..675ecc6841 --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_5_df986910016ce53a6155edf96f4c312d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-01-08T18:24:43Z" + content=""" +`git-annex sync` does not, by default, download the content of annexed +files. Use `git annex get` +"""]] diff --git a/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_6_c4fe35a93ed32f7b14c5ffd5a4a8ec27._comment b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_6_c4fe35a93ed32f7b14c5ffd5a4a8ec27._comment new file mode 100644 index 0000000000..80afc809c8 --- /dev/null +++ b/doc/forum/Paths_changed_in_.git__47__annex__47__objects/comment_6_c4fe35a93ed32f7b14c5ffd5a4a8ec27._comment @@ -0,0 +1,77 @@ +[[!comment format=mdwn + username="hans" + avatar="http://cdn.libravatar.org/avatar/8bf4f81bbfce61274f5def013dde7083" + subject="comment 6" + date="2020-01-16T14:22:03Z" + content=""" +I have been trying `get` and `copy` and `whereis`. They all report failure and suggest making the remote `a3f8a46a-60fe-58e2-901b-2c093bcc22d -- smdata_encrypted_remote_wd_elements_small` available. + +The remote with this name is available -- at least, I can `sync` with it. However, in `.git/config` the `annex-uuid` is `97d51497-158f-54ef-baef-77a720c9d758`. + +as far as I can tell -- I'm shooting in the dark here -- that isn't the issue, because if I change the uuid in `.git/config` it still gives the same error, now with the changed uuid. + +So, let me review my mental model of this situation, which will hopefully reveal the gaping holes: + +normally, content tracking means each repo knows which other repos have copies of the file. 
In locked mode, as you said, the file is a symlink to the annexed object and if that target is missing the symlink is simply broken. In unlocked mode, the file is present at its correct location but if it is missing it is replaced with a text file with an annex object path as the contents. + +So, my content has somehow gone missing. + +Not knowing very much about git-annex's internals, my next question would be: how can I look for this content? I can't explore the files in the remote manually, since they are encrypted. This is what happens if I clone that bare repo again: + +``` +$mkdir test && cd test +$git init +$git remote add origin gcrypt::/media/hans/Elements/smdata_encrypted_remote_wd_elements_small +$git annex init +init ok +(recording state in git...) +$git annex whereis demo_beheer.gpkg +whereis demo_beheer.gpkg (0 copies) failed #ok, so I need to sync first +$git annex sync +commit +On branch master +nothing to commit, working tree clean +ok +pull origin +gcrypt: Decrypting manifest +gpg: Signature made do 16 jan 2020 14:08:44 CET +gpg: using RSA key xxxxxxxxxxxxxxxxxxxxxx +gpg: Good signature from \"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" [ultimate] +From gcrypt::/media/hans/Elements/smdata_encrypted_remote_wd_elements_small + * [new branch] synced/master -> origin/synced/master + * [new branch] synced/git-annex -> origin/synced/git-annex + * [new branch] git-annex -> origin/git-annex +ok +(merging origin/git-annex into git-annex...) +(recording state in git...) +push origin +gcrypt: Decrypting manifest +gpg: Signature made do 16 jan 2020 14:08:44 CET +gpg: using RSA key xxxxxxxxxxxxx +gpg: Good signature from \"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" [ultimate] +Enumerating objects: 8, done. +Counting objects: 100% (8/8), done. +Compressing objects: 100% (6/6), done. +Total 8 (delta 1), reused 1 (delta 0) +gcrypt: Encrypting to: --throw-keyids --default-recipient-self +gcrypt: Requesting manifest signature +gpg: using \"xxxxxxxxxxxxxxxxxx\" as default secret key for signing +To gcrypt::/media/hans/Elements/smdata_encrypted_remote_wd_elements_small + 10150f10..79a49878 git-annex -> synced/git-annex +ok +#try again +$git annex whereis demo_beheer.gpkg +whereis demo_beheer.gpkg (1 copy) + a3f8a46a-60fe-58e2-901b-2c093bcc22d3 -- smdata_encrypted_remote_wd_elements_small +ok +``` + +So: the `git-annex` branch is indicating that the content is available in this bare remote, but is that not true? Is there a way for me to determine (with or without git-annex) if the content is actually there or not? + +From the speed at which the `git annex get` command returns its error message I get the impression that it's not actually checking the remote, but determining from the local `git-annex` branch that the content is not in the remote. Is that correct? Why does it then suggest making that remote available? In that case, is there a way to figure out if and when the `git-annex` branch logs have diverged from reality, OR alternatively how to find out if there is content in that repository? + +What is also really bugging me is that I have the content of some files available in one of my clones, but I can't access them via git-annex. My files are of such a number and format that manually fishing them out of `.git/annex/objects` is not feasible (e.g. shapefiles which consist of about six different files). 
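One way to cross-check the location tracking against what the remote really contains is a remote fsck; a sketch using the remote name from this thread:

    # recheck which keys the remote actually has and update the location log accordingly
    git annex fsck --from smdata_encrypted_remote_wd_elements_small --fast
    # then show which repositories git-annex now believes have each file
    git annex list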
+ + +Finally, the most relevant git-annex command I've been able to find is `git annex unused`, which gives me some interesting information in the bare repository (1900+ unused objects), but `addunused` doesn't seem to bring things back. +"""]] diff --git a/doc/forum/Recomended_Setup_for_Syncthing_Remote.mdwn b/doc/forum/Recomended_Setup_for_Syncthing_Remote.mdwn new file mode 100644 index 0000000000..56ace65c32 --- /dev/null +++ b/doc/forum/Recomended_Setup_for_Syncthing_Remote.mdwn @@ -0,0 +1,5 @@ +Hello! + +     So with Android 10, Syncthing may be facing some issues regarding where we can put our files on the device. However, the Syncthing-Fork from the Google Play store has granular control over when sync tasks can run, which I very much require; I was thinking of combining git-annex and Syncthing by syncing the `.git/annex/objects` directory, but I am still *very* new to this. Can anyone advise me what the best setup would be for the procedure, according to [`syncthing special remote`](https://git-annex.branchable.com/todo/syncthing_special_remote/)? + +     Thank you kindly for the help! diff --git a/doc/forum/Recovering_from_a_cross-merge.mdwn b/doc/forum/Recovering_from_a_cross-merge.mdwn new file mode 100644 index 0000000000..bde8138b41 --- /dev/null +++ b/doc/forum/Recovering_from_a_cross-merge.mdwn @@ -0,0 +1,80 @@ +Intro +===== + +This experience report +describes steps I've taken for recovering from a situation where +an *unrelated* git-annex's remote was accidentally merged into a repository. + +It is posted to the forum for use by anyone who finds themselves in the same situation +(especially myself…). + +The root cause of the issue was a copy-pasted `git remote add` gone wrong, +and a subsequent `git annex sync`, that "contaminated" the rest of my remotes. +That led to `git annex info` showing the union of all the repositories available to the two repositories, +and `fsck --all` runs looking for files from any repository. + +It should go without saying, but here it is anyway: + +**Following these steps can eventualy lead to data loss**. + +The precautions I've taken are + +* knowing that two complete copies of the data sets exist, +* having a filesystem level snapshot of a least one of those copies, and +* not starting any file dropping until all remotes have completed fscks at the end. + +Identifying the last good state +=============================== + +By looking for the first occurrence of the UUID of one of the bad new remotes +in `git log --patch git-annex`, +I've identified the last good git-annex state before the merge. + +Tagging that as `git tag before-accidental-merging-with-other-server 83c1b945c2428cefa968aec587229f6a87649de6`. + + +Removing potentially mergable information +========================================= + +git-annex is eager to pull in updates lying around -- +while this is usually a good thing, +here it incurs the danger of resurrecting the accident. + +On all remotes that were accessed since the accident, +I've executed this to remove both the local synced/git-annex branch +and any memory of cached remote branches: + + $ git branch -D synced/git-annex + $ git branch -r | sed 's@remotes/@@' | xargs git branch -d -r + +and restore the git-annex branch: + + $ git branch -f git-annex 83c1b945c2428cefa968aec587229f6a87649de6 + +That proved to be insufficient -- +after I had first only done this, +things looked good for a while and then after the first `git annex fsck --fast`, +the remotes were back again. 
+ +The only file large enough to contain the offending data in .git/annex was .git/annex/index, +so I've removed that backed by [[internals]]' statement of it being safe to remove: + + $ rm .git/annex/index + +(did that on all remotes; on bare ones it's `annex/index`, obviously). + +Verification +============ + +To ensure everyone is on the same page, +I've run `git annex sync`; +its speed already showed that now there's no information about a second repository being transferred. + +Subsequently, I've run `git annex fsck --all` in all locations. +(That *did* show that I should previously have marked some keys as dead when they were migrated from SHA256E to SHA256, +but that's beside the point here). + +Even after a sync following the above, +no traces of the bad merge (be it in the form of a repository or of a file from there) have shown up any more. + +-- [[chrysn]] diff --git a/doc/forum/Transparent_compression_of_files.mdwn b/doc/forum/Transparent_compression_of_files.mdwn new file mode 100644 index 0000000000..2b3a28a35f --- /dev/null +++ b/doc/forum/Transparent_compression_of_files.mdwn @@ -0,0 +1,5 @@ +Hi, + +I have a lot of files which are around 80MB and can be easily compressed down to ~55MB. I did some tests with brotli and decompression was reasonable fast, at least fast enough that I would probably not notice given my current transfer speeds. In order to save disk space I would like to able to transparently compress my files. That means, a file is stored compressed in git-annex's blob store and decompressed when I `get` it. + +I understand that gpg does compression, but I don't want to deal with encryption, all my repos are local. I've looked at the code and from what I could see the Hash-Backends are rather simple. However, that's probably not the right place. Is this a planned feature? Would it be hard to implement? Of course, ideally the compression algorithm should be configurable. E.g. by just doing a syscall to `brotli` or `gzip`. diff --git a/doc/forum/Transparent_compression_of_files/comment_1_7242325defa000572ca8e78e29012451._comment b/doc/forum/Transparent_compression_of_files/comment_1_7242325defa000572ca8e78e29012451._comment new file mode 100644 index 0000000000..a2a96f1b77 --- /dev/null +++ b/doc/forum/Transparent_compression_of_files/comment_1_7242325defa000572ca8e78e29012451._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="lykos@d125a37d89b1cfac20829f12911656c40cb70018" + nickname="lykos" + avatar="http://cdn.libravatar.org/avatar/085df7b04d3408ba23c19f9c49be9ea2" + subject="comment 1" + date="2020-02-13T12:29:40Z" + content=""" +It's quite easy to implement an [external special remote](https://git-annex.branchable.com/special_remotes/external/) that does transparent compression. You can use the example implementations of the directory remote as a starting point (see [Bash](https://git-annex.branchable.com/special_remotes/external/example.sh/) or [Python](https://github.com/Lykos153/AnnexRemote/blob/master/examples/git-annex-remote-directory)). Then just modify the dostore() and doretrieve() functions to your liking. You probably don't want to support exporting when compression is enabled, though. 
+"""]] diff --git a/doc/forum/Transparent_compression_of_files/comment_2_a75e32b0825de0b405c45de14b9711c0._comment b/doc/forum/Transparent_compression_of_files/comment_2_a75e32b0825de0b405c45de14b9711c0._comment new file mode 100644 index 0000000000..a1ba8b4d4b --- /dev/null +++ b/doc/forum/Transparent_compression_of_files/comment_2_a75e32b0825de0b405c45de14b9711c0._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="jochen.keil@38b1f86ab65128dab3e62e726403ceee4f5141bf" + nickname="jochen.keil" + avatar="http://cdn.libravatar.org/avatar/a1329c0b3a262017553cc5497aa12c18" + subject="comment 2" + date="2020-02-14T12:04:53Z" + content=""" +Thanks for your hint, I appretiate that and I think it could be done that way. + +However, on closer thought I was wondering if git-annex is the right tool for job. I had the impression that my idea came more from a hammer and nail situation. So, FUSE came to my mind. I popped it into google and found this: https://github.com/FS-make-simple/fusecompress +Unfortunately this does not look very active though. + +Now, since I'm already at the FS layer I can look into ZFS compression. My repos are already on ZFS but I haven't looked at the built-in compression yet. I think I'll evaluate that first. If none of that is satisfactory I'll turn to git-annex again :) +"""]] diff --git a/doc/forum/git-annex-sync_without_syncing_master.mdwn b/doc/forum/git-annex-sync_without_syncing_master.mdwn new file mode 100644 index 0000000000..4e8f6e05fb --- /dev/null +++ b/doc/forum/git-annex-sync_without_syncing_master.mdwn @@ -0,0 +1,6 @@ +How do I run git-annex-sync without syncing master? + +i.e. + +* fetch and union merge the git-annex branch from every remote listed +* push the updated git-annex branch to every remote listed diff --git a/doc/forum/git-annex-sync_without_syncing_master/comment_1_8ed0014e53a7ab49d76c072de074adda._comment b/doc/forum/git-annex-sync_without_syncing_master/comment_1_8ed0014e53a7ab49d76c072de074adda._comment new file mode 100644 index 0000000000..c870f76d02 --- /dev/null +++ b/doc/forum/git-annex-sync_without_syncing_master/comment_1_8ed0014e53a7ab49d76c072de074adda._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="Chel" + avatar="http://cdn.libravatar.org/avatar/a42feb5169f70b3edf7f7611f7e3640c" + subject="comment 1" + date="2020-02-03T22:36:59Z" + content=""" +If I am not mistaken, you need to: + +1) manually fetch `git-annex` from everywhere, + +2) not have the `synced/master` branch (or it should be an ancestor of `master`), + +3) use [[git-annex-merge]] or `git annex sync --no-pull --no-push` (maybe just `--no-pull` is enough), + +4) manually push `git-annex` back to every other repository. + +Other remarks: + +- You can use `git annex sync --cleanup` to delete all `synced/*` branches. +- Or you can check out another branch and run `git annex sync` — it will fetch all branches, but merge only the current. +- I don't fully understand how `synced/git-annex` works, so maybe the real answer is more complex. 
+"""]] diff --git a/doc/forum/git-annex-sync_without_syncing_master/comment_2_585648a04a02760a1f16394c00728d79._comment b/doc/forum/git-annex-sync_without_syncing_master/comment_2_585648a04a02760a1f16394c00728d79._comment new file mode 100644 index 0000000000..4c93dd0f42 --- /dev/null +++ b/doc/forum/git-annex-sync_without_syncing_master/comment_2_585648a04a02760a1f16394c00728d79._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="jafpoU" + avatar="http://cdn.libravatar.org/avatar/b1016484e481ea844c1fe42ace53e88f" + subject="comment 2" + date="2020-02-04T03:35:48Z" + content=""" +Thank you! + +Just to confirm then, `git-annex-merge` will + +1. union merge any ref with basename `git-annex` onto `git-annex` +2. union merge `synced/$branch` onto `$branch` where `$branch` is the checked out branch + +? + +My goal is for all remotes to have a consistent view of where the annexed files are, without auto-merging any normal branches. + +Is the process you described above the easiest way to achieve this? +"""]] diff --git a/doc/forum/git-annex-sync_without_syncing_master/comment_3_233c20435643d2701c94a2cf30ca6483._comment b/doc/forum/git-annex-sync_without_syncing_master/comment_3_233c20435643d2701c94a2cf30ca6483._comment new file mode 100644 index 0000000000..2b1fd79229 --- /dev/null +++ b/doc/forum/git-annex-sync_without_syncing_master/comment_3_233c20435643d2701c94a2cf30ca6483._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="Nick_P" + avatar="http://cdn.libravatar.org/avatar/abf8aa3ac1a976a6a292416b9c604581" + subject="comment 3" + date="2020-02-12T10:50:06Z" + content=""" +Relatedly, my team is struggling to identify the process for \"add and share a file\" - e.g., add the hash to git, and copy the content to a central git-annex remote, and push the git-annex branch information that the file is now in the central remote. + +We used 'git annex sync' and 'git annex sync --content' for a while, but this creates the synced/ branches which introduced confusion; and it's unclear why both --content and with args are needed; and if I remember, it also committed or pushed changes we didn't want it to. + +Now we do like this: + + git checkout features/your-new-branch + git annex add my-new-file + git commit -m \".....\" + git push # pushes your \"features\" branch + git annex copy --to SharedRemote --jobs=5 + git fetch origin git-annex:refs/remotes/origin/git-annex + git annex merge + git push origin git-annex + +I've read through , , + +Maybe the OP has the same confusion, that there seems to be a missing git-annex-sync that does only (a) copy to a remote and (b) the git-annex branch work - Anyone able to help out direct us to how it's intended to be done? +"""]] diff --git a/doc/forum/git-annex-sync_without_syncing_master/comment_4_a8f681ec33f2fef16b9b8eb6633f8fe4._comment b/doc/forum/git-annex-sync_without_syncing_master/comment_4_a8f681ec33f2fef16b9b8eb6633f8fe4._comment new file mode 100644 index 0000000000..582d294c7f --- /dev/null +++ b/doc/forum/git-annex-sync_without_syncing_master/comment_4_a8f681ec33f2fef16b9b8eb6633f8fe4._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Nick_P" + avatar="http://cdn.libravatar.org/avatar/abf8aa3ac1a976a6a292416b9c604581" + subject="comment 4" + date="2020-02-18T10:15:13Z" + content=""" +I think the new 'git annex sync --only-annex' will replace the last three lines of my sequence above. I aim to experiment to confirm that. 
+https://git-annex.branchable.com/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/#comment-52dbdba2331659011519bcc4bda4cf18 +"""]] diff --git a/doc/forum/git-annex-sync_without_syncing_master/comment_5_a125638afe0cf57fe97e6051da943406._comment b/doc/forum/git-annex-sync_without_syncing_master/comment_5_a125638afe0cf57fe97e6051da943406._comment new file mode 100644 index 0000000000..115c9ea42d --- /dev/null +++ b/doc/forum/git-annex-sync_without_syncing_master/comment_5_a125638afe0cf57fe97e6051da943406._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="Nick_P" + avatar="http://cdn.libravatar.org/avatar/abf8aa3ac1a976a6a292416b9c604581" + subject="comment 5" + date="2020-02-18T10:17:25Z" + content=""" +(repeat with corrected formatting) + +I think the new `git annex sync --only-annex` will replace the last three lines of my sequence above. I aim to experiment to confirm that. + + +"""]] diff --git a/doc/git-annex-adjust/comment_3_c5af3a73af8925839413467a7d1e0e14._comment b/doc/git-annex-adjust/comment_3_c5af3a73af8925839413467a7d1e0e14._comment new file mode 100644 index 0000000000..9e0cfbb342 --- /dev/null +++ b/doc/git-annex-adjust/comment_3_c5af3a73af8925839413467a7d1e0e14._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="mario" + avatar="http://cdn.libravatar.org/avatar/4c63b0935789d29210d0bd8cad8d7ac7" + subject="Thank you" + date="2020-01-23T19:52:47Z" + content=""" +This is a great feature, especially `--hide-missing`! I really missed this in the past. (Strangely it took me until now to notice that you implemented it.) Thank you. +"""]] diff --git a/doc/git-annex-config.mdwn b/doc/git-annex-config.mdwn index 2347ab2463..01c5341eff 100644 --- a/doc/git-annex-config.mdwn +++ b/doc/git-annex-config.mdwn @@ -74,7 +74,12 @@ These settings can be overridden on a per-repository basis using * `annex.synccontent` - Set to true to make git-annex sync default to syncing content. + Set to true to make git-annex sync default to syncing annexed content. + +* `annex.synconlyannex` + + Set to true to make git-annex sync default to only syncing the git-annex + branch and annexed content. * `annex.securehashesonly` diff --git a/doc/git-annex-enableremote.mdwn b/doc/git-annex-enableremote.mdwn index d424f6056c..6620829854 100644 --- a/doc/git-annex-enableremote.mdwn +++ b/doc/git-annex-enableremote.mdwn @@ -8,26 +8,20 @@ git annex enableremote `name|uuid|desc [param=value ...]` # DESCRIPTION -Enables use of an existing remote in the current repository. +Enables use of an existing remote in the current repository, +that was set up earlier by `git annex initremote` run in +another clone of the repository. -This is often used to enable use of a special (non-git) remote, by -a different repository than the one in which it was -originally created with the initremote command. - -It can also be used to explicitly enable a git remote, -so that git-annex can store the contents of files there. First -run `git remote add`, and then `git annex enableremote` with the name of -the remote. - -When enabling a special remote, specify the same name used when originally -creating that remote with `git annex initremote`. Run +When enabling a remote, specify the same name used when originally +setting up that remote with `git annex initremote`. Run `git annex enableremote` without any name to get a list of -special remote names. Or you can specify the uuid or description of the -special remote. +remote names.
Or you can specify the uuid or description of the +remote. -Some special remotes may need parameters to be specified every time they are -enabled. For example, the directory special remote requires a directory= -parameter every time. +Some types of special remotes need parameters to be specified every time +they are enabled. For example, the directory special remote requires a +directory= parameter every time. The command will prompt for any required +parameters you leave out. This command can also be used to modify the configuration of an existing special remote, by specifying new values for parameters that are @@ -59,8 +53,9 @@ a new clone, it will will attempt to enable the special remote. Of course, this works best when the special remote does not need anything special to be done to get it enabled. -(This command also can be used to enable a remote that git-annex has been -prevented from using by the `remote..annex-ignore` setting.) +(This command also can be used to enable a git remote that git-annex +has found didn't work before and gave up on using, setting +`remote..annex-ignore`.) # SEE ALSO diff --git a/doc/git-annex-initremote.mdwn b/doc/git-annex-initremote.mdwn index 8340827f6d..ca204eac7f 100644 --- a/doc/git-annex-initremote.mdwn +++ b/doc/git-annex-initremote.mdwn @@ -19,8 +19,11 @@ For a list and details, see The remote's configuration is specified by the parameters passed to this command. Different types of special remotes need different -configuration values. The command will prompt for parameters as needed. A -few parameters that are supported by all special remotes are documented in +configuration values, so consult the documentation of a special remote for +details. The command will prompt for any required parameters you leave out; +you can also pass --whatelse to see additional parameters. + +A few parameters that are supported by all special remotes are documented in the next section below. Once a special remote has been initialized once with this command, @@ -35,6 +38,15 @@ want to use `git annex renameremote`. # OPTIONS +* `--whatelse` / `-w` + + Describe additional configuration parameters that you could specify. + + For example, if you know you want a S3 remote, but forget how to + configure it: + + git annex initremote mys3 type=S3 --whatelse + * `--fast` When initializing a remote that uses encryption, a cryptographic key is @@ -66,7 +78,7 @@ want to use `git annex renameremote`. * `encryption` - All special remotes support encryption. You will need to specify + Almost all special remotes support encryption. You will need to specify what encryption, if any, to use. If you do not want any encryption, use `encryption=none` @@ -76,6 +88,7 @@ want to use `git annex renameremote`. For details about this and other encrpytion settings, see + or --whatelse * `autoenable` diff --git a/doc/git-annex-reinject.mdwn b/doc/git-annex-reinject.mdwn index e280a129bd..cd91226c62 100644 --- a/doc/git-annex-reinject.mdwn +++ b/doc/git-annex-reinject.mdwn @@ -36,8 +36,13 @@ needing to specify the dest file. With this option, each specified src file is hashed using the default key-value backend (or the one specified with `--backend`), and if git-annex - has a record of the file having been in the annex before, the content is - reinjected. + has a record of the resulting key having been in the annex before, the + content is reinjected. 
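For instance, a hypothetical invocation of that option (the path is made up):

    # hash each file under /mnt/backup and reinject any whose key git-annex
    # already has a record of, without having to name destination files
    git annex reinject --known /mnt/backup/*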
+ + Note that, when using a key-value backend that includes the filename + extension in the key, this will only work if the src files have the same + extensions as the files with the same content that was originally added + to git-annex. Note that this will reinject old versions of files that have been modified or deleted from the current git branch. diff --git a/doc/git-annex-sync.mdwn b/doc/git-annex-sync.mdwn index 440aea59fd..e4cf8d0803 100644 --- a/doc/git-annex-sync.mdwn +++ b/doc/git-annex-sync.mdwn @@ -12,7 +12,7 @@ This command synchronizes the local repository with its remotes. The sync process involves first committing any local changes to files that have previously been added to the repository, -then fetching and merging the `synced/master` and the `git-annex` branch +then fetching and merging the current branch and the `git-annex` branch from the remote repositories, and finally pushing the changes back to those branches on the remote repositories. You can use standard git commands to do each of those steps by hand, or if you don't want to @@ -21,11 +21,18 @@ worry about the details, you can use sync. The content of annexed objects is not synced by default, but the --content option (see below) can make that also be synchronized. -Note that syncing with a remote will not normally update the remote's working -tree with changes made to the local repository. (Unless it's configured -with receive.denyCurrentBranch=updateInstead.) However, those changes -are pushed to the remote, so they can be merged into its working tree -by running "git annex sync" on the remote. +When using git-annex, often remotes are not bare repositories, because +it's helpful to add remotes for nearby machines that you want +to access the same annexed content. Syncing with a non-bare remote will +not normally update the remote's current branch with changes from the local +repository. (Unless the remote is configured with +receive.denyCurrentBranch=updateInstead.) + +To make working with such non-bare remotes easier, sync pushes not only +local `master` to remote `master`, but also to remote `synced/master` (and +similar with other branches). When `git-annex sync` is later run on the +remote, it will merge the `synced/` branches that the repository has +received. # OPTIONS @@ -39,9 +46,24 @@ by running "git annex sync" on the remote. Only sync with the remotes with the lowest annex-cost value configured. +* `--only-annex` `-a`, `--not-only-annex` + + Only sync the git-annex branch and annexed content with remotes, + not other git branches. + + This avoids pulling and pushing other branches, and it avoids committing + any local changes. It's up to you to use regular git commands to do that. + + The `annex.synconlyannex` configuration can be set to true to make + this be the default behavior of `git-annex sync`. To override such + a setting, use `--not-only-annex`. + + When this is combined with --no-content, only the git-annex branch + will be synced. + * `--commit`, `--no-commit` - A commit is done by default (unless annex.autocommit is set to false). + A commit is done by default (unless `annex.autocommit` is set to false). Use --no-commit to avoid committing local changes. @@ -51,8 +73,8 @@ by running "git annex sync" on the remote. * `--pull`, `--no-pull` - By default, git pulls from remotes and imports from some special remotes. - Use --no-pull to disable all pulling. + By default, syncing pulls from remotes and imports from some special + remotes. Use --no-pull to disable all pulling. 
When `remote..annex-pull` or `remote..annex-sync` are set to false, pulling is disabled for those remotes, and using @@ -60,7 +82,7 @@ by running "git annex sync" on the remote. * `--push`, `--no-push` - By default, git pushes changes to remotes and exports to some + By default, syncing pushes changes to remotes and exports to some special remotes. Use --no-push to disable all pushing. When `remote..annex-push` or `remote..annex-sync` are @@ -128,7 +150,7 @@ by running "git annex sync" on the remote. [[git-annex-resolvemerge]](1) for details.) Use `--no-resolvemerge` to disable this automatic merge conflict - resolution. It can also be disabled by setting annex.resolvemerge + resolution. It can also be disabled by setting `annex.resolvemerge` to false. * `--cleanup` diff --git a/doc/git-annex-unlock/comment_5_40abc819288a97753747562f16b8febe._comment b/doc/git-annex-unlock/comment_5_40abc819288a97753747562f16b8febe._comment index 25cd0dbb83..cc425484f4 100644 --- a/doc/git-annex-unlock/comment_5_40abc819288a97753747562f16b8febe._comment +++ b/doc/git-annex-unlock/comment_5_40abc819288a97753747562f16b8febe._comment @@ -3,10 +3,6 @@ subject="""Re: Move file from git annex to just git in one commit""" date="2019-09-30T17:29:39Z" content=""" - git annex unlock MYFILE - git -c annex.largefiles=nothing add MYFILE - git commit -n MYFILE - -(The -n is only needed with git-annex older than 7.20190912 but will work -with newer versions too.) +See [[tips/largefiles]], it has recipes for conversion from annex to git and +from git to annex. """]] diff --git a/doc/git-annex-unused/comment_11_5c1ea2e8ac475d8c0ddca2c38f968b95._comment b/doc/git-annex-unused/comment_11_5c1ea2e8ac475d8c0ddca2c38f968b95._comment new file mode 100644 index 0000000000..9c0c594fc5 --- /dev/null +++ b/doc/git-annex-unused/comment_11_5c1ea2e8ac475d8c0ddca2c38f968b95._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 11""" + date="2020-01-01T18:54:04Z" + content=""" +Generally if git-annex unused does not identify some object as unused, that +you expected would be unused, it's because it found a branch that still +uses that object. It might be some old branch (or tag) that you made and +forgot about, it might be a remote tracking branch, either for a remote +that you forgot to sync the changes to, or for an old remote that +got removed but in a way that left the tracking branch around. +annex.used-refspec can also pull in other, non-branch refs. +"""]] diff --git a/doc/git-annex-unused/comment_1_029c4bee59e48806c9f3d83579fdbb90/comment_1_c4d994706f69024788e6653d02a09517._comment b/doc/git-annex-unused/comment_2_c4d994706f69024788e6653d02a09517._comment similarity index 63% rename from doc/git-annex-unused/comment_1_029c4bee59e48806c9f3d83579fdbb90/comment_1_c4d994706f69024788e6653d02a09517._comment rename to doc/git-annex-unused/comment_2_c4d994706f69024788e6653d02a09517._comment index 81ce17ca76..4c06422d26 100644 --- a/doc/git-annex-unused/comment_1_029c4bee59e48806c9f3d83579fdbb90/comment_1_c4d994706f69024788e6653d02a09517._comment +++ b/doc/git-annex-unused/comment_2_c4d994706f69024788e6653d02a09517._comment @@ -3,6 +3,6 @@ subject="""comment 1""" date="2016-01-11T16:30:18Z" content=""" -The `--from` option can be used with any remote, whether it's a normal git +@mark The `--from` option can be used with any remote, whether it's a normal git remote or a special remote. 
"""]] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 9d5c11d70f..522fc2169d 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1115,7 +1115,15 @@ Like other git commands, git-annex is configured via `.git/config`. * `annex.synccontent` - Set to true to make git-annex sync default to syncing content. + Set to true to make git-annex sync default to syncing annexed content. + + To configure the behavior in all clones of the repository, + this can be set in [[git-annex-config]](1). + +* `annex.synconlyannex` + + Set to true to make git-annex sync default to only sincing the git-annex + branch and annexed content. To configure the behavior in all clones of the repository, this can be set in [[git-annex-config]](1). diff --git a/doc/install/Windows/comment_9_2687128ff7aa4b6f29e8b880b5d4d13d._comment b/doc/install/Windows/comment_9_2687128ff7aa4b6f29e8b880b5d4d13d._comment new file mode 100644 index 0000000000..00fad5d1c2 --- /dev/null +++ b/doc/install/Windows/comment_9_2687128ff7aa4b6f29e8b880b5d4d13d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 9""" + date="2020-01-01T19:04:08Z" + content=""" +@michael.fsp autobuilder was down. There's a build now for the most recent +release. +"""]] diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 0745004c74..cd48a39f89 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -180,13 +180,13 @@ Tracks what trees have been exported to special remotes by Each line starts with a timestamp, then the uuid of the repository that exported to the special remote, followed by a colon (`:`) and the uuid of the special remote. Then, separated by a spaces, -the sha1 of the tree that was exported, and optionally any number of -subsequent sha1s, of trees that have started to be exported but whose +the SHA of the tree that was exported, and optionally any number of +subsequent SHAs, of trees that have started to be exported but whose export is not yet complete. In order to record the beginning of the first export, where nothing -has been exported yet, the sha1 of the exported tree can be -the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904). +has been exported yet, the SHA of the exported tree can be +the empty tree (eg 4b825dc642cb6eb9a060e54bf8d69288fbee4904). For example: diff --git a/doc/news/version_7.20190912/comment_1_58351144b9da1dd19626ba9cdd7502d5._comment b/doc/news/version_7.20190912/comment_1_58351144b9da1dd19626ba9cdd7502d5._comment deleted file mode 100644 index d24167d0fa..0000000000 --- a/doc/news/version_7.20190912/comment_1_58351144b9da1dd19626ba9cdd7502d5._comment +++ /dev/null @@ -1,22 +0,0 @@ -[[!comment format=sh - username="Michael" - avatar="http://cdn.libravatar.org/avatar/86811fdafa094c610ec8ef8858a78dbf" - subject=""Git is older than version 2.22"" - date="2019-09-15T19:33:28Z" - content=""" -Installing 7.20190912 from the \"ancient\" tarball at https://downloads.kitenet.net/git-annex/linux/current/git-annex-standalone-i386-ancient.tar.gz on an x86 Synology NAS, I get - -$ git annex merge - - Git is older than version 2.22 and so it has a memory leak that affects using unlocked files. Recommend you upgrade git before unlocking any files in your repository. -merge git-annex ok -git-annex: thread blocked indefinitely in an MVar operation - -$ which git -/var/services/homes/michael/git-annex.linux/git - -$ /var/services/homes/michael/git-annex.linux/git --version -git version 2.1.4 - -Seems like included git is too old then? 
-"""]] diff --git a/doc/news/version_7.20190912/comment_2_e90a077f4dd7396a2d3fd26f75e84b64._comment b/doc/news/version_7.20190912/comment_2_e90a077f4dd7396a2d3fd26f75e84b64._comment deleted file mode 100644 index d7bf965d86..0000000000 --- a/doc/news/version_7.20190912/comment_2_e90a077f4dd7396a2d3fd26f75e84b64._comment +++ /dev/null @@ -1,9 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""comment 2""" - date="2019-09-16T16:02:32Z" - content=""" -@Micheal, drat. Opened a bug: -[[bugs/i386ancient_tarball_git_too_old_error]] -(And fixed it; the tarball for this release has been updated.) -"""]] diff --git a/doc/news/version_7.20190912/comment_3_6224ac8ec7f2b5d04ebce3747200ad0d._comment b/doc/news/version_7.20190912/comment_3_6224ac8ec7f2b5d04ebce3747200ad0d._comment deleted file mode 100644 index 93db3e21bc..0000000000 --- a/doc/news/version_7.20190912/comment_3_6224ac8ec7f2b5d04ebce3747200ad0d._comment +++ /dev/null @@ -1,8 +0,0 @@ -[[!comment format=mdwn - username="Michael" - avatar="http://cdn.libravatar.org/avatar/86811fdafa094c610ec8ef8858a78dbf" - subject="comment 3" - date="2019-09-17T19:19:36Z" - content=""" -@joye, thanks! -"""]] diff --git a/doc/news/version_7.20191024.mdwn b/doc/news/version_7.20191024.mdwn deleted file mode 100644 index 8786301d5c..0000000000 --- a/doc/news/version_7.20191024.mdwn +++ /dev/null @@ -1,26 +0,0 @@ -News for git-annex 7.20191024: - - When annex.largefiles is not configured, `git add` and `git commit -a` - add files to git, not to the annex. If you have gotten used to `git add` - adding all files to the annex, you can get that behavior back by running: - git config annex.largefiles anything - -git-annex 7.20191024 released with [[!toggle text="these changes"]] -[[!toggleable text=""" - * Changed git add/git commit -a default behavior back to what it was - before v7; they add file contents to git, not to the annex. - (However, if a file was annexed before, they will still add it to - the annex, to avoid footgun.) - * Configuring annex.largefiles overrides that; once git-annex has - been told which files are large git add/git commit -a will annex them. - * Added annex.gitaddtoannex configuration. Setting it to false prevents - git add from adding files to the annex even when annex.largefiles - is configured. (Unless the file was annexed before.) - * smudge: Made git add smarter about renamed annexed files. It can tell - when an annexed file was renamed, and will add it to the annex, - and not to git, unless annex.largefiles tells it to do otherwise. - * init: Fix a failure when used in a submodule on a crippled filesystem. - * sync: Fix crash when there are submodules and an adjusted branch is - checked out. - * enable-tor: Deal with pkexec changing to root's home directory - when running a command."""]] \ No newline at end of file diff --git a/doc/news/version_7.20191024/comment_1_5c6bebf1ef45af49651c22c085039b3b._comment b/doc/news/version_7.20191024/comment_1_5c6bebf1ef45af49651c22c085039b3b._comment deleted file mode 100644 index 4c9a272cfd..0000000000 --- a/doc/news/version_7.20191024/comment_1_5c6bebf1ef45af49651c22c085039b3b._comment +++ /dev/null @@ -1,9 +0,0 @@ -[[!comment format=mdwn - username="anthony@ad39673d230d75cbfd19d2757d754030049c7673" - nickname="anthony" - avatar="http://cdn.libravatar.org/avatar/05b48b72766177b3b0a6ff4afdb70790" - subject="Thank you" - date="2019-10-25T20:29:46Z" - content=""" -Thank you for all the work you do on git-annex. I appreciate it. 
-"""]] diff --git a/doc/news/version_7.20191024/comment_2_209826312977675475affe8f669dfa73._comment b/doc/news/version_7.20191024/comment_2_209826312977675475affe8f669dfa73._comment deleted file mode 100644 index 1cdeae256d..0000000000 --- a/doc/news/version_7.20191024/comment_2_209826312977675475affe8f669dfa73._comment +++ /dev/null @@ -1,11 +0,0 @@ -[[!comment format=mdwn - username="nivek-retsof@97a405d1563af7581f6c8d1f7bae67b0ce896721" - nickname="nivek-retsof" - avatar="http://cdn.libravatar.org/avatar/3aec9a88897e105952a4ed38007fb224" - subject="Thanks!" - date="2019-11-05T22:18:49Z" - content=""" -This behavior makes more sense to me. I like keeping my text files in git and binaries in git-annex, and treating them separately is no great mental burden. - -Thanks for your great work. Git-annex has become one of the best parts of my computing experience. It is a great relief knowing that my files are in a robust distributed and redundant collection that is easy to maintain. -"""]] diff --git a/doc/news/version_7.20191106.mdwn b/doc/news/version_7.20191106.mdwn deleted file mode 100644 index e51c99a3dc..0000000000 --- a/doc/news/version_7.20191106.mdwn +++ /dev/null @@ -1,5 +0,0 @@ -git-annex 7.20191106 released with [[!toggle text="these changes"]] -[[!toggleable text=""" - * init: Fix bug that lost modifications to unlocked files when init is - re-ran in an already initialized repo. - * benchmark: Add --databases to benchmark sqlite databases."""]] \ No newline at end of file diff --git a/doc/news/version_7.20191114.mdwn b/doc/news/version_7.20191114.mdwn deleted file mode 100644 index 6dfb3401e3..0000000000 --- a/doc/news/version_7.20191114.mdwn +++ /dev/null @@ -1,10 +0,0 @@ -git-annex 7.20191114 released with [[!toggle text="these changes"]] -[[!toggleable text=""" - * Added annex.allowsign option. - * Make --json-error-messages capture more errors, - particularly url download errors. - * Fix a crash (STM deadlock) when -J is used with multiple files - that point to the same key. - * linuxstandalone: Fix a regression that broke git-remote-https. - * OSX git-annex.app: Fix a problem that prevented using the bundled - git-remote-https, git-remote-http, and git-shell."""]] \ No newline at end of file diff --git a/doc/news/version_7.20200202.7.mdwn b/doc/news/version_7.20200202.7.mdwn new file mode 100644 index 0000000000..09c0f0b972 --- /dev/null +++ b/doc/news/version_7.20200202.7.mdwn @@ -0,0 +1,25 @@ +git-annex 7.20200202.7 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * add: --force-annex/--force-git options make it easier to override + annex.largefiles configuration (and potentially safer as it avoids + bugs like the smudge bug fixed in the last release). + * reinject --known: Fix bug that prevented it from working in a bare repo. + * Support being used in a git repository that uses sha256 rather than sha1. + * initremote, enableremote: Be stricter about rejecting invalid + configuration parameters for remotes, particularly things like foo=true + when foo=yes is expected. + * initremote, enableremote: Reject unknown configuration parameters + provided to these commands. + * initremote: Added --whatelse option, to show additional + configuration parameters you might want to set. Eg: + git annex initremote type=directory encryption=none --whatelse + * Added LISTCONFIGS to external special remote protocol. Special remote + programs that use GETCONFIG/SETCONFIG are recommended to implement it. 
+ * init: Avoid an ugly error message when http remote has no git-annex + uuid configured. + * Support git remotes that need http basic auth to be accessed, + using git credential to get the password. + * Display a warning when concurrency is enabled but ssh connection caching + is not enabled or won't work due to a crippled filesystem. + * Makefile: Move the fish completion to the vendor\_completions.d directory. + * Fixed a test suite failure when run in the C locale."""]] \ No newline at end of file diff --git a/doc/news/version_7.20200204.mdwn b/doc/news/version_7.20200204.mdwn new file mode 100644 index 0000000000..d9eb1a237a --- /dev/null +++ b/doc/news/version_7.20200204.mdwn @@ -0,0 +1,5 @@ +git-annex 7.20200204 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * Fix build with persistent-template 2.8.0. + * Makefile: Really move the fish completion to the + vendor\_completions.d directory."""]] \ No newline at end of file diff --git a/doc/news/version_7.20200219.mdwn b/doc/news/version_7.20200219.mdwn new file mode 100644 index 0000000000..7904474526 --- /dev/null +++ b/doc/news/version_7.20200219.mdwn @@ -0,0 +1,18 @@ +git-annex 7.20200219 released with [[!toggle text="these changes"]] +[[!toggleable text=""" + * Added sync --only-annex, which syncs the git-annex branch and annexed + content but leaves managing the other git branches up to you. + * Added annex.synconlyannex git config setting, which can also be set with + git-annex config to configure sync in all clones of the repo. + * fsck --from remote: Fix a concurrency bug that could make it incorrectly + detect that content in the remote is corrupt, and remove it, resulting in + data loss. + * When git-annex is built with a ssh that does not support ssh connection + caching, default annex.sshcaching to false, but let the user override it. + * Improve warning messages further when ssh connection caching cannot + be used, to clearly state why. + * Avoid throwing fatal errors when asked to write to a readonly + git remote on http. 
+ * Fix support for repositories tuned with annex.tune.branchhash1=true, + including --all not working and git-annex log not displaying anything + for annexed files."""]] \ No newline at end of file diff --git a/doc/projects/dandi.mdwn b/doc/projects/dandi.mdwn index 5e3683cebc..d485a27fc8 100644 --- a/doc/projects/dandi.mdwn +++ b/doc/projects/dandi.mdwn @@ -5,21 +5,21 @@ DANDI: Distributed Archives for Neurophysiology Data Integration is a platform f ## TODOs -[[!inline pages="todo/* and !todo/done and !link(todo/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="todo/* and !todo/done and !link(todo/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ### Done: -[[!inline pages="todo/* and !todo/done and link(todo/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="todo/* and !todo/done and link(todo/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ## BUGs -[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ### Done: -[[!inline pages="bugs/* and !bugs/done and link(bugs/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="bugs/* and !bugs/done and link(bugs/done) and tagged(projects/dandi)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] diff --git a/doc/projects/datalad.mdwn b/doc/projects/datalad.mdwn index 6680e3f860..ca59393598 100644 --- a/doc/projects/datalad.mdwn +++ b/doc/projects/datalad.mdwn @@ -2,14 +2,14 @@ TODOs for DataLad ================= [[!inline pages="todo/* and !todo/done and !link(todo/done) and -(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" sort=mtime feeds=no actions=yes archive=yes show=0]] +(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]]
Done [[!inline pages="todo/* and !todo/done and link(todo/done) and -(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" feeds=no actions=yes archive=yes show=0]] +(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" feeds=no actions=yes archive=yes show=0 template=buglist]]
@@ -17,7 +17,7 @@ My bugs ======= [[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and -(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] +(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] @@ -25,6 +25,6 @@ My bugs Fixed [[!inline pages="bugs/* and !bugs/done and link(bugs/done) and -(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" feeds=no actions=yes archive=yes show=0 template=buglist]] +(author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle))" feeds=no actions=yes archive=yes show=0 template=buglist]] diff --git a/doc/projects/neurohub.mdwn b/doc/projects/neurohub.mdwn index 110a4c549b..a501b686e7 100644 --- a/doc/projects/neurohub.mdwn +++ b/doc/projects/neurohub.mdwn @@ -5,21 +5,21 @@ NeuroHub [???](???) ## TODOs -[[!inline pages="todo/* and !todo/done and !link(todo/done) and tagged(projects/neurohub)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="todo/* and !todo/done and !link(todo/done) and tagged(projects/neurohub)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ### Done: -[[!inline pages="todo/* and !todo/done and link(todo/done) and tagged(projects/neurohub)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="todo/* and !todo/done and link(todo/done) and tagged(projects/neurohub)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ## BUGs -[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and tagged(projects/heurohub)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and tagged(projects/heurohub)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ### Done: -[[!inline pages="bugs/* and !bugs/done and link(bugs/done) and tagged(projects/neurohub)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="bugs/* and !bugs/done and link(bugs/done) and tagged(projects/neurohub)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] diff --git a/doc/projects/repronim.mdwn b/doc/projects/repronim.mdwn index 83c1543e5e..e31221ce87 100644 --- a/doc/projects/repronim.mdwn +++ b/doc/projects/repronim.mdwn @@ -5,18 +5,18 @@ The center for Reproducible Neuroimaging computation develops standards and tool ## TODOs -[[!inline pages="todo/* and !todo/done and !link(todo/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="todo/* and !todo/done and !link(todo/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ### Done: -[[!inline pages="todo/* and !todo/done and link(todo/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="todo/* and !todo/done and link(todo/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ## BUGs -[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] ### Done: -[[!inline pages="bugs/* and
!bugs/done and link(bugs/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="bugs/* and !bugs/done and link(bugs/done) and tagged(projects/repronim)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] diff --git a/doc/scalability.mdwn b/doc/scalability.mdwn index cdc148e2f8..fd33fdf273 100644 --- a/doc/scalability.mdwn +++ b/doc/scalability.mdwn @@ -37,3 +37,5 @@ git-annex is designed for scalability. The key points are: becomes increasingly expensive. Consider adjusting the `annex.queuesize` to a higher value, at the expense of it using more memory. +* See also: [[tips/Repositories_with_large_number_of_files]] + diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn index 2a1bb5729d..6221dd9557 100644 --- a/doc/special_remotes/S3.mdwn +++ b/doc/special_remotes/S3.mdwn @@ -140,4 +140,9 @@ the S3 remote. then use the same bucket. * `x-amz-meta-*` are passed through as http headers when storing keys - in S3. see [the Internet Archive S3 interface documentation](https://archive.org/help/abouts3.txt) for example headers. + in S3. + +* `x-archive-meta-*` are passed through as http headers when storing keys + in the Internet Archive. See + [the Internet Archive S3 interface documentation](https://archive.org/help/abouts3.txt) + for example headers. diff --git a/doc/special_remotes/S3/comment_33_36788d742259b3b078e02d7b2a251ce5._comment b/doc/special_remotes/S3/comment_33_36788d742259b3b078e02d7b2a251ce5._comment new file mode 100644 index 0000000000..2c47361014 --- /dev/null +++ b/doc/special_remotes/S3/comment_33_36788d742259b3b078e02d7b2a251ce5._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="cnjr2" + avatar="http://cdn.libravatar.org/avatar/f7e9654cc967c8815947b19829bfd746" + subject="comment 33" + date="2020-02-12T10:37:24Z" + content=""" +First of all thanks for `git-annex`! I have recently discovered it and was so pleased to see how it solves problems that I had not anticipated! + +Related to [comment 26](http://git-annex.branchable.com/special_remotes/S3/#comment-e2000bfcb5d4c2d1a017f52c1c02a1ba) above, I want to create a bucket in Frankfurt. It seems like the aws library used by git-annex is from [aristidb/aws](https://github.com/aristidb/aws) where the topic of `V4` signing was raised in [this issue](https://github.com/aristidb/aws/issues/167) ([comment](https://github.com/aristidb/aws/issues/167#issuecomment-258864661) by you joey?). + +Are there workarounds here? I can only use Frankfurt. + +Thanks again! + + +"""]] diff --git a/doc/special_remotes/comment_40_2a36f9c874b00fd5de94836e3dcde782._comment b/doc/special_remotes/comment_40_2a36f9c874b00fd5de94836e3dcde782._comment new file mode 100644 index 0000000000..9f71e141fc --- /dev/null +++ b/doc/special_remotes/comment_40_2a36f9c874b00fd5de94836e3dcde782._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="giuly.ippoliti@c1e2f0d5e40b128900f94f3d107d3719f87c3ff7" + nickname="giuly.ippoliti" + avatar="http://cdn.libravatar.org/avatar/4444ce1930af68fd817e9649d40e0359" + subject="Disable git annex logs" + date="2020-02-14T19:39:26Z" + content=""" +Hi, + +So while writing the globus special remote (git-annex-remote-globus) I often redirect my logs to annex. Nevertheless these logs are always logged out in the console, them being INFO, ERROR, DEBUG and I would like to control that. Is there a way to disable console logging of logs sent back to git annex? + +Something like ANNEX_LOG_LEVEL=self.annex.ERROR + +Thanks !! 
+ +Regards +Giulia +"""]] diff --git a/doc/special_remotes/comment_41_26a29fe6169eee1c62dd10672040851c._comment b/doc/special_remotes/comment_41_26a29fe6169eee1c62dd10672040851c._comment new file mode 100644 index 0000000000..6d46a8c6ab --- /dev/null +++ b/doc/special_remotes/comment_41_26a29fe6169eee1c62dd10672040851c._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 41""" + date="2020-02-17T16:26:45Z" + content=""" +@giuly.ippoliti this is not the best place to ask.. +[[design/external_special_remote_protocol]] is a better place to discuss +special remote implementation. + +Anyway, git-annex's --quiet option will shut up the INFO. +DEBUG is only ever displayed when you use the --debug option. +ERROR should only be used if you have a problem that the user is going to +care about seeing. +"""]] diff --git a/doc/special_remotes/comment_42_85e354eb058aea02be7b847bceb02313._comment b/doc/special_remotes/comment_42_85e354eb058aea02be7b847bceb02313._comment new file mode 100644 index 0000000000..a4a30ba026 --- /dev/null +++ b/doc/special_remotes/comment_42_85e354eb058aea02be7b847bceb02313._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Dan" + avatar="http://cdn.libravatar.org/avatar/986de9e060699ae70ff7c31342393adc" + subject="Avoid fetching from special remotes" + date="2020-02-19T00:22:04Z" + content=""" +I have a few special remotes configured in my git annex-ed repo. As far as `git` is concerned, these are just ordinary remotes (they have entries in the `.git/config` file). As a result, when I do something like `git fetch --all` it still tries to fetch from them. This of course fails (since they aren't actually git repositories). It isn't the end of the world, but causes some extra noise in my output (I use `magit` in emacs for most of my `git` workflow, so I have to drill down a bit to see what actually failed and then dismiss it as an expected failure). + +It'd be nice if I could just prevent `git` from fetching from these special remotes. Is there a clever way I can set `remote.<name>.fetch` in the local config so as to leave these remotes alone when fetching? +"""]] diff --git a/doc/special_remotes/comment_43_75a7c567db2b5256182b2cf06091dc12._comment b/doc/special_remotes/comment_43_75a7c567db2b5256182b2cf06091dc12._comment new file mode 100644 index 0000000000..58ab3aba2a --- /dev/null +++ b/doc/special_remotes/comment_43_75a7c567db2b5256182b2cf06091dc12._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: Avoid fetching from special remotes""" + date="2020-02-19T16:38:44Z" + content=""" +@Dan, set remote.name.skipFetchAll to true. Or make a remote group containing +the remotes you do want to fetch from. +"""]] diff --git a/doc/special_remotes/external/example.sh b/doc/special_remotes/external/example.sh index fe1d9380ec..cfae74ae25 100755 --- a/doc/special_remotes/external/example.sh +++ b/doc/special_remotes/external/example.sh @@ -137,7 +137,7 @@ doremove () { local loc="$2" # Note that it's not a failure to remove a - # fike that is not present. + # file that is not present. if [ -e "$loc" ]; then if runcmd rm -f "$loc"; then echo REMOVE-SUCCESS "$key" @@ -155,6 +155,12 @@ echo VERSION 1 while read line; do set -- $line case "$1" in + LISTCONFIGS) + # One CONFIG line for each setting that we GETCONFIG + # later. + echo CONFIG directory store data here + echo CONFIGEND + ;; INITREMOTE) # Do anything necessary to create resources # used by the remote. Try to be idempotent.
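As a rough illustration of the LISTCONFIGS handler added to example.sh above (and the matching item in the 7.20200202.7 release notes), the new reply can be seen by driving the script by hand; this is only a sketch, since in normal use git-annex itself speaks this protocol over the program's stdin and stdout:

    # ask the example special remote to list its configs
    printf 'LISTCONFIGS\n' | ./example.sh
    # expected output, per the patch above:
    #   VERSION 1
    #   CONFIG directory store data here
    #   CONFIGEND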
diff --git a/doc/templates/buglist.tmpl b/doc/templates/buglist.tmpl index a5d93e9bbc..70e60bfeb3 100644 --- a/doc/templates/buglist.tmpl +++ b/doc/templates/buglist.tmpl @@ -5,9 +5,11 @@ + [] +
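Relatedly, the "Avoid fetching from special remotes" exchange above suggests two git-level approaches; a minimal sketch, where the remote name "mys3" and the group name "gitonly" are hypothetical examples rather than names used elsewhere in this patch:

    # tell git fetch --all / git remote update to skip a special remote
    git config remote.mys3.skipFetchAll true

    # or define a group of the remotes you do want to fetch from,
    # and fetch only that group
    git config remotes.gitonly "origin laptop"
    git fetch --multiple gitonly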
diff --git a/doc/thanks/list b/doc/thanks/list index 3a5de35e3e..af577f0181 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -71,3 +71,4 @@ Svenne Krap, Jelmer Vernooij, Rian McGuire, Cesar, +LND, diff --git a/doc/tips/Repositories_with_large_number_of_files/comment_7_f56f1ffeb5e1f382d3227b4ace7f84a1._comment b/doc/tips/Repositories_with_large_number_of_files/comment_7_f56f1ffeb5e1f382d3227b4ace7f84a1._comment new file mode 100644 index 0000000000..56a7373957 --- /dev/null +++ b/doc/tips/Repositories_with_large_number_of_files/comment_7_f56f1ffeb5e1f382d3227b4ace7f84a1._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="breck7@4bbace32c18ebb98fc730d56c6ed76b7bcc0798e" + nickname="breck7" + avatar="http://cdn.libravatar.org/avatar/96a1ef57fee8349bd6f92faf153f9ec3" + subject="Thank you" + date="2020-01-30T15:15:24Z" + content=""" +I just ran into related git scaling issues while trying to make a repo in a folder with 6.5 million files. [Writeup](https://breckyunits.com/building-a-treebase-with-6-point-5-million-files.html). I did things the naive/wrong way on purpose, but now I'm curious if I could actually get this work fast, and your pointers are a good help. Thanks! +"""]] diff --git a/doc/tips/automatically_adding_metadata/comment_10_cf770ba8eed7963f08517877bd460d3f._comment b/doc/tips/automatically_adding_metadata/comment_10_cf770ba8eed7963f08517877bd460d3f._comment new file mode 100644 index 0000000000..8cb52f1924 --- /dev/null +++ b/doc/tips/automatically_adding_metadata/comment_10_cf770ba8eed7963f08517877bd460d3f._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="max" + avatar="http://cdn.libravatar.org/avatar/ebbd4dd37b341d018077a12e7ccf9fbd" + subject="adding support for additional metadata tools?" + date="2020-02-10T04:45:55Z" + content=""" +hi there, sincere gratitude for your work on git-annex. + +is it worth considering a more modular setup (maybe a python script :) or even haskell) for working with metadata extractors? + +currently it really only supports field-based filtering extractors. i am particularly interested in using (minimally): + +mp3info - offers a formatting string for printing but doesn't fit super nicely into the \"want\" fields in shell script. could probably be hacked in. + +mp4info - does not seem to offer any native parameters for filtering; would likely need some engineering/thought about how to take in all possible fields then filtering off of those. + +alternatively i could just write some personal scripts here, but just thought others would find it useful for auto extracting content from mp3/m4a files. extract doesn't seem to perform as well on these as, say, .flac files. + +thanks again! +"""]] diff --git a/doc/tips/automatically_adding_metadata/comment_11_6eeb21b66aa3541491ddc0dd3058ddc7._comment b/doc/tips/automatically_adding_metadata/comment_11_6eeb21b66aa3541491ddc0dd3058ddc7._comment new file mode 100644 index 0000000000..ad09dd6361 --- /dev/null +++ b/doc/tips/automatically_adding_metadata/comment_11_6eeb21b66aa3541491ddc0dd3058ddc7._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""re: adding support for additional metadata tools?""" + date="2020-02-17T16:55:20Z" + content=""" +@max, notice that the hook script contains special case handling for +exiftool, including a config option for it. That was contributed by +Klaus Ethgen. I'd be inclined to merge patches that add handling +for other tools. + +I imagine you could add config settings for the format string etc. 
+"""]] diff --git a/doc/tips/disabling_a_special_remote.mdwn b/doc/tips/disabling_a_special_remote.mdwn index 3b80ef25ce..c14b383702 100644 --- a/doc/tips/disabling_a_special_remote.mdwn +++ b/doc/tips/disabling_a_special_remote.mdwn @@ -9,7 +9,7 @@ simply to use the `remote.name.annex-ignore` configuration documented in the main [[git-annex]] manpage. For example, to disable the web remote, you would use: - git config remote.web.annex-ignore false + git config remote.web.annex-ignore true The result would be: diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn index 7785daacd8..b976615ba5 100644 --- a/doc/tips/largefiles.mdwn +++ b/doc/tips/largefiles.mdwn @@ -18,7 +18,7 @@ and `git add`/`git commit -a` will add the large files to the annex and the small files to git. Other commands that use the annex.largefiles configuration include -`git annex import`, git annex addurl`, `git annex importfeed`, and +`git annex import`, `git annex addurl`, `git annex importfeed`, and the assistant. ## examples diff --git a/doc/tips/largefiles/comment_15_4ad7931038b50344b4238459bd79f74f._comment b/doc/tips/largefiles/comment_15_4ad7931038b50344b4238459bd79f74f._comment deleted file mode 100644 index a12c88ae28..0000000000 --- a/doc/tips/largefiles/comment_15_4ad7931038b50344b4238459bd79f74f._comment +++ /dev/null @@ -1,17 +0,0 @@ -[[!comment format=mdwn - username="Dwk" - avatar="http://cdn.libravatar.org/avatar/65fade4f1582ef3f00e9ad6ae27dae56" - subject="annexed -> normal git does not work if annexed file was unlocked" - date="2019-10-05T01:45:14Z" - content=""" -The sequence of commands given to shift an unlocked annexed file to normal git (for instance a file annexed by mistake due to the new git-add behaviour), namely - - git annex unlock file - git -c annex.largefiles=nothing add file - git commit file - -does not work if the file is unmodified, as git sees no change to commit. In this case, I believe one should replace the first command with - - git annex unannex file - -"""]] diff --git a/doc/tips/largefiles/comment_16_00304872f9750e515c66fad68826e4ff._comment b/doc/tips/largefiles/comment_16_00304872f9750e515c66fad68826e4ff._comment deleted file mode 100644 index 40a7ad6b37..0000000000 --- a/doc/tips/largefiles/comment_16_00304872f9750e515c66fad68826e4ff._comment +++ /dev/null @@ -1,8 +0,0 @@ -[[!comment format=mdwn - username="joey" - subject="""Re: annexed -> normal git does not work if annexed file was unlocked""" - date="2019-10-08T18:14:45Z" - content=""" -Touching the file first avoids the problem, or git rm --cached to force -git to re-add it. I've updated the example. -"""]] diff --git a/doc/tips/local_caching_of_annexed_files/comment_22_b047ea1c6b9247a42b560a111d4fdeae._comment b/doc/tips/local_caching_of_annexed_files/comment_22_b047ea1c6b9247a42b560a111d4fdeae._comment new file mode 100644 index 0000000000..7c24be4fe8 --- /dev/null +++ b/doc/tips/local_caching_of_annexed_files/comment_22_b047ea1c6b9247a42b560a111d4fdeae._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="preferred way to automate population of the cache upon `get`" + date="2020-01-13T19:01:07Z" + content=""" +``` +# Populating the cache +For the cache to be used, you need to get file contents into it somehow. A simple way to do that is, in a git-annex repository that already contains the content of files: + + git annex copy --to cache + +You could run that anytime after you get content. 
There are also ways to automate it, but getting some files into the cache manually is a good enough start. +``` + +Hi Joey. What would be the preferred way you would advise to use (ideally with minimal manual configuration) to make it happen? I.e. whenever user(s) `get` some load, it gets automagically `annex copied` to the cache? +"""]] diff --git a/doc/todo.mdwn b/doc/todo.mdwn index a4ee9d55df..2046d39d38 100644 --- a/doc/todo.mdwn +++ b/doc/todo.mdwn @@ -1,5 +1,7 @@ -This is git-annex's todo list. Link items to [[todo/done]] when done. A more complete [[design/roadmap/]] is also available. +This is git-annex's todo list. Link items to [[todo/done]] when done, +or tags: [[todo/confirmed]] [[todo/moreinfo]] [[todo/needsthought]] [[todo/unlikely]] [[!inline pages="./todo/* and !./todo/*/* and !./todo/done and !link(done) -and !*/Discussion" actions=yes postform=yes postformtext="Add a new todo titled:" -show=0 feedlimit=10 archive=yes]] +and !*/Discussion and !./todo/moreinfo and !./todo/confirmed +and !./todo/needsthought and !./todo/unlikely" actions=yes postform=yes postformtext="Add a new todo titled:" +show=0 feedlimit=10 archive=yes template=buglist]] diff --git a/doc/todo/--get_option_for_diffdriver.mdwn b/doc/todo/--get_option_for_diffdriver.mdwn index d6ad9a2b98..471814ae9d 100644 --- a/doc/todo/--get_option_for_diffdriver.mdwn +++ b/doc/todo/--get_option_for_diffdriver.mdwn @@ -1 +1,3 @@ since there is no generic 'fuse' mode, I would like to request to have `--get` (or `--auto-get`) option for diffdriver. I am trying to compare files across two branches on a repo I just cloned. I cannot download all the files and downloading differing keys across branches for the same file is a bit painful. So I felt that it would be super nice if git annex could auto get those files from somewhere (well -- original clone) + +[[!tag confirmed]] diff --git a/doc/todo/Alternative_mode_control_for_import.mdwn b/doc/todo/Alternative_mode_control_for_import.mdwn index 24bbd5ace5..41a9b8bcbd 100644 --- a/doc/todo/Alternative_mode_control_for_import.mdwn +++ b/doc/todo/Alternative_mode_control_for_import.mdwn @@ -15,3 +15,6 @@ Apologies for the brevity, I've already typed this out once.. git annex import --mode=Ns $src # (just creates symlinks for new) git annex import --mode=Nsd $src # (invalid mode due to data loss) git annex import --mode=Nid $src # (invalid or require --force) + +> Current thinking is in [[remove_legacy_import_directory_interface]]. +> This old todo is redundant, so [[wontfix|done]] --[[Joey]] diff --git a/doc/todo/Bidirectional_metadata.mdwn b/doc/todo/Bidirectional_metadata.mdwn index cdbdf614c5..c455cf03ce 100644 --- a/doc/todo/Bidirectional_metadata.mdwn +++ b/doc/todo/Bidirectional_metadata.mdwn @@ -19,3 +19,5 @@ There are other situations this is useful (and I use), for example, when I conve git annex metadata --parentchild original.svg compressed.png and this would set 'parent' and 'child' metadata respectively. + +[[!tag needsthought]] diff --git a/doc/todo/Describe_a_file_in_function_of_another_file.mdwn b/doc/todo/Describe_a_file_in_function_of_another_file.mdwn index 4df53b795a..4993b3b3ad 100644 --- a/doc/todo/Describe_a_file_in_function_of_another_file.mdwn +++ b/doc/todo/Describe_a_file_in_function_of_another_file.mdwn @@ -13,4 +13,5 @@ You may ask why it is useful? I have several usecases: Does git-annex provide such functionnality? If not, do you think it could be implementable? Thanks! 
- + +[[!tag unlikely]] diff --git a/doc/todo/Don__39__t_re-encrypt_when_key_is_already_in_.git__47__annex__47__tmp.mdwn b/doc/todo/Don__39__t_re-encrypt_when_key_is_already_in_.git__47__annex__47__tmp.mdwn new file mode 100644 index 0000000000..6a461cffff --- /dev/null +++ b/doc/todo/Don__39__t_re-encrypt_when_key_is_already_in_.git__47__annex__47__tmp.mdwn @@ -0,0 +1 @@ +I've implemented true resumable upload in git-annex-remote-googledrive which means that uploads can, just as downloads, be resumed at any point, even within one chunk. However, it currently does not work with encrypted files (or chunks) due to the non-deterministic nature of GPG. In order to make this feature useable on encrypted files, I propose to not overwrite encrypted files which are already present inside the `tmp` directory. diff --git a/doc/todo/Don__39__t_re-encrypt_when_key_is_already_in_.git__47__annex__47__tmp/comment_1_0650918c86fca0554755aede19a12fd3._comment b/doc/todo/Don__39__t_re-encrypt_when_key_is_already_in_.git__47__annex__47__tmp/comment_1_0650918c86fca0554755aede19a12fd3._comment new file mode 100644 index 0000000000..01ebb8c021 --- /dev/null +++ b/doc/todo/Don__39__t_re-encrypt_when_key_is_already_in_.git__47__annex__47__tmp/comment_1_0650918c86fca0554755aede19a12fd3._comment @@ -0,0 +1,53 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-02-17T15:33:31Z" + content=""" +@lykos, what happens when git-annex-remote-googledrive tries +to resume in this situation and git-annex has written a different tmp file +than what it partially uploaded before? + +I imagine it might resume after the last byte it sent before, and so +the uploaded file gets corrupted? + +If so, there are two hard problems with this idea: + +1. If git-annex changes to reuse the same tmp file, then git-annex-remote-googledrive + will work with the new git-annex, but corrupt files when used with an old + git-annex. +2. If someone has two clones, and starts an upload in one, but it's + interrupted and then started later in the second clone, it would again + corrupt the file that gets uploaded. (This would also happen, + with a single clone, if git-annex unused gets used in between upload + attempts, and cleans up the tmp file.) + +The first could be dealt with by some protocol flag, but the second seems +rather intractable, if git-annex-remote-googledrive behaves as I +hypothesize it might. And even if git-annex-remote-googledrive behaves +better than that somehow, it's certainly likely that some other remote +would behave that way at some point. + +---- + +As to implementation details, I started investigating before thinking +about the above problem, so am leaving some notes here: + +This would first require that the tmp file is written atomically, +otherwise an interruption in the wrong place would resume with a partial +file. (File size can't be used since gpg changes the file size with +compression etc.) Seems easy to implement: Make +Remote.Helper.Special.fileStorer write to a different tmp file and rename +it into place. + +Internally, git-annex pipes the content from gpg, so it is only written to +a temp file when using a remote that operates on files, as the external +remotes do. Some builtin remotes don't. So resuming an upload to an +encrypted remote past the chunk level can't work in general.
+ +There would need to be some way for the code that encrypts chunks +(or whole objects) to detect that it's being used with a remote that +operates on files, and then check if the tmp file already exists, and avoid +re-writing it. This would need some way to examine a `Storer` and tell +if it operates on files, which is not currently possible, so would need +some change to the data type. +"""]] diff --git a/doc/todo/Invert_remote_selection.mdwn b/doc/todo/Invert_remote_selection.mdwn index a091569f27..a30b23093f 100644 --- a/doc/todo/Invert_remote_selection.mdwn +++ b/doc/todo/Invert_remote_selection.mdwn @@ -28,3 +28,5 @@ This problem comes up surprisingly often due to: 5. Some repos being too large for a machine (e.g., repacking fails due to low memory), but which can still act like a dumb file-store. The problem gets worse when you have a lot of remotes or a lot of repos to manage (I have both). My impression is that this feature would require a syntax addition for git-annex-sync only. I like '!' because it behaves the same in GNU find and sh. + +[[!tag needsthought]] diff --git a/doc/todo/Invert_remote_selection/comment_4_f5ab9eec7ed0f080c57dbb594deafd13._comment b/doc/todo/Invert_remote_selection/comment_4_f5ab9eec7ed0f080c57dbb594deafd13._comment new file mode 100644 index 0000000000..0e80fa2272 --- /dev/null +++ b/doc/todo/Invert_remote_selection/comment_4_f5ab9eec7ed0f080c57dbb594deafd13._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-30T19:13:25Z" + content=""" +git-annex sync does support remote groups, so that might also help with +this use case without needing additional syntax? +"""]] diff --git a/doc/todo/MD5E_keys_without_file_size.mdwn b/doc/todo/MD5E_keys_without_file_size.mdwn index ca88550db9..12d50a9d92 100644 --- a/doc/todo/MD5E_keys_without_file_size.mdwn +++ b/doc/todo/MD5E_keys_without_file_size.mdwn @@ -1,3 +1,5 @@ Would it be hard to support MD5E keys that omit the -sSIZE part, the way this is allowed for URL keys? I have a use case where I have the MD5 hashes and filenames of files stored in the cloud, but not their sizes, and want to construct keys for these files to use with setpresentkey and registerurl. I could construct URL keys, but then I lose the error-checking and have to set annex.security.allow-unverified-downloads . Or maybe, extend URL keys to permit an -hMD5 hash to be part of the key? Another (and more generally useful) solution would be [[todo/alternate_keys_for_same_content/]]. Then can start with a URL-based key but then attach an MD5 to it as metadata, and have the key treated as a checksum-containing key, without needing to migrate the contents to a new key.
+ +[[!tag moreinfo]] diff --git a/doc/todo/Natively_support_s3__58____47____47___urls___40__for_addurl__44___get__44___etc__41__.mdwn b/doc/todo/Natively_support_s3__58____47____47___urls___40__for_addurl__44___get__44___etc__41__.mdwn index 50caf8cd51..d2a7866a89 100644 --- a/doc/todo/Natively_support_s3__58____47____47___urls___40__for_addurl__44___get__44___etc__41__.mdwn +++ b/doc/todo/Natively_support_s3__58____47____47___urls___40__for_addurl__44___get__44___etc__41__.mdwn @@ -24,3 +24,4 @@ git-annex version: 6.20180913+git33-g2cd5a723f-1~ndall+1 [[!meta author=yoh]] [[!tag projects/datalad]] +[[!tag moreinfo unlikely]] diff --git a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation.mdwn b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation.mdwn index a17aa8f02d..dbbf201b00 100644 --- a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation.mdwn +++ b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation.mdwn @@ -12,3 +12,4 @@ If needed example, here is http://datasets.datalad.org/allen-brain-observatory/v [[!meta author=yoh]] [[!tag projects/dandi]] +[[!tag needsthought]] diff --git a/doc/todo/S3_export_redirecting_to_key-value_store.mdwn b/doc/todo/S3_export_redirecting_to_key-value_store.mdwn index 2aab376bf0..942e66a441 100644 --- a/doc/todo/S3_export_redirecting_to_key-value_store.mdwn +++ b/doc/todo/S3_export_redirecting_to_key-value_store.mdwn @@ -1,3 +1,5 @@ S3 lets you [redirect](https://docs.aws.amazon.com/AmazonS3/latest/dev/how-to-page-redirect.html) requests for an object to another object, or to a URL. This could be used to export a git branch, in the manner of [[`git-annex-export`|git-annex-export]], but with annexed objects redirecting to a key-value S3 remote in the same bucket. Related: [[todo/simpler__44___trusted_export_remotes]] ; [[forum/Using_hashdirlower_layout_for_S3_special_remote]]. + +[[!tag needsthought unlikely]] diff --git a/doc/todo/Wishlist__58___Parity_files_on_all_files.mdwn b/doc/todo/Wishlist__58___Parity_files_on_all_files.mdwn index 01ebf8ce52..7e6d44aecd 100644 --- a/doc/todo/Wishlist__58___Parity_files_on_all_files.mdwn +++ b/doc/todo/Wishlist__58___Parity_files_on_all_files.mdwn @@ -63,3 +63,5 @@ Thankfully, we already have a technology that can fill in elegantly here: parity This would also enhance the data-checking capabilities of git-annex, as data loss could be fixed and new parity files generated from the recovered files transparently, self-healing the archive. + +[[!tag unlikely]] diff --git a/doc/todo/add_import_--to_command.mdwn b/doc/todo/add_import_--to_command.mdwn index ed4c6066db..2e833e99e8 100644 --- a/doc/todo/add_import_--to_command.mdwn +++ b/doc/todo/add_import_--to_command.mdwn @@ -4,3 +4,5 @@ I have a bunch of files I want to track with `git-annex` that are sitting in an git-annex import --to=s3-remote /mnt/usb-drive/myfiles The proposed `--to=remote` option would add the files to my repo as `import` normally does, but it wouldn't every keep the content in the repo, the only copy would now sit in `s3-remote`. As little disk space as possible would be staged temporarily in `~/my-laptop-repo`. Perhaps the easiest option would be to import a file normally, but them immediately do a `move` to `s3-remote`? 
But, ideally for larger files, we would want to stream them directly from `/mnt/usb-drive/myfiles` to `s3-remote` without ever staging them at `~/my-laptop-repo`. + +[[!tag unlikely needsthought]] diff --git a/doc/todo/add_limit_to_matching_options.mdwn b/doc/todo/add_limit_to_matching_options.mdwn index c009c87c43..7bcdce2a7c 100644 --- a/doc/todo/add_limit_to_matching_options.mdwn +++ b/doc/todo/add_limit_to_matching_options.mdwn @@ -12,3 +12,5 @@ I often transfer files via mediums that have transfer limits, but I am eventuall Currently, I've been using tricks to select a subset of the files, such as a range of file-sizes. + +[[!tag needsthought]] diff --git a/doc/todo/add_sftp_special_remote.mdwn b/doc/todo/add_sftp_special_remote.mdwn index 690566688b..b25f294114 100644 --- a/doc/todo/add_sftp_special_remote.mdwn +++ b/doc/todo/add_sftp_special_remote.mdwn @@ -21,3 +21,5 @@ repeatedly (though ssh connection caching helps some with that). > exposes this, when available. Some sftp servers can be locked down > so that the user can't run git-annex on them, so that could be the only > way to get diskreserve working for such a remote. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/add_tests_under_concurrency.mdwn b/doc/todo/add_tests_under_concurrency.mdwn index 6421d3f3af..3363bd0c02 100644 --- a/doc/todo/add_tests_under_concurrency.mdwn +++ b/doc/todo/add_tests_under_concurrency.mdwn @@ -1 +1,3 @@ To [[git-annex-test]] and [[git-annex-testremote]], add option to run tests under concurrency (-J). Many possible bugs are unique to the concurrent case, and it's the case I often use. While any bugs detected may be hard to reproduce, it's important to know _whether_ there are concurrency-related bugs. Much of the trust in git-annex comes from its extensive test suite, but it's somewhat concerning to trust it with important data when the concurrency case is not tested at all. + +[[!tag unlikely]] diff --git a/doc/todo/add_xxHash_backend.mdwn b/doc/todo/add_xxHash_backend.mdwn index cf9143ea09..8433047735 100644 --- a/doc/todo/add_xxHash_backend.mdwn +++ b/doc/todo/add_xxHash_backend.mdwn @@ -1 +1,3 @@ From https://cyan4973.github.io/xxHash/ , xxHash seems much faster than md5 with comparable quality. There's a Haskell implementation. + +[[!tag moreinfo]] diff --git a/doc/todo/add_xxHash_backend/comment_1_8993f493a8b43ffda9f8e0b6d5e186ac._comment b/doc/todo/add_xxHash_backend/comment_1_8993f493a8b43ffda9f8e0b6d5e186ac._comment new file mode 100644 index 0000000000..40e7e02f20 --- /dev/null +++ b/doc/todo/add_xxHash_backend/comment_1_8993f493a8b43ffda9f8e0b6d5e186ac._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T19:30:43Z" + content=""" +I looked at xxHash recently. I can't seem to find benchmarks of it compared +with other fast hashes like Blake2. +"""]] diff --git a/doc/todo/add_xxHash_backend/comment_2_945217badfaeba8672329d4f6ac4b8e2._comment b/doc/todo/add_xxHash_backend/comment_2_945217badfaeba8672329d4f6ac4b8e2._comment new file mode 100644 index 0000000000..1339435f8f --- /dev/null +++ b/doc/todo/add_xxHash_backend/comment_2_945217badfaeba8672329d4f6ac4b8e2._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-09T20:52:25Z" + content=""" +Let alone blake3, which is 5-6 times as fast as blake2 while still +apparently being a cryptographically secure hash. 
+"""]] diff --git a/doc/todo/addunlocked_config_setting/comment_5_ef936e0beb9d3446a4894885f065eecf._comment b/doc/todo/addunlocked_config_setting/comment_5_ef936e0beb9d3446a4894885f065eecf._comment new file mode 100644 index 0000000000..8892334d34 --- /dev/null +++ b/doc/todo/addunlocked_config_setting/comment_5_ef936e0beb9d3446a4894885f065eecf._comment @@ -0,0 +1,83 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="what am I doing wrong?" + date="2020-01-13T20:05:38Z" + content=""" +I have tried to use this but I do not see it in effect: + +[[!format sh \"\"\" +$> mkdir repo && cd repo && git init && git annex init && git annex config --set addunlocked anything && git show git-annex:config.log && touch 1 2 && git add 1 && git annex add 2 && git commit -m 'committing' && ls -l && git show +Initialized empty Git repository in /tmp/repo/.git/ +init (scanning for unlocked files...) +ok +(recording state in git...) +addunlocked anything ok +(recording state in git...) +1578945668.466039639s addunlocked anything +add 2 +ok +(recording state in git...) +[master (root-commit) e428211] committing + 2 files changed, 1 insertion(+) + create mode 100644 1 + create mode 120000 2 +total 4 +-rw------- 1 yoh yoh 0 Jan 13 15:01 1 +lrwxrwxrwx 1 yoh yoh 178 Jan 13 15:01 2 -> .git/annex/objects/pX/ZJ/SHA256E-s0--e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/SHA256E-s0--e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +commit e428211fe0c64e67cf45d8c92165c866db5ba75f (HEAD -> master) +Author: Yaroslav Halchenko +Date: Mon Jan 13 15:01:08 2020 -0500 + + committing + +diff --git a/1 b/1 +new file mode 100644 +index 0000000..e69de29 +diff --git a/2 b/2 +new file mode 120000 +index 0000000..ea46194 +--- /dev/null ++++ b/2 +@@ -0,0 +1 @@ ++.git/annex/objects/pX/ZJ/SHA256E-s0--e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/SHA256E-s0--e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + +\"\"\"]] + +so I have tried to say that \"anything\" (all files) should be added unlocked. But it seems that neither file (`1` added via `git add` and `2` added via `git annex add`) were added unlocked. + +
+Here is some info on version/config: (click to expand) + + +[[!format sh \"\"\" +(git-annex)lena:/tmp/repo[master] +$> cat .git/config +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true +[annex] + uuid = f220cc03-1510-4e23-acb5-b95723ecf9fc + version = 7 +[filter \"annex\"] + smudge = git-annex smudge -- %f + clean = git-annex smudge --clean -- %f +(dev3) 1 17256.....................................:Mon 13 Jan 2020 03:03:30 PM EST:. +(git-annex)lena:/tmp/repo[master] +$> git annex version +git-annex version: 7.20191230+git2-g2b9172e98-1~ndall+1 +build flags: Assistant Webapp Pairing S3 WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite +dependency versions: aws-0.20 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.1.0 ghc-8.6.5 http-client-0.5.14 persistent-sqlite-2.9.3 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external +operating system: linux x86_64 +supported repository versions: 7 +upgrade supported from repository versions: 0 1 2 3 4 5 6 +local repository version: 7 + +\"\"\"]] + +
+"""]] diff --git a/doc/todo/addunlocked_config_setting/comment_6_838a093daae08837f37ec00482839d3c._comment b/doc/todo/addunlocked_config_setting/comment_6_838a093daae08837f37ec00482839d3c._comment new file mode 100644 index 0000000000..163fbc662d --- /dev/null +++ b/doc/todo/addunlocked_config_setting/comment_6_838a093daae08837f37ec00482839d3c._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="kyle" + avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3" + subject="re: what am I doing wrong?" + date="2020-01-14T03:19:19Z" + content=""" +I believe that should be `git annex config --set annex.addunlocked anything` (i.e. an \"annex.\" in front of the name). +"""]] diff --git a/doc/todo/addurl_improvements.mdwn b/doc/todo/addurl_improvements.mdwn index 17ed129e8b..35129f3d57 100644 --- a/doc/todo/addurl_improvements.mdwn +++ b/doc/todo/addurl_improvements.mdwn @@ -8,7 +8,7 @@ When an external special remote tells git-annex a fuller URL for a given file, g It would be better if, in the above log, the URL key was based on dx://file-FJZjVx001pB2BQPVKY4zX8kk/A4.assembly1-trinity.fasta , which would preserve the .fasta extension in the key and therefore in the symlink target. -> [fixed|done]] --[[Joey]] +> [[fixed|done]] --[[Joey]] Also, it would be good if the external special remote could return an etag for the URL, which would be a value guaranteed to change if the URL's diff --git a/doc/todo/alternate_keys_for_same_content.mdwn b/doc/todo/alternate_keys_for_same_content.mdwn index fcc4bf99b7..81e93be62d 100644 --- a/doc/todo/alternate_keys_for_same_content.mdwn +++ b/doc/todo/alternate_keys_for_same_content.mdwn @@ -9,3 +9,4 @@ Also, sometimes one can determine the MD5 from the URL without downloading the f or because an MD5 was computed by a workflow manager that produced the file (Cromwell does this). The special remote's "CHECKURL" implementation could record an MD5E key in the alt_keys metadata field of the URL key. Then 'addurl --fast' could check alt_keys, and store in git an MD5E key rather than a URL key, if available. +[[!tag unlikely]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_1_7a7f287bcde5353072100294dd8edce6._comment b/doc/todo/alternate_keys_for_same_content/comment_1_7a7f287bcde5353072100294dd8edce6._comment new file mode 100644 index 0000000000..bb54600244 --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_1_7a7f287bcde5353072100294dd8edce6._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-30T18:36:17Z" + content=""" +This would mean that, every time something about a key is looked up in the +git-annex branch, it would also need to look at the metadata to see if this +`alt_keys` field is set. + +So it doubles the time of every single query of the git-annex branch. + +I don't think that's a good idea, querying the git-annex branch is already +often a bottleneck to commands. 
+"""]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_2_0f464f4970e499371fcb65e0d06202cf._comment b/doc/todo/alternate_keys_for_same_content/comment_2_0f464f4970e499371fcb65e0d06202cf._comment new file mode 100644 index 0000000000..2eddbbf4de --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_2_0f464f4970e499371fcb65e0d06202cf._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="alternate keys" + date="2020-01-31T19:23:35Z" + content=""" +\"every time something about a key is looked up in the git-annex branch, it would also need to look at the metadata to see if this alt_keys field is set\" -- not every time, just when checking if the key is checksum-based, and if content matches the checksum. Also, isn't metadata [[cached in a database|design/caching_database]]? +"""]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_3_c99b23e878e37e205a3182d3b6d3f2b2._comment b/doc/todo/alternate_keys_for_same_content/comment_3_c99b23e878e37e205a3182d3b6d3f2b2._comment new file mode 100644 index 0000000000..857b25bc12 --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_3_c99b23e878e37e205a3182d3b6d3f2b2._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="https://christian.amsuess.com/chrysn" + nickname="chrysn" + avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc" + subject="Re: comment 1 " + date="2020-01-31T19:47:59Z" + content=""" +The proposed implementation may be inefficient, but the idea has merit. + +What if that information is stored in a place where it can be used to verify migrations? + +For example, when entering that the migrating remote dropped the data into `git-annex:aaa/bbb/SHA1-s1234--somehash.log`, somewhere near there a record could be added that this was migrated to SHA512-s1234--longerhash. When then all the other remotes are asked to drop that file, they can actually do that because they see that it has been migrated, can verify the migration and are free to drop the file. + +Even later, when a remote wants to get an old name (eg. because it checked out an old version of master), it can look up the key, find where it was migrated to, and make the data available under its own name (by copying, or maybe by placing a symlink pointing file from `.git/annex/objects/Aa/Bb/SHA1-s1234--somehash/SHA1-s1234--somehash` to the new. +"""]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_4_a5fb6045595da0c490098e46f76db9b8._comment b/doc/todo/alternate_keys_for_same_content/comment_4_a5fb6045595da0c490098e46f76db9b8._comment new file mode 100644 index 0000000000..a37c5b6224 --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_4_a5fb6045595da0c490098e46f76db9b8._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="comment 4" + date="2020-01-31T20:32:00Z" + content=""" +\"can be used to verify migrations\" -- my hope was to *avoid* migrations, i.e. to get the benefit you'd get from migrating to a checksum-based key, without doing the migration. 
+"""]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_5_230d35bd623189818002901455964ca4._comment b/doc/todo/alternate_keys_for_same_content/comment_5_230d35bd623189818002901455964ca4._comment new file mode 100644 index 0000000000..4be01e4257 --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_5_230d35bd623189818002901455964ca4._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="simpler proposal" + date="2020-01-31T21:46:57Z" + content=""" +So, to fully and properly implement what the title of this todo suggests -- \"alternate keys for same content\" -- might be hard. But to simply enable adding checksums to WORM/URL keys, stored separately on the git-annex branch rather than encoded in the key's name, is simpler. This would let some WORM/URL keys to be treated as checksum-based keys when getting contents from untrusted remotes or when checking integrity with `git-annex-fsck`. But this isn't really \"alternate keys for same content\": the content would be stored under only the WORM/URL key under which it was initially recorded. The corresponding MD5 key would not be recorded in [[location_tracking]] as present. + +Checking whether a WORM/URL key has an associated checksum could be sped up by keeping a Bloom filter representing the set of WORM/URL keys for which `alt_keys` is set. + +In the `addurl --fast` case for special remotes, where the remote can determine a file's checksum without downloading, a checksum-based key would be recorded to begin with, as happens with `addurl` without `--fast`. Currently I do this by manually calling plumbing commands like `git-annex-setpresentkey`, but having `addurl` do it seems better. +"""]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_6_ae8355ec917e7a7a240cdb88714c55d0._comment b/doc/todo/alternate_keys_for_same_content/comment_6_ae8355ec917e7a7a240cdb88714c55d0._comment new file mode 100644 index 0000000000..92d5ba48f7 --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_6_ae8355ec917e7a7a240cdb88714c55d0._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="Chel" + avatar="http://cdn.libravatar.org/avatar/a42feb5169f70b3edf7f7611f7e3640c" + subject="comment 6" + date="2020-02-01T02:32:01Z" + content=""" +There is also `aaa/bbb/*.log.cid` in git-annex branch for \"per-remote content identifiers for keys\". +It could be another place to store alternate keys, but it is per-remote, so... no. + +As for the metadata field `alt_keys` — it is another case of +\"[setting a metadata field to a key](/todo/Bidirectional_metadata/#comment-788380998b25267c5b99c4a865277102)\" +in [[Bidirectional metadata]]. + +Also, there is an interesting idea of [[git-annex-migrate using git-replace]]. + +By the way, as far as I know (maybe things have changed since then), +ipfs has a similar problem of different identifiers for the same content. +Because it encodes how things are stored. And hash functions can also be changed. 
+"""]] diff --git a/doc/todo/alternate_keys_for_same_content/comment_7_1c0a975893c63c14b3f6e17712b5191c._comment b/doc/todo/alternate_keys_for_same_content/comment_7_1c0a975893c63c14b3f6e17712b5191c._comment new file mode 100644 index 0000000000..80f61be55d --- /dev/null +++ b/doc/todo/alternate_keys_for_same_content/comment_7_1c0a975893c63c14b3f6e17712b5191c._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="potential security issues?" + date="2020-02-06T21:00:55Z" + content=""" +I wonder if storing checksums in a general-purpose mutable metadata field may cause security issues. Someone could use the [[`git-annex-metadata`|git-annex-metadata]] command to overwrite the checksum. It should be stored in a read-only field written only by `git-annex` itself, like the `field-lastchanged` metadata already is. + +Of course, if someone is able to write the [[git-annex branch|internals#The_git-annex_branch]] directly, or get the user to pull merges to it, they could alter the checksum stored there. Maybe, only trust stored checksums if `merge.verifySignatures=true`? +"""]] diff --git a/doc/todo/annex.addunlocked_in_gitattributes.mdwn b/doc/todo/annex.addunlocked_in_gitattributes.mdwn index 45e064e03e..22ff605a53 100644 --- a/doc/todo/annex.addunlocked_in_gitattributes.mdwn +++ b/doc/todo/annex.addunlocked_in_gitattributes.mdwn @@ -9,3 +9,5 @@ would effectively build up a file match expression. So it might then follow that the git config should also be a file match expression, with "true" being the same as "anything" and "false" the same as "nothing" for back-compat. --[[Joey]] + +> This got accomplished by other means, [[done]] --[[Joey]] diff --git a/doc/todo/annex.thin_without_hardlinks.mdwn b/doc/todo/annex.thin_without_hardlinks.mdwn index dd3047c78a..f33b8db949 100644 --- a/doc/todo/annex.thin_without_hardlinks.mdwn +++ b/doc/todo/annex.thin_without_hardlinks.mdwn @@ -13,3 +13,5 @@ need a git hook run before checkout to rescue such files. Also some parts of git-annex's code, including `withObjectLoc`, assume that the .annex/objects is present, and so it would need to be changed to look at the work tree file. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/annex_merge_--remotes.mdwn b/doc/todo/annex_merge_--remotes.mdwn index 2c554aed00..46ad888b18 100644 --- a/doc/todo/annex_merge_--remotes.mdwn +++ b/doc/todo/annex_merge_--remotes.mdwn @@ -2,3 +2,4 @@ ATM 'annex merge' does not accept any parameter to specify which remotes to cons [[!meta author=yoh]] [[!tag projects/datalad]] +[[!tag moreinfo]] diff --git a/doc/todo/annex_merge_--remotes/comment_4_8cb7787fbfae63c45a155ee6ef270922._comment b/doc/todo/annex_merge_--remotes/comment_4_8cb7787fbfae63c45a155ee6ef270922._comment new file mode 100644 index 0000000000..be908e6592 --- /dev/null +++ b/doc/todo/annex_merge_--remotes/comment_4_8cb7787fbfae63c45a155ee6ef270922._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-29T15:10:09Z" + content=""" +Based on my last comment, I think, if you still need this, you +should try configuring remote.name.fetch to avoid fetching the git-annex +branches you don't want to merge. + +If that's not sufficient, followup and we can think about the other options +I discussed earlier. 
+"""]] diff --git a/doc/todo/arm64_autobuilder.mdwn b/doc/todo/arm64_autobuilder.mdwn index 22826d61be..4d2566575f 100644 --- a/doc/todo/arm64_autobuilder.mdwn +++ b/doc/todo/arm64_autobuilder.mdwn @@ -11,3 +11,6 @@ autobuilder? --[[Joey]] Currently running release builds for arm64 on my phone, but it's not practical to run an autobuilder there. --[[Joey]] + +>> [[done]]; the current qemu based autobuilder is not ideal, often gets +>> stuck, but there's no point leaving this todo open. --[[Joey]] diff --git a/doc/todo/assistant_should_detect_added_remotes.mdwn b/doc/todo/assistant_should_detect_added_remotes.mdwn index 288609ae11..4e690c8805 100644 --- a/doc/todo/assistant_should_detect_added_remotes.mdwn +++ b/doc/todo/assistant_should_detect_added_remotes.mdwn @@ -3,3 +3,5 @@ I think it would be useful if the assistant (when monitoring a repo) could autom If I then add each repo as a remote of the other (from the command-line), assistant will still not sync files between the repos until I stop all the assistants running and then restart them. Presumably only on launch does the assistant check the list of remotes? I think this is perhaps causing issues for users not just on the command-line but also for users who create multiple local remotes from the webapp and then combine them, since the webapp is perhaps not restarting the assistant daemons after the combine operation? I'm not sure about this… + +[[!tag confirmed]] diff --git a/doc/todo/assistant_support_hide-missing.mdwn b/doc/todo/assistant_support_hide-missing.mdwn index 12ea72d5d5..9df2bcec8d 100644 --- a/doc/todo/assistant_support_hide-missing.mdwn +++ b/doc/todo/assistant_support_hide-missing.mdwn @@ -9,3 +9,5 @@ This would invole: * The assistant ought to update the adjusted branch at some point after downloads, but it's not clear when. Perhaps this will need to be deferred until it can be done more cheaply, so it can do it after every file. + +[[!tag confirmed]] diff --git a/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters.mdwn b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters.mdwn index dd95be5d69..493c8e42db 100644 --- a/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters.mdwn +++ b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters.mdwn @@ -14,3 +14,5 @@ At least for the built in special remotes (not external) this should be possible [[!meta author=yoh]] [[!tag projects/dandi]] + +> [[done]] --[[Joey]] diff --git a/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_1_63d48db769863cdfe411404e8c26a399._comment b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_1_63d48db769863cdfe411404e8c26a399._comment new file mode 100644 index 0000000000..a3e2d62db4 --- /dev/null +++ b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_1_63d48db769863cdfe411404e8c26a399._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T16:12:01Z" + content=""" +There's a subtle backwards compatibility issue here: The stored config of a +special remote is used when enabling it, so if an older version of +git-annex is used to enable a remote, there might be a setting that it does +not know about, or a value it doesn't understand. 
If that caused it to fail +to enable the remote it wouldn't be possible to use it, at least w/o +changing/removing the config. + +For example, autoenable=true did not use to be a config setting, but older +versions of git-annex can still use remotes that have that. + +Another example is chunk=. While older versions of git-annex don't +understand that, and so won't use chunks when storing/retrieving, +the newer git-annex falls back to getting the unchunked object. +So things stored by the old git-annex can be retrieved by the new, +but not vice-versa. + +Another example is S3's storageclass=. Older git-annex doesn't understand +it, so uses the default storage class, but that behavior is interoperable +with the new behavior. + +So the stored config of a remote should not be checked +every time the remote is instantiated, but only the new settings passed +to initremote/enableremote. That will complicate the API, since currently +the old and new config are combined together by enableremote. +"""]] diff --git a/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_2_afa8c10bd3b1df649c1f643430b300e9._comment b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_2_afa8c10bd3b1df649c1f643430b300e9._comment new file mode 100644 index 0000000000..d5c16d746f --- /dev/null +++ b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_2_afa8c10bd3b1df649c1f643430b300e9._comment @@ -0,0 +1,37 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-07T17:59:35Z" + content=""" +I was thinking about implementing this today, but the shattered attack got +in the way. Anyway, it seems like most of a plan: + +* Make RemoteConfig contain Accepted or Proposed values. enableremote and initremote + set Proposed values; Accepted values are anything read from git-annex:remote.log + (update: done) +* When a RemoteConfig value fails to parse, it may make sense to use a + default instead when it's Accepted, and error out when it's Proposed. This could + be used when parsing foo=yes/no to avoid treating foo=true the same as + foo=no, which some things currently do + (eg importtree, exporttree, embedcreds). + (update: Done for most yes/no and true/false parsers, surely missed a + few though, (including autoenable).) +* Add a Remote method that returns a list of all RemoteConfig fields it + uses. This is the one part I'm not sure about, because that violates DRY. + It would be nicer to have a parser that can also generate a list of the + fields it parses. +* Before calling Remote setup, see if there is any Proposed value in + RemoteConfig whose field is not in the list. If so, error out. +* For external special remotes, add a LISTCONFIG message. The program + responds with a list of all the fields it may want to later GETCONFIG. + If the program responds with UNSUPPORTED-REQUEST then it needs to return + something that says any and all fields are allowed. +* External special remotes are responsible for parsing the content of + GETCONFIG, as they do now, and can error out if there's a problem. + +Having a method return a list of fields will also allow +implementing +. +It may be worthwhile to add, along with the field name, a human readable +description of its value.
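To make the Proposed/Accepted split concrete, here is a minimal standalone Haskell sketch of the idea (the real type lives in Types.ProposedAccepted; the parser below is only an illustration, not git-annex's actual code):

    data ProposedAccepted t = Proposed t | Accepted t
            deriving (Show, Eq)

    fromProposedAccepted :: ProposedAccepted t -> t
    fromProposedAccepted (Proposed v) = v
    fromProposedAccepted (Accepted v) = v

    -- Parse a yes/no style setting. A bad Proposed value (from
    -- initremote/enableremote) is an error; a bad Accepted value
    -- (read back from remote.log) falls back to a default, so remotes
    -- set up by other versions of git-annex keep working.
    parseYesNo :: Bool -> ProposedAccepted String -> Either String Bool
    parseYesNo _ (Proposed v) = maybe (Left ("bad value: " ++ v)) Right (yesNo v)
    parseYesNo def (Accepted v) = Right (maybe def id (yesNo v))

    yesNo :: String -> Maybe Bool
    yesNo "yes" = Just True
    yesNo "no" = Just False
    yesNo _ = Nothing

The point is that a value the user just typed is rejected loudly, while an old value read back from remote.log degrades to a default, preserving the backwards compatibility described in comment 1.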
+"""]] diff --git a/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_3_f19ff768a3903f80dbaa378b74d2a7e3._comment b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_3_f19ff768a3903f80dbaa378b74d2a7e3._comment new file mode 100644 index 0000000000..45fa03522e --- /dev/null +++ b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_3_f19ff768a3903f80dbaa378b74d2a7e3._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-15T17:52:07Z" + content=""" +Unknown fields will now result in an error message. And values like yes/no +and true/false get parsed upfront. + +External special remotes currently still accept all fields, so work still +needs to be done to extend the protocol to list acceptable fields. +"""]] diff --git a/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_4_b031ee12622b1ed28b6ffc8c037f7d30._comment b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_4_b031ee12622b1ed28b6ffc8c037f7d30._comment new file mode 100644 index 0000000000..526b389028 --- /dev/null +++ b/doc/todo/assure_correct_names___40__and_values__41___for_special_remotes_parameters/comment_4_b031ee12622b1ed28b6ffc8c037f7d30._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-17T21:15:16Z" + content=""" +Added LISTCONFIGS to external special remote protocol, and once your +special remotes implement it, initremote will notice if the user provides +any setting with the wrong name. + +(external special remotes could already verify the values of settings using +GETCONFIG at the INITREMOTE stage, and use INITREMOTE-FAILURE to inform the +user of bad or missing values) +"""]] diff --git a/doc/todo/auto-lock_files_after_one_edit.mdwn b/doc/todo/auto-lock_files_after_one_edit.mdwn index 1d6acb2311..e9164c46e2 100644 --- a/doc/todo/auto-lock_files_after_one_edit.mdwn +++ b/doc/todo/auto-lock_files_after_one_edit.mdwn @@ -1,3 +1,5 @@ Can an option be added to unlock a file in such a way that the next time it gets committed, it is automatically re-locked? Or to just have this done for all unlocked files? It's a common use case to just do one edit / re-generation of a locked file. If you forget to lock it (or a script that was supposed to lock it after modification fails in the middle), you end up with a permanently unlocked file, which can cause [[performance issues|bugs/git_status_extremely_slow_with_v7]] downstream, and also [[look odd when missing|todo/symlinks_for_not-present_unlocked_files]], lead to multiple copies when present (or risk [[annex.thin issues|bugs/annex.thin_can_cause_corrupt___40__not_just_missing__41___data]]), and leave the file open to inadvertent/unintended modification. Also, locking the file manually litters the git log with commits that don't really change repo contents. + +[[!tag needsthought]] diff --git a/doc/todo/batch_operations_for_remotes.mdwn b/doc/todo/batch_operations_for_remotes.mdwn index 226417cab7..a7638a0040 100644 --- a/doc/todo/batch_operations_for_remotes.mdwn +++ b/doc/todo/batch_operations_for_remotes.mdwn @@ -1 +1,3 @@ Current special remote protocol works on one file at a time. With some remotes, a batch operation can be more efficient, e.g. querying the status of many URLs in one API call. 
It would be good if special remotes could optionally implement batch versions of their operations, and these versions were used by batch-mode git-annex commands. Or maybe, keep the current set of commands but let the remote read multiple requests and then send multiple replies? + +[[!tag moreinfo]] diff --git a/doc/todo/better_error_messages_for_local_remote.mdwn b/doc/todo/better_error_messages_for_local_remote.mdwn index 189008596a..a66a0138ba 100644 --- a/doc/todo/better_error_messages_for_local_remote.mdwn +++ b/doc/todo/better_error_messages_for_local_remote.mdwn @@ -9,3 +9,5 @@ object in it. This should be fixable by eg, catching all exceptions when running Annex operations on a remote, adding its path to the message and rethrowing. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/better_way_to_get_missing_files.mdwn b/doc/todo/better_way_to_get_missing_files.mdwn index 4e7ddb4a07..1eec6b4be5 100644 --- a/doc/todo/better_way_to_get_missing_files.mdwn +++ b/doc/todo/better_way_to_get_missing_files.mdwn @@ -32,3 +32,5 @@ Two open questions: objects over time. So leave the update up to the user to run the command when they want it? But then the user may get confused, why did it download files and they didn't appear? + +[[!tag needsthought]] diff --git a/doc/todo/borg_special_remote.mdwn b/doc/todo/borg_special_remote.mdwn index 0d4e417289..c6b66ab94b 100644 --- a/doc/todo/borg_special_remote.mdwn +++ b/doc/todo/borg_special_remote.mdwn @@ -13,3 +13,5 @@ backups, and git-annex would then be aware of what was backed up in borg, and could do things like count that as a copy. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn index 4fc5706e21..e4482c5a1f 100644 --- a/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn +++ b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default.mdwn @@ -3,3 +3,5 @@ Changing the default would also let one [[repeatedly re-import a directory while keeping original files in place|bugs/impossible__40____63____41___to_continuously_re-import_a_directory_while_keeping_original_files_in_place]]. I realize this would be a breaking change for some workflows; warning of it [[like git does|todo/warn_of_breaking_changes_same_way_git_does]] would mitigate the breakage. + +[[!tag unlikely]] diff --git a/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_3_6a7c2cbd85b5ac8e90933aaa9147e004._comment b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_3_6a7c2cbd85b5ac8e90933aaa9147e004._comment new file mode 100644 index 0000000000..4d1829de44 --- /dev/null +++ b/doc/todo/change_git-annex-import_not_to_delete_original_files_by_default/comment_3_6a7c2cbd85b5ac8e90933aaa9147e004._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-30T17:09:00Z" + content=""" +See [[todo/remove_legacy_import_directory_interface]]. 
+"""]] diff --git a/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls.mdwn b/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls.mdwn index c7f5507afe..fea3e171a8 100644 --- a/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls.mdwn +++ b/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls.mdwn @@ -6,3 +6,5 @@ Thanks in advance for considering [[!meta author=yoh]] [[!tag projects/datalad]] + +> [[done]] diff --git a/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls/comment_4_ceb543ebac42bf7a5f1571ff3f4ef977._comment b/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls/comment_4_ceb543ebac42bf7a5f1571ff3f4ef977._comment new file mode 100644 index 0000000000..55b02b182c --- /dev/null +++ b/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls/comment_4_ceb543ebac42bf7a5f1571ff3f4ef977._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-30T16:23:55Z" + content=""" +Occurs to me that any git-annex command could result in an automatic +init, and since v7 is default, will enter an adjusted branch when on a +crippled filesystem. + +I don't think it makes sense to add --progress to every single +git-annex command. + +I suppose, if your code always runs git-annex init after clone, then it +would be good enough to have git-annex init be the only thing that +supports --progress. If something else needs it (maybe the view commands), +we can treat that separately. +"""]] diff --git a/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls/comment_5_ec99ba117fac299fc361a32de4ff29a5._comment b/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls/comment_5_ec99ba117fac299fc361a32de4ff29a5._comment new file mode 100644 index 0000000000..42452649f0 --- /dev/null +++ b/doc/todo/config_setting_to_force_--progress_reporting_for_some_git_calls/comment_5_ec99ba117fac299fc361a32de4ff29a5._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-01-30T16:38:30Z" + content=""" +Heh, looking at the code, [[!commit +24838547e2475b37d7e910361f9b6e087a1a0648]] in 2018 +made --progress be unconditionally passed when entering an adjusted branch. + +That was done for unrelated reasons, but I don't think there's anything more +to do on this now. +"""]] diff --git a/doc/todo/confirmed.mdwn b/doc/todo/confirmed.mdwn new file mode 100644 index 0000000000..8df3805a1a --- /dev/null +++ b/doc/todo/confirmed.mdwn @@ -0,0 +1,8 @@ +This tag is for todo items that have an agreed upon plan of action, but +have not been implemented yet. + +[[!inline pages="todo/* and !todo/*/* and !todo/done and !link(todo/done) +and link(todo/confirmed) +and !*/Discussion and !todo/moreinfo and !todo/confirmed +and !todo/needsthought and !todo/unlikely" show=0 feedlimit=10 +archive=yes template=buglist]] diff --git a/doc/todo/consider_meow_backend.mdwn b/doc/todo/consider_meow_backend.mdwn index cb2aea4e3e..f826b387a1 100644 --- a/doc/todo/consider_meow_backend.mdwn +++ b/doc/todo/consider_meow_backend.mdwn @@ -33,3 +33,6 @@ be useful to speed up checks on larger files. The license is a I know it might sound like a conflict of interest, but I *swear* I am not bringing this up only as a oblique feline reference. 
;) -- [[anarcat]] + +> Let's concentrate on [[xxhash|todo/add_xxHash_backend]] or other new hashes that are getting general +> adoption, not niche hashes like meow. [[done]] --[[Joey]] diff --git a/doc/todo/consider_meow_backend/comment_1_33266c088bbf1c7fa93917ff86a3cfcd._comment b/doc/todo/consider_meow_backend/comment_1_33266c088bbf1c7fa93917ff86a3cfcd._comment new file mode 100644 index 0000000000..c12a24cd92 --- /dev/null +++ b/doc/todo/consider_meow_backend/comment_1_33266c088bbf1c7fa93917ff86a3cfcd._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T19:36:32Z" + content=""" +xxhash seems to fill a similar niche and is getting a lot more use from +what I can see. + +Meow seems to claim a faster gb/s rate than xxhash does, but +it's hard to tell if the benchmarks are really equivilant. +"""]] diff --git a/doc/todo/copy_instead_of_rename_on_reinject.mdwn b/doc/todo/copy_instead_of_rename_on_reinject.mdwn index 1aa3274b52..7b9c72510d 100644 --- a/doc/todo/copy_instead_of_rename_on_reinject.mdwn +++ b/doc/todo/copy_instead_of_rename_on_reinject.mdwn @@ -32,3 +32,5 @@ surprise users... I suggest using a logic similar to [[git-annex-import]] for consistency reasons. Thanks! -- [[anarcat]] + +[[!tag unlikely]] diff --git a/doc/todo/dockerized_external_special_remotes.mdwn b/doc/todo/dockerized_external_special_remotes.mdwn index 264430b48c..af06524e6a 100644 --- a/doc/todo/dockerized_external_special_remotes.mdwn +++ b/doc/todo/dockerized_external_special_remotes.mdwn @@ -1 +1,3 @@ If an external special remote is implemented as a Docker container, it can be safely autoenabled and run in a sandboxed way. So the distributor of a repo that has annex files fetchable with a given special remote, could have the docker tag for the special remote configured on the git-annex branch, and users could then clone and use the repo without needing to install anything. + +[[!tag needsthought]] diff --git a/doc/todo/document_git-annex_dependencies.mdwn b/doc/todo/document_git-annex_dependencies.mdwn index 861421c5ac..90b98c7898 100644 --- a/doc/todo/document_git-annex_dependencies.mdwn +++ b/doc/todo/document_git-annex_dependencies.mdwn @@ -1 +1,3 @@ It would help to document, in one place, the external programs and libraries on which git-annex depends for various functionalities, including optional ones. Ones I know: curl, gpg, bup. But there are also references in places to lsof, rsync, nocache. For reliable packaging, it would be useful to have an authoritative list of dependencies and which functionality each supports. + +[[!tag unlikely]] diff --git a/doc/todo/documenting_sqlite_database_schemas.mdwn b/doc/todo/documenting_sqlite_database_schemas.mdwn index 5a67c19212..9702501f5d 100644 --- a/doc/todo/documenting_sqlite_database_schemas.mdwn +++ b/doc/todo/documenting_sqlite_database_schemas.mdwn @@ -1,3 +1,5 @@ If a spec of the [[sqlite database schemas|todo/sqlite_database_improvements]] could be added to the [[internals]] docs, this would open some possibilities for third-party tools based on this info. E.g. one could write some sqlite3 queries to get aggregate info on the number (and total size?) of keys present in specific combinations of repos. It would of course be understood that this is internal info subject to frequent change. Also, if [[Sometimes the databases are used for data that has not yet been committed to git|devblog/day_607__v8_is_done]], this would improve [[future_proofing]]. 
+ +[[!tag needsthought unlikely]] diff --git a/doc/todo/documenting_sqlite_database_schemas/comment_1_cc94cb2467c2b261c0b79c64853103a5._comment b/doc/todo/documenting_sqlite_database_schemas/comment_1_cc94cb2467c2b261c0b79c64853103a5._comment new file mode 100644 index 0000000000..4003cda633 --- /dev/null +++ b/doc/todo/documenting_sqlite_database_schemas/comment_1_cc94cb2467c2b261c0b79c64853103a5._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T18:07:00Z" + content=""" +There are not any situations where after losing the sqlite databases +git-annex can't recover the information that was stored in them by other +means. I know because the v8 upgrade deletes all the old sqlite databases +and then recovers the information by other means. So no future-proofing +impact here. +"""]] diff --git a/doc/todo/documenting_sqlite_database_schemas/comment_2_06bc5d625d9a3e7522c7f6df53a7ac3d._comment b/doc/todo/documenting_sqlite_database_schemas/comment_2_06bc5d625d9a3e7522c7f6df53a7ac3d._comment new file mode 100644 index 0000000000..77b033984a --- /dev/null +++ b/doc/todo/documenting_sqlite_database_schemas/comment_2_06bc5d625d9a3e7522c7f6df53a7ac3d._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-06T18:16:13Z" + content=""" +It's easy enough to dump the database and see its schema. + + joey@darkstar:~/lib/big>sqlite3 .git/annex/keys/db + sqlite> .dump + CREATE TABLE IF NOT EXISTS "associated"("id" INTEGER PRIMARY KEY,"key" VARCHAR NOT NULL,"file" VARCHAR NOT NULL,CONSTRAINT "key_file_index" UNIQUE ("key","file"),CONSTRAINT "file_key_index" UNIQUE ("file","key")); + CREATE TABLE IF NOT EXISTS "content"("id" INTEGER PRIMARY KEY,"key" VARCHAR NOT NULL,"cache" VARCHAR NOT NULL,CONSTRAINT "key_cache_index" UNIQUE ("key","cache")); + +Or the fully typed schema can be looked up in the haskell code +(Database/Keys/Sql.hs) + +I think that how the information in the databases relates to the state of the +repository, and how it's updated from the git-annex branch etc is just as +important as the schema. For example, if you wanted to use this database to +query files using a key, you'd need to know this database only gets +populated for unlocked files not locked files. And that the database may not +reflect recent changes to the working tree, and there's a complicated process +that can be used to update it to reflect any recent changes. + +That's rather deep into the implementation details to be documenting +outside the code. +"""]] diff --git a/doc/todo/does_not_preserve_timestamps/comment_11_eea115becce4ac79932d8cfa2903f20c._comment b/doc/todo/does_not_preserve_timestamps/comment_11_eea115becce4ac79932d8cfa2903f20c._comment new file mode 100644 index 0000000000..6ef3a96290 --- /dev/null +++ b/doc/todo/does_not_preserve_timestamps/comment_11_eea115becce4ac79932d8cfa2903f20c._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="https://christian.amsuess.com/chrysn" + nickname="chrysn" + avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc" + subject="Summary; Application: shared thumbnails" + date="2020-01-10T08:41:18Z" + content=""" +There are two conflicting approaches to mtimes: + +* Treat them as local artifacts + + This works great with Make, and generally with any software that works on \"is newer than\" properties. 
+ +* Treat them as preservation-worthy file attributes + + This is generally required by tools that compare time stamps by identity. + +Both approaches break tools that expect the other, and no single out-of-the-box choice will make all users happy. Tools like metastore, a bespoke solution like etckeeper's generated mkdir/chmod file or a git-annex solution like [[storing the full mtime at genmetadata time|bugs/file_modification_time_should_be_stored_in_exactly_one_metadata_field/]] with a (local or repository-wide) option to set the mtime at annex-get time would be convenient. + +One more application where this would be relevant is sharing generated thumbnails among clones of repositories (to eventually maybe even have them available when the full files are not present) following the [XDG specification on shared thumnail repositories](https://specifications.freedesktop.org/thumbnail-spec/thumbnail-spec-latest.html#SHARED). Not only does that design rely on the mtimes of the thumbnail and the file to match, it even encodes the mtime again inside the thumbnail, practically requiring all checkouts to not only have consistent mtimes between thumbnails and files, but identical ones. +"""]] diff --git a/doc/todo/encrypt_only_the_credentials.mdwn b/doc/todo/encrypt_only_the_credentials.mdwn index 0458e54f42..d510f45002 100644 --- a/doc/todo/encrypt_only_the_credentials.mdwn +++ b/doc/todo/encrypt_only_the_credentials.mdwn @@ -1 +1,3 @@ Is it possible to add an option, for initremote/enableremote, to encrypt the credentials but not the contents? Then it would be possible to have an exporttree remote while using embedcreds. It would also be good if locally stored credentials could be stored in encrypted form, and decrypted for use as needed. I'm uneasy about keeping credentials accessible without a passphrase. + +[[!tag confirmed]] diff --git a/doc/todo/encrypting_URLs.mdwn b/doc/todo/encrypting_URLs.mdwn index 1b8fe9e103..ec295c3ecf 100644 --- a/doc/todo/encrypting_URLs.mdwn +++ b/doc/todo/encrypting_URLs.mdwn @@ -7,3 +7,4 @@ store files under paths like s3://mybucket/randomstring/myfile ; the URL is "pub If the URLs could be stored encrypted in the git-annex branch, one could track such files using the ordinary web remote. One could use an S3 export-tree remote to share a directory with specific recipient(s), without them needing either AWS credentials or git-annex. +[[!tag unlikely moreinfo]] diff --git a/doc/todo/encrypting_URLs/comment_1_9d0dda82d1f3907e6478a95608388836._comment b/doc/todo/encrypting_URLs/comment_1_9d0dda82d1f3907e6478a95608388836._comment new file mode 100644 index 0000000000..d86408df35 --- /dev/null +++ b/doc/todo/encrypting_URLs/comment_1_9d0dda82d1f3907e6478a95608388836._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-30T18:25:58Z" + content=""" +Is this about SETURLPRESENT in an external special remote, or is addurl +also supposed to enctypt an url? And how would addurl know if the user +wants to encrypt it, and using what gpg keys? + +If your git-annex repo contains information about files you want to remain +private, why not just keep that repo private? +"""]] diff --git a/doc/todo/export_paired_rename_innefficenctcy.mdwn b/doc/todo/export_paired_rename_innefficenctcy.mdwn index 9284b74d9a..029a062c90 100644 --- a/doc/todo/export_paired_rename_innefficenctcy.mdwn +++ b/doc/todo/export_paired_rename_innefficenctcy.mdwn @@ -8,3 +8,5 @@ Perhaps: Find pairs of renames that swap content between two files. 
+Run each pair in turn. Then run the current rename code. Although this still probably misses cases where, e.g., content cycles among 3 files, and the same content among 3 other files. Is there a general algorithm? + +[[!tag needsthought]] diff --git a/doc/todo/external_backends.mdwn b/doc/todo/external_backends.mdwn index bf6470b5bd..e937dd6a59 100644 --- a/doc/todo/external_backends.mdwn +++ b/doc/todo/external_backends.mdwn @@ -3,3 +3,5 @@ It would be good if one could define custom external [[backends]], the way one c @joey pointed out a potential problem: "needing to deal with the backend being missing or failing to work could have wide repurcussions in the code base." I wonder if there are ways around that. Suppose you specified a default backend to use in case a custom one was unavailable? Then you could always compute a key from a file, even if it's not in the right backend. And once a key is stored in git-annex, most of git-annex treats the key as just a string. If the custom backend supports checksum verification, then without the backend's implementation, keys from that backend would be treated like WORM/URL keys that do not support checksum checking. Thoughts? + +[[!tag needsthought]] diff --git a/doc/todo/external_remote_querying_transition.mdwn b/doc/todo/external_remote_querying_transition.mdwn index e0be664bff..f0399108a8 100644 --- a/doc/todo/external_remote_querying_transition.mdwn +++ b/doc/todo/external_remote_querying_transition.mdwn @@ -48,3 +48,5 @@ subsequent WHEREIS, which may complicate its code slightly. Note that the protocol does allow querying with GETCONFIG etc before responding to a WHEREIS request. + +[[!tag confirmed]] diff --git a/doc/todo/git-annex-cat.mdwn b/doc/todo/git-annex-cat.mdwn index ed49cca89c..e21fd83062 100644 --- a/doc/todo/git-annex-cat.mdwn +++ b/doc/todo/git-annex-cat.mdwn @@ -3,3 +3,5 @@ It would be useful to have a [[`git-annex-cat`|forum/Is_there_a___34__git_annex_ If file is not present, or `remote.here.cost` is higher than `remote.someremote.cost` where file is present, `someremote` would get a `TRANSFER` request where the `FILE` argument is a named pipe, and a `cat` of that named pipe would be started. If file is not annexed, for uniformity `git-annex-cat file` would just call `cat file`.
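For the easy case (a locked file whose object is already present locally), a cat-like tool mostly reduces to resolving the symlink and streaming the object, as in this rough standalone sketch (names are illustrative; it deliberately ignores unlocked/pointer files and fetching from remotes, which is where the hard parts come in):

    import Control.Exception (SomeException, try)
    import qualified Data.ByteString.Lazy as L
    import Data.List (isInfixOf)
    import System.Environment (getArgs)
    import System.FilePath (takeDirectory, (</>))
    import System.Posix.Files (readSymbolicLink)

    -- annexcat FILE: stream the annexed object of a locked, locally
    -- present file; otherwise fall back to behaving like plain cat.
    main :: IO ()
    main = do
        [file] <- getArgs
        target <- try (readSymbolicLink file)
            :: IO (Either SomeException FilePath)
        let path = case target of
                Right t | ".git/annex/objects/" `isInfixOf` t ->
                    takeDirectory file </> t
                _ -> file
        L.putStr =<< L.readFile path

A real git-annex-cat would additionally need to handle unlocked/pointer files and to stream from a remote when the content is not present locally, which is what the comments below are about.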
+ +[[!tag needsthought]] diff --git a/doc/todo/git-annex-cat/comment_4_8708bf0aef0025e2ed1a8b9d2a7f0112._comment b/doc/todo/git-annex-cat/comment_4_8708bf0aef0025e2ed1a8b9d2a7f0112._comment new file mode 100644 index 0000000000..7c602eb537 --- /dev/null +++ b/doc/todo/git-annex-cat/comment_4_8708bf0aef0025e2ed1a8b9d2a7f0112._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-01T18:44:37Z" + content=""" +@Ilya_Shlyakhter, I'd assume: + +* some remotes would write to the named pipe +* some remotes would overwrite it with a file +* some remotes would open it, try to seek around as they do non-sequential + recieves, and hang or something +* some remotes would maybe open and write to it, but would no longer be + able to resume interrupted transfers, since they would I guess see its + size as 0 +"""]] diff --git a/doc/todo/git-annex-export_--from_option.mdwn b/doc/todo/git-annex-export_--from_option.mdwn index c9d07ee9d9..9bebfa5eee 100644 --- a/doc/todo/git-annex-export_--from_option.mdwn +++ b/doc/todo/git-annex-export_--from_option.mdwn @@ -5,3 +5,5 @@ Now I followed the documentation about the special remote adb and created that r Which is caused by the fact that I didn't have checked out the files on my workstation. I don't need the files on this pc so it would be stupid to checkout partially huge files there or in other words I don't need the files at that place, I don't get why the export command not has a --from option where it can get the files? Is there a reason that does not exist and if so what would be a way to do sending files to the android device without ssh-ing into my server? + +[[!tag unlikely]] diff --git a/doc/todo/git-annex-get_--batch_--key.mdwn b/doc/todo/git-annex-get_--batch_--key.mdwn index 4d210fa387..a54d539102 100644 --- a/doc/todo/git-annex-get_--batch_--key.mdwn +++ b/doc/todo/git-annex-get_--batch_--key.mdwn @@ -1 +1,3 @@ Can git-annex-get be extended so that "git-annex-get --batch --key" fetches the keys (rather than filenames) given in the input? + +[[!tag needsthought]] diff --git a/doc/todo/git-annex-init_should_configure_git_diff_driver.mdwn b/doc/todo/git-annex-init_should_configure_git_diff_driver.mdwn index 2b36d89832..846ae0c4ee 100644 --- a/doc/todo/git-annex-init_should_configure_git_diff_driver.mdwn +++ b/doc/todo/git-annex-init_should_configure_git_diff_driver.mdwn @@ -1 +1,3 @@ `git diff` for annexed files, especially unlocked annexed files, is currently uninformative. It would help if [[`git-annex-init`|git-annex-init]] configured a [git diff driver](https://git-scm.com/docs/gitattributes#_generating_diff_text) to diff the contents of the annexed files, rather than the pointer files. + +> [[wontfix|done]], see comment diff --git a/doc/todo/git-annex-init_should_configure_git_diff_driver/comment_1_e1161efe2fa7c87c6d62d4a2624737f4._comment b/doc/todo/git-annex-init_should_configure_git_diff_driver/comment_1_e1161efe2fa7c87c6d62d4a2624737f4._comment new file mode 100644 index 0000000000..70c2d59ea0 --- /dev/null +++ b/doc/todo/git-annex-init_should_configure_git_diff_driver/comment_1_e1161efe2fa7c87c6d62d4a2624737f4._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T18:39:27Z" + content=""" +Normally annexed files are huge binary files. Line-by-line diff of such +files is unlikely to be useful. + +So you would need some domain-specific diff for the kind of binary files +you are storing in git-annex. 
If you have one, you can use +[[git-annex-diffdriver]] to make git use it when diffing annexed files. + +Not seeing anything more I can do here, so I'm going to close this todo. +"""]] diff --git a/doc/todo/git-annex-migrate_using_git-replace.mdwn b/doc/todo/git-annex-migrate_using_git-replace.mdwn index 98dcf9bc61..8d3a857ac3 100644 --- a/doc/todo/git-annex-migrate_using_git-replace.mdwn +++ b/doc/todo/git-annex-migrate_using_git-replace.mdwn @@ -1,3 +1,5 @@ Currently, git-annex-migrate leads to content (and metadata) being stored under both old and new keys. git-annex-unused can drop the contents under the old key, but then you can't access the content if you check out an older commit. Maybe, an option can be added to migrate keys using [git-replace](https://git-scm.com/docs/git-replace) ? You'd git-replace the blob .git/annex/objects/old_key with the blob .git/annex/objects/new_key, the blob ../.git/annex/objects/old_key with the blob ../.git/annex/objects/new_key , etc. You could then also have a setting to auto-migrate non-checksum keys to checksum keys whenever the contents gets downloaded. More generally, git-annex-replace could be implemented this way, doing what git-replace does, but for git-annex keys rather than git hashes. [[git-annex-pre-commit]] might need to be changed to implement replacement of keys added later. + +[[!tag needsthought]] diff --git a/doc/todo/git-annex-migrate_using_git-replace/comment_1_6a317be851dfb72c4aaaf5786dd1a1ff._comment b/doc/todo/git-annex-migrate_using_git-replace/comment_1_6a317be851dfb72c4aaaf5786dd1a1ff._comment new file mode 100644 index 0000000000..2240298a3a --- /dev/null +++ b/doc/todo/git-annex-migrate_using_git-replace/comment_1_6a317be851dfb72c4aaaf5786dd1a1ff._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="Chel" + avatar="http://cdn.libravatar.org/avatar/a42feb5169f70b3edf7f7611f7e3640c" + subject="comment 1" + date="2020-02-01T02:55:03Z" + content=""" +Very interesting idea! But some problems: + +- As mentioned, not only `.git/annex/<...>` blobs need to be replaces for every key, but also `/annex/<...>` +and all `../.git/annex/<...>`, `../../.git/annex/<...>`, etc. + +- In big repositories it can create a giant amount of *refs/replace/* refs. +I don't know how it affects the performance if they are stored in .git/packed-refs, +but it can interfere with the normal operation on a repo. +For example `git show-ref` will not work without ` | grep` or something. +"""]] diff --git a/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different.mdwn b/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different.mdwn index c84177fe59..e0fbed78d0 100644 --- a/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different.mdwn +++ b/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different.mdwn @@ -5,3 +5,5 @@ I think it would be better if `git annex reinject --known` would ignore the file This problem does not affect `git annex reinject` without `--known`. 
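To illustrate what "ignore the file extension" could mean in practice: for SHA256E-style keys the embedded extension is everything from the first dot onward, so two keys for the same content can be compared after stripping it. A tiny hedged sketch (illustrative names, not git-annex code); the hard part, as the comments below explain, is doing such a lookup efficiently over all known keys:

    -- Compare SHA256E-style keys while ignoring the extension embedded
    -- in the key name. The size and hash parts contain no '.', so
    -- everything from the first '.' onwards is the extension.
    sameKeyIgnoringExtension :: String -> String -> Bool
    sameKeyIgnoringExtension a b = stripExt a == stripExt b
      where
        stripExt = takeWhile (/= '.')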
--spwhitton + +> mentioned this on the git-annex reinject man page; [[done]] --[[Joey]] diff --git a/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different/comment_1_366978d6fda3bc00ccbfc2f9548a1492._comment b/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different/comment_1_366978d6fda3bc00ccbfc2f9548a1492._comment new file mode 100644 index 0000000000..3e25f3cad4 --- /dev/null +++ b/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different/comment_1_366978d6fda3bc00ccbfc2f9548a1492._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T17:11:58Z" + content=""" +I can't think of a reasonable way to implement this. + +It would need to hash and then look for a known SHA256E key that uses the +hash. But the layout of the git-annex branch doesn't provide any way to do +that, except for iterating over every filename in the branch. Which +would be prohibitively slow when reinjecting many files. (N times git +ls-tree -r) So it would need to build a data structure to map from SHA256 +to known SHA256E key. That can't be stored in memory, git-annex doesn't +let the content of the repo cause it to use arbitrary amounts of memory +(hopefully). + +All I can think of is to traverse the git-annex branch and build a sqlite +database and then query that, but that would add quite a lot of setup +overhead to the command. +"""]] diff --git a/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different/comment_2_3df0601d0d999bdcbeb0f462e01f8863._comment b/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different/comment_2_3df0601d0d999bdcbeb0f462e01f8863._comment new file mode 100644 index 0000000000..fb54f48029 --- /dev/null +++ b/doc/todo/git-annex-reinject_--known_should_not_fail_when_the_file_extension_is_different/comment_2_3df0601d0d999bdcbeb0f462e01f8863._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="spwhitton" + avatar="http://cdn.libravatar.org/avatar/9c3f08f80e67733fd506c353239569eb" + subject="comment 2" + date="2020-01-07T12:29:47Z" + content=""" +Thank you for your reply. Makes sense. If that's the only way to do it then it might as well be a helper script rather than part of git-annex. + +Leaving this bug open because it would be good to have the limitation documented in git-annex-reinject(1). +"""]] diff --git a/doc/todo/git-annex-reinject_does_not_work_in_a_bare_repo.mdwn b/doc/todo/git-annex-reinject_does_not_work_in_a_bare_repo.mdwn index dc46ac71ee..d4813403fc 100644 --- a/doc/todo/git-annex-reinject_does_not_work_in_a_bare_repo.mdwn +++ b/doc/todo/git-annex-reinject_does_not_work_in_a_bare_repo.mdwn @@ -26,3 +26,5 @@ Obviously this wasn't actually a file known to git-annex. But I get the same error in a non-dummy bare repo I am trying to reinject. A workaround is to use `git worktree add` and run `git annex reinject` from there. 
+ +> [[fixed|done]] --[[Joey]] diff --git a/doc/todo/git-annex-sync_handling_of_linked_worktrees.mdwn b/doc/todo/git-annex-sync_handling_of_linked_worktrees.mdwn index 5442545085..2bd322cede 100644 --- a/doc/todo/git-annex-sync_handling_of_linked_worktrees.mdwn +++ b/doc/todo/git-annex-sync_handling_of_linked_worktrees.mdwn @@ -1 +1,3 @@ When using [[linked worktrees|tips/Using_git-worktree_with_annex]], the main tree is currently handled differently from the linked trees: "if there is change in the tree then syncing doesn't update git worktrees and their indices, but updates the checked out branches. This is different to the handling of the main working directory as it's either got updated or left behind with its branch if there is a conflict." Is there a reason for this? Could linked worktrees be treated same as main one? + +[[!tag moreinfo]] diff --git a/doc/todo/git-annex-sync_handling_of_linked_worktrees/comment_1_3e542583247177f9cdf44122ec483a0c._comment b/doc/todo/git-annex-sync_handling_of_linked_worktrees/comment_1_3e542583247177f9cdf44122ec483a0c._comment new file mode 100644 index 0000000000..eaf6675861 --- /dev/null +++ b/doc/todo/git-annex-sync_handling_of_linked_worktrees/comment_1_3e542583247177f9cdf44122ec483a0c._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-30T17:12:40Z" + content=""" +That tip was written by leni536, and I don't really understand what it's +talking about with a difference in sync behavior. I'm not sure it's +accurate or describes what happens clearly. + +To me it seems really simple, no matter if you have a regular work tree, or +are using git-worktree or whatever: sync fetches, merges, and pushes. Merging +updates the current work tree, and AFAIK not whatever other work trees might +be using the same .git repository. In any case, sync should behave the same +as git pull as far as updating work trees goes. + +Can you please show an example of whatever problem you may have with the +current behavior? +"""]] diff --git a/doc/todo/git-annex-test___58___skip_tests_if_external_utils_have_problems.mdwn b/doc/todo/git-annex-test___58___skip_tests_if_external_utils_have_problems.mdwn index 0e674ce65c..1e6a677d2d 100644 --- a/doc/todo/git-annex-test___58___skip_tests_if_external_utils_have_problems.mdwn +++ b/doc/todo/git-annex-test___58___skip_tests_if_external_utils_have_problems.mdwn @@ -1 +1,3 @@ git-annex-test failures sometimes reflect failures not of git-annex but of externals utils on which it relies. E.g. when my installation or configuration of gpg has problems, git-annex test suite fails due to the tests that rely on gpg. (And there doesn't seem to be a simple way to skip tests that match a regexp.) git-annex could avoid that by running some simple sanity checks (beyond just existence) on gpg or other optional dependencies, and skipping tests if these checks fail. E.g. if simple test commands to encrypt/sign a small file with gpg fail, then skip gpg-based tests (and warn the user). 
+ +[[!tag unlikely]] diff --git a/doc/todo/git_annex_repair__58___performance_can_be_abysmal__44___huge_improvements_possible.mdwn b/doc/todo/git_annex_repair__58___performance_can_be_abysmal__44___huge_improvements_possible.mdwn index 407e099719..2dae2270f7 100644 --- a/doc/todo/git_annex_repair__58___performance_can_be_abysmal__44___huge_improvements_possible.mdwn +++ b/doc/todo/git_annex_repair__58___performance_can_be_abysmal__44___huge_improvements_possible.mdwn @@ -26,3 +26,5 @@ I would be willing to contribute some patches and although I have a respectable A a sidenote, I don't know how a repo containing about 300k files jumped to 1400k git objects within the last 2 months. Any feedback welcome, thanks. + +[[!tag needsthought unlikely]] diff --git a/doc/todo/git_repo_tracking___40__not_storage__41__.mdwn b/doc/todo/git_repo_tracking___40__not_storage__41__.mdwn index 6fad284489..5630f17d56 100644 --- a/doc/todo/git_repo_tracking___40__not_storage__41__.mdwn +++ b/doc/todo/git_repo_tracking___40__not_storage__41__.mdwn @@ -6,3 +6,5 @@ A few possibilities: - Create branches or tags in an annex that collect a set of version-compatible checkouts for related projects. The commit/tag messages provide a natural place for meta-commentary - Save and version files that aren't quite junk but don't belong *in* a repo (logs, dumps, backups, editor project/workspace files, notes/to-do lists, build-artifacts, test-coverage/linter stat databases, shell history) alongside the repo, making it easier to have a consistent environment for working on one project across multiple systems. - Make separate system-specific "master" branches for the main projects directory on each system, then edit and push changes from any other. For example, prep the projects directory on an infrequently-used laptop from your desktop and push/pull the changes. + +[[!tag unlikely moreinfo]] diff --git a/doc/todo/git_repo_tracking___40__not_storage__41__/comment_2_4af24f7207f63eb4cc127a64c1fc5fc3._comment b/doc/todo/git_repo_tracking___40__not_storage__41__/comment_2_4af24f7207f63eb4cc127a64c1fc5fc3._comment new file mode 100644 index 0000000000..318ced2b37 --- /dev/null +++ b/doc/todo/git_repo_tracking___40__not_storage__41__/comment_2_4af24f7207f63eb4cc127a64c1fc5fc3._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-30T18:20:42Z" + content=""" +This seems, at first glance, entirely out of scope for git-annex. + +There are other things that manage lots of git repositories. I've written one +even (myrepos). +"""]] diff --git a/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn b/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn index 05d579f128..3bbdc48941 100644 --- a/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn +++ b/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn @@ -109,3 +109,5 @@ The best fix would be to improve git's smudge/clean interface: * Allow clean filter to read work tree files itself, to avoid overhead of sending huge files through a pipe. 
+ +[[!tag confirmed]] diff --git a/doc/todo/git_status_smudges_unncessarily_after_unlock.mdwn b/doc/todo/git_status_smudges_unncessarily_after_unlock.mdwn index b1b59f0869..dc4042dd62 100644 --- a/doc/todo/git_status_smudges_unncessarily_after_unlock.mdwn +++ b/doc/todo/git_status_smudges_unncessarily_after_unlock.mdwn @@ -9,3 +9,6 @@ use restagePointerFile, but that did not help; git update-index does then smudge it during the `git annex unlock`, which is no faster (but at least doing it then would avoid the surprise of a slow `git status` or `git commit -a`). Afterwards, `git status` then smudged it again, unsure why! +--[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/globus_special_remote_as_a___34__transport__34___layer.mdwn b/doc/todo/globus_special_remote_as_a___34__transport__34___layer.mdwn index 79c4153e8e..041be7edb0 100644 --- a/doc/todo/globus_special_remote_as_a___34__transport__34___layer.mdwn +++ b/doc/todo/globus_special_remote_as_a___34__transport__34___layer.mdwn @@ -3,3 +3,4 @@ Decided to ask before jumping into trying to implement it (not that I have any g [[!meta author=yoh]] [[!tag projects/repronim]] +[[!tag moreinfo]] diff --git a/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_6_51b2078904c6a7c0fff2dd7d38791e6b._comment b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_6_51b2078904c6a7c0fff2dd7d38791e6b._comment new file mode 100644 index 0000000000..1f93114bef --- /dev/null +++ b/doc/todo/globus_special_remote_as_a___34__transport__34___layer/comment_6_51b2078904c6a7c0fff2dd7d38791e6b._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2020-01-29T15:02:27Z" + content=""" +I'm not clear how the answer to that question would impact git-annex. + +Assuming this is built with external special remotes and/or plain git +remotes, is there something lacking in git-annex to implement it now? +"""]] diff --git a/doc/todo/import_tree.mdwn b/doc/todo/import_tree.mdwn index c33e2f4e0d..923cb3e08a 100644 --- a/doc/todo/import_tree.mdwn +++ b/doc/todo/import_tree.mdwn @@ -268,3 +268,5 @@ decreases as it goes? --- See also, [[adb_special_remote]] + +[[!tag confirmed]] diff --git a/doc/todo/import_tree_from_rsync_special_remote.mdwn b/doc/todo/import_tree_from_rsync_special_remote.mdwn index 92aef75764..d2238d79d3 100644 --- a/doc/todo/import_tree_from_rsync_special_remote.mdwn +++ b/doc/todo/import_tree_from_rsync_special_remote.mdwn @@ -36,3 +36,5 @@ importtree, but there are several roadblocks: So, it seems that, importtree would need to be able to run commands other than rsync on the server. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/importing_from_special_remote_without_downloading.mdwn b/doc/todo/importing_from_special_remote_without_downloading.mdwn index 3583750131..e1efe5ee5e 100644 --- a/doc/todo/importing_from_special_remote_without_downloading.mdwn +++ b/doc/todo/importing_from_special_remote_without_downloading.mdwn @@ -1 +1,3 @@ The documentation for the new import remote command says, "Importing from a special remote first downloads all new content from it". For many special remotes -- such as Google Cloud Storage or DNAnexus -- checksums and sizes of files can be determined without downloading the files. For other special remotes, data files might have associated checksum files (e.g. md5) stored next to them in the remote. 
In such cases, it would help to be able to import the files without downloading (which can be costly, especially from cloud provider egress charges), similar to addurl --fast . + +[[!tag needsthought]] diff --git a/doc/todo/improve_memory_usage_of_--all.mdwn b/doc/todo/improve_memory_usage_of_--all.mdwn index 785d8aa2dc..3f940434f3 100644 --- a/doc/todo/improve_memory_usage_of_--all.mdwn +++ b/doc/todo/improve_memory_usage_of_--all.mdwn @@ -12,3 +12,5 @@ An attempt at making it stream via unsafeInterleaveIO failed miserably and that is not the right approach. This would be a good place to use ResourceT, but it might need some changes to the Annex monad to allow combining the two. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/keep_git-annex_branch_checked_out__63__.mdwn b/doc/todo/keep_git-annex_branch_checked_out__63__.mdwn index 388eb57810..8c21cb01a1 100644 --- a/doc/todo/keep_git-annex_branch_checked_out__63__.mdwn +++ b/doc/todo/keep_git-annex_branch_checked_out__63__.mdwn @@ -1 +1,3 @@ Currently, the git-annex branch is not checked out, but is accessed as needed with commands like git-cat. Could git-annex work faster if it kept the git-annex branch checked out? Especially if one could designate a fast location (like a ramdisk) for keeping the checked-out copy. Maybe git-worktree could be used to tie the separate checkout to the repository. + +[[!tag unlikely]] diff --git a/doc/todo/key_checksum_from_chunk_checksums.mdwn b/doc/todo/key_checksum_from_chunk_checksums.mdwn index 7d30881255..a736380c6b 100644 --- a/doc/todo/key_checksum_from_chunk_checksums.mdwn +++ b/doc/todo/key_checksum_from_chunk_checksums.mdwn @@ -1 +1,3 @@ Would it be hard to add a variantion to checksumming [[backends]], that would change how the checksum is computed: instead of computing it on the whole file, it would first be computed on file chunks of given size, and then the final checksum computed on the concatenation of the chunk checksums? You'd add a new [[key field|internals/key_format]], say cNNNNN, specifying the chunking size (the last chunk might be shorter). Then (1) for large files, checksum computation could be parallelized (there could be a config option specifying the default chunk size for newly added files); (2) I often have large files on a remote, for which I have md5 for each chunk, but not for the full file; this would enable me to register the location of these fies with git-annex without downloading them, while still using a checksum-based key. + +[[!tag needsthought]] diff --git a/doc/todo/key_checksum_from_chunk_checksums/comment_4_e766f725e22ea0a38ac2277e08fd3839._comment b/doc/todo/key_checksum_from_chunk_checksums/comment_4_e766f725e22ea0a38ac2277e08fd3839._comment new file mode 100644 index 0000000000..82e760c8cf --- /dev/null +++ b/doc/todo/key_checksum_from_chunk_checksums/comment_4_e766f725e22ea0a38ac2277e08fd3839._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="Chel" + avatar="http://cdn.libravatar.org/avatar/a42feb5169f70b3edf7f7611f7e3640c" + subject="comment 4" + date="2020-01-26T22:48:07Z" + content=""" +Another theoretical use case (not available for now, but maybe for the future): +verify with checksums parts of the file and re-download only those parts/chunks, that are bad. +For this you need a checksum for each chunk and a \"global\" checksum in key, that somehow incorporates all these chunk checksums. +An example of this is Tiger Tree Hash in file sharing. 
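A rough sketch of the "checksum over chunk checksums" idea (in the spirit of the tree-hash schemes mentioned above): split the content into fixed-size chunks, hash each chunk, then hash the concatenation of the chunk digests. This is only an illustration using the cryptonite package; the chunk size and names are assumptions, not anything git-annex currently does:

    import Crypto.Hash (Digest, hashlazy)
    import Crypto.Hash.Algorithms (SHA256)
    import qualified Data.ByteString.Char8 as B
    import qualified Data.ByteString.Lazy as L

    -- Hash each fixed-size chunk (the last one may be shorter), then
    -- hash the concatenated chunk digests to get a rolled-up checksum.
    chunkedChecksum :: Int -> L.ByteString -> (Digest SHA256, [Digest SHA256])
    chunkedChecksum chunksize content = (rolledup, chunkdigests)
      where
        chunkdigests = map hashlazy (chunksOf content)
        rolledup = hashlazy $ L.fromChunks
            [ B.pack (show d) | d <- chunkdigests ]
        chunksOf b
            | L.null b = []
            | otherwise =
                let (h, t) = L.splitAt (fromIntegral chunksize) b
                in h : chunksOf t

If the per-chunk digests were also recorded somewhere, a bad download could in principle be re-verified and re-fetched one chunk at a time, which is the property described above.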
+ +When I used the SHA256 backend in my downloads, I often wondered that the long process of checksumming a movie +or an OS installation .iso is not ideal. Because if the file download is not finished, I get the wrong checksum, +and the whole process needs to be repeated. + +And in the future git-annex can integrate a FUSE filesystem and literally store just chunks of files, +but represent files as a whole in this virtual filesystem view. +"""]] diff --git a/doc/todo/key_checksum_from_chunk_checksums/comment_5_561b9bb28c5d375334ce915da75d5ce6._comment b/doc/todo/key_checksum_from_chunk_checksums/comment_5_561b9bb28c5d375334ce915da75d5ce6._comment new file mode 100644 index 0000000000..afd7e7e4ef --- /dev/null +++ b/doc/todo/key_checksum_from_chunk_checksums/comment_5_561b9bb28c5d375334ce915da75d5ce6._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="chunks and checksums" + date="2020-01-28T18:20:34Z" + content=""" +\"verify with checksums parts of the file and re-download only those parts/chunks, that are bad.\" -- if I understand correctly, git-annex doesn't checksum [[chunks|chunking]], but can tell incompletely downloaded chunks based on size. + +My original use case (registering the presence of a chunked file in a remote without downloading it) might be implementable with [[todo/setpresentkey_option_to_record_chunked_state/]]. The checksums of the chunks would not be used though. + +"""]] diff --git a/doc/todo/learn_about_remotes_that_are_currently_unavailable.mdwn b/doc/todo/learn_about_remotes_that_are_currently_unavailable.mdwn index d37aedee66..458db9b7fd 100644 --- a/doc/todo/learn_about_remotes_that_are_currently_unavailable.mdwn +++ b/doc/todo/learn_about_remotes_that_are_currently_unavailable.mdwn @@ -26,3 +26,5 @@ It could undo the de-prioritization when it sees that the network has changed. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/let_external_remotes_declare_support_for_named_pipes.mdwn b/doc/todo/let_external_remotes_declare_support_for_named_pipes.mdwn new file mode 100644 index 0000000000..d7a8bba6b5 --- /dev/null +++ b/doc/todo/let_external_remotes_declare_support_for_named_pipes.mdwn @@ -0,0 +1,5 @@ +In the [[design/external_special_remote_protocol]], the `File` parameter of various requests is specified to be a regular file. If it could be a named pipe, this would open up useful possibilities: [[todo/git-annex-cat]], [[todo/transitive_transfers]], [[todo/git-annex-export_--from_option]], [[todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/]], [[todo/to_and_from_multiple_remotes]], faster [[`git-annex-fsck --from`|git-annex-fsck]], passing named pipes on `git-annex` command line (for streaming the outputs of a running command directly to a remote, or using `git-annex` as a building block of larger workflows), and maybe others. + +An optional protocol request `NAMEDPIPESSUPPORTED`, similar to [[`EXPORTSUPPORTED`|design/external_special_remote_protocol/export_and_import_appendix#index1h2]], could tell `git-annex` that the remote supports named pipes. For remotes that don't declare such support, it could be emulated: before sending e.g. `TRANSFER STORE Key File`, if `File` is a pipe and the remote hasn't said it supports pipes, `git-annex` would drain the pipe to a `TempFile` and then send `TRANSFER STORE Key TempFile` instead. Then the rest of `git-annex` can presume pipes support. 
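A hedged sketch of the fallback emulation described above (the helper name and details are assumptions, not existing git-annex code): if the `File` is a named pipe and the remote has not declared support, drain it into a regular temporary file before handing it to the remote.

    import qualified Data.ByteString.Lazy as L
    import System.FilePath (takeDirectory)
    import System.IO (hClose, openTempFile)
    import System.Posix.Files (getFileStatus, isNamedPipe)

    -- Give the remote a regular file path: pass a named pipe straight
    -- through only when the remote declared pipe support, otherwise
    -- drain the pipe into a temp file first. (Cleanup of the temp file
    -- is omitted here.)
    withRegularFile :: Bool -> FilePath -> (FilePath -> IO a) -> IO a
    withRegularFile remotesupportspipes file use = do
        st <- getFileStatus file
        if isNamedPipe st && not remotesupportspipes
            then do
                (tmp, h) <- openTempFile (takeDirectory file) "drained"
                L.hPut h =<< L.readFile file
                hClose h
                use tmp
            else use file

With something like this in place, the rest of the transfer code could treat every remote as pipe-capable, as the todo suggests.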
+ +[[!tag needsthought]] diff --git a/doc/todo/limit_to_low_cost_remotes.mdwn b/doc/todo/limit_to_low_cost_remotes.mdwn index d92d674b68..9e38f67a82 100644 --- a/doc/todo/limit_to_low_cost_remotes.mdwn +++ b/doc/todo/limit_to_low_cost_remotes.mdwn @@ -14,3 +14,6 @@ drives, but not from the network. --[[Joey]] > repository, in another repository it could be a fairly low cost. The user > would need to examine all the costs to pick the cost they want; using > remote names seems better UI. --[[Joey]] + +> > that seems convincing reason not to implement this and instead +> > implement remote groups. [[wontfix|done]] --[[Joey]] diff --git a/doc/todo/lockdown_hooks.mdwn b/doc/todo/lockdown_hooks.mdwn index 0ec8b46241..8ca0ca5540 100644 --- a/doc/todo/lockdown_hooks.mdwn +++ b/doc/todo/lockdown_hooks.mdwn @@ -48,3 +48,5 @@ fed the names of files to operate on via stdin. > These hooks may be too specific to this purpose, while a more generalized > hook could also support things like [[storing_xattrs|support_for_storing_xattrs]] > --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/machine_readable_information_about_reason_of_failure_in_enableremote.mdwn b/doc/todo/machine_readable_information_about_reason_of_failure_in_enableremote.mdwn index 632c474a46..6518cd3e58 100644 --- a/doc/todo/machine_readable_information_about_reason_of_failure_in_enableremote.mdwn +++ b/doc/todo/machine_readable_information_about_reason_of_failure_in_enableremote.mdwn @@ -4,3 +4,4 @@ May be even more generally -- if there was a robust way to obtain errors for oth [[!meta author=yoh]] [[!tag projects/datalad]] +[[!tag moreinfo]] diff --git a/doc/todo/make___34____Try_making_some_of_these_repositories_available__34___more_informative.mdwn b/doc/todo/make___34____Try_making_some_of_these_repositories_available__34___more_informative.mdwn index cad8afb752..b92e91b160 100644 --- a/doc/todo/make___34____Try_making_some_of_these_repositories_available__34___more_informative.mdwn +++ b/doc/todo/make___34____Try_making_some_of_these_repositories_available__34___more_informative.mdwn @@ -29,4 +29,5 @@ although those remote descriptions/names give an idea for an informed user, they """]] [[!meta author=yoh]] -[[!tag project/dandi]] +[[!tag projects/dandi]] +[[!tag needsthought]] diff --git a/doc/todo/make_annex_info_more_efficient.mdwn b/doc/todo/make_annex_info_more_efficient.mdwn index eeb10dbfa5..274e0e8b64 100644 --- a/doc/todo/make_annex_info_more_efficient.mdwn +++ b/doc/todo/make_annex_info_more_efficient.mdwn @@ -2,3 +2,5 @@ ATM it takes about a minute for 'git annex info' on a sizeable but not huge repo [[!meta author=yoh]] [[!tag projects/datalad]] + +> I sense this one has reached its end, it's fast enough, so [[done]] --[[Joey]] diff --git a/doc/todo/metadata_batch_command_should_allow_changes_by_key.mdwn b/doc/todo/metadata_batch_command_should_allow_changes_by_key.mdwn index d249017288..64ec724c0e 100644 --- a/doc/todo/metadata_batch_command_should_allow_changes_by_key.mdwn +++ b/doc/todo/metadata_batch_command_should_allow_changes_by_key.mdwn @@ -14,3 +14,6 @@ to my surprise all i got was the retrial of the existing meta-data instead of th IHO git annex should allow to store metadata in batch mode by key [[!meta title="metadata --batch parses json strictly, loosen?"]] + +> [[done]] I guess, as there's been no response to my question in over a +> year. 
--[[Joey]] diff --git a/doc/todo/more_efficient_resolution_of_trivial_export_conflicts.mdwn b/doc/todo/more_efficient_resolution_of_trivial_export_conflicts.mdwn index a1dd95aa09..4ebdad5c4c 100644 --- a/doc/todo/more_efficient_resolution_of_trivial_export_conflicts.mdwn +++ b/doc/todo/more_efficient_resolution_of_trivial_export_conflicts.mdwn @@ -14,3 +14,5 @@ then B exported a tree containing `[foo, bar]`, and then A exported So, if one exported tree is a subset of the other, it's not necessary to unexport files added by the other tree. It's sufficient to check that files are present in the export and upload any that are missing. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_2_b0260afbe4c8c49caa20df4f5122a00a._comment b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_2_b0260afbe4c8c49caa20df4f5122a00a._comment new file mode 100644 index 0000000000..9df4bb277c --- /dev/null +++ b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_2_b0260afbe4c8c49caa20df4f5122a00a._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-30T17:45:39Z" + content=""" +I don't think that git-annex can generally abort an operation that is +outright hung. While it's certianly possible to kill a worker thread, if +that thread has other threads associated with it, they could keep on using +resources. And if an external command is hung, the command would keep +running. The only way to guarantee such an abort is to kill the whole +git-annex process and let the signal reap its children. That's what the +assistant does when the UI is used to stop a transfer, it kills the whole +`git-annex transferkeys` process. + +(A locked git index file does not prevent git-annex from making transfers +so AFAICS the comment above is not relevant.) +"""]] diff --git a/doc/todo/option_to_avoid_slow_transfers.mdwn b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_3_281e9061a7940171a1febde6c3dca95f._comment similarity index 70% rename from doc/todo/option_to_avoid_slow_transfers.mdwn rename to doc/todo/more_extensive_retries_to_mask_transient_failures/comment_3_281e9061a7940171a1febde6c3dca95f._comment index 177cead0b9..0539b94fac 100644 --- a/doc/todo/option_to_avoid_slow_transfers.mdwn +++ b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_3_281e9061a7940171a1febde6c3dca95f._comment @@ -1,3 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-30T17:52:31Z" + content=""" +Moving a similar todo I wrote to here: + I'd like an option that makes transfers (get,copy,etc) of files fail if the transfer speed falls below a given rate. @@ -8,6 +15,5 @@ at the usual speed and skipping the ones that are coming too slow. Then I can see what files it failed on and either resume those or see if I have a copy of them somewhere else. -I imagine there could be other use cases... - ---[[Joey]] +(Unfortunatly implementing that has the same problems..) 
+"""]] diff --git a/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_4_418a9cbc38142ec1b2d08fd617a3e4d4._comment b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_4_418a9cbc38142ec1b2d08fd617a3e4d4._comment new file mode 100644 index 0000000000..8141141765 --- /dev/null +++ b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_4_418a9cbc38142ec1b2d08fd617a3e4d4._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="aborting stuck operations so they can be retried" + date="2020-02-05T16:39:36Z" + content=""" +\"The only way to guarantee such an abort is to kill the whole git-annex process and let the signal reap its children\" -- then maybe the initial `git-annex` command can be made a wrapper that starts a separate `git-annex` process to do the actual work, monitors its progress, and kills/reaps/restarts it if it gets stuck? Or `-Jn` could work by starting up several separate git-annex processes, [[each handling a subset of files|parallel_possibilities/#comment-304240ba804513291c1a996b8eb3fd1c]], and the original process could kill/reap/restart any sub-process that gets stuck. This of course presumes idempotent operations. +"""]] diff --git a/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_5_69b132f465851421acb7e5edf009995d._comment b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_5_69b132f465851421acb7e5edf009995d._comment new file mode 100644 index 0000000000..e764bdd56b --- /dev/null +++ b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_5_69b132f465851421acb7e5edf009995d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="retries due to locked index file" + date="2020-02-05T16:59:40Z" + content=""" +\"A locked git index file does not prevent git-annex from making transfers\" -- by \"mask transient failures\" I meant all types of failures, not just transfers. So e.g. if concurrent operations fail due to contention for the index file lock, retries (after increasing, randomized intervals) could mask the failure. This would help especially for writing scripts/tools on top of git-annex. Logically, some operations -- like `git-annex-add` -- should never fail, and being able to assume that makes scripting easier. +"""]] diff --git a/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_6_5a7771a6169caa90fd91bc6ea7b4fe3d._comment b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_6_5a7771a6169caa90fd91bc6ea7b4fe3d._comment new file mode 100644 index 0000000000..51b9a693b3 --- /dev/null +++ b/doc/todo/more_extensive_retries_to_mask_transient_failures/comment_6_5a7771a6169caa90fd91bc6ea7b4fe3d._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="example of where retries could help" + date="2020-02-05T22:19:26Z" + content=""" +As one example, I just had a `git-annex-copy` command fail twice with `git-annex: thread blocked indefinitely in an STM transaction`, then have the same command succeed (or at least get much further -- still running) on the third try. I can write my own wrappers to mask such errors, but a built-in implementation seems generally useful and would know better which failures are likely transient. 
+"""]] diff --git a/doc/todo/moreinfo.mdwn b/doc/todo/moreinfo.mdwn new file mode 100644 index 0000000000..47e9041ceb --- /dev/null +++ b/doc/todo/moreinfo.mdwn @@ -0,0 +1,5 @@ +This tags is for todo items needing followup from their submitter to better +explain why this should be on git-annex's todo list. + +Feel free to remove this tag from the todo item if you've responded to a +question. diff --git a/doc/todo/needsthought.mdwn b/doc/todo/needsthought.mdwn new file mode 100644 index 0000000000..e6cb3313ea --- /dev/null +++ b/doc/todo/needsthought.mdwn @@ -0,0 +1,3 @@ +This tag is for todo items that we have not had time to develop a plan to +address. Someone needs to look at such a todo and think about it and +suggest a way to move it forward. diff --git a/doc/todo/network_test_suite.mdwn b/doc/todo/network_test_suite.mdwn index 337dbaf11f..eea7df952f 100644 --- a/doc/todo/network_test_suite.mdwn +++ b/doc/todo/network_test_suite.mdwn @@ -26,3 +26,5 @@ In any case, the new test suite would need to be run somewhere; running it on at least some of the autobuilders might be a good way. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/operate_on_files_affected_by_a_commit_range.mdwn b/doc/todo/operate_on_files_affected_by_a_commit_range.mdwn index 74047fcf90..e6455e95db 100644 --- a/doc/todo/operate_on_files_affected_by_a_commit_range.mdwn +++ b/doc/todo/operate_on_files_affected_by_a_commit_range.mdwn @@ -1 +1,4 @@ Sometimes you want to operate on files touched by commits in a range, e.g. to `git-annex-copy` files added in the last 10 commits to an S3 special remote. Could the option be added, to commands that take a path to operate on, to give a commit range, with the meaning "operate on files changed by these commits"? + +> Since my comment gives a way to do it, and there was no followup, I think +> this is [[done]] --[[Joey]] diff --git a/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn b/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn index 11328a9f37..6434c4de3d 100644 --- a/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn +++ b/doc/todo/optimise_by_converting_Ref_to_ByteString.mdwn @@ -1,3 +1,5 @@ Profiling of `git annex find --not --in web` suggests that converting Ref to contain a ByteString, rather than a String, would eliminate a fromRawFilePath that uses about 1% of runtime. + +[[!tag confirmed]] diff --git a/doc/todo/optimise_journal_access.mdwn b/doc/todo/optimise_journal_access.mdwn index a49441cf5e..37480ab48b 100644 --- a/doc/todo/optimise_journal_access.mdwn +++ b/doc/todo/optimise_journal_access.mdwn @@ -19,3 +19,5 @@ writer and it would have already behaved as it would after the change. But: When a process writes to the journal, it will need to update its state to remember it's no longer empty. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/option_for___40__fast__41___compression_on_special_remotes_like___34__directory__34__.mdwn b/doc/todo/option_for___40__fast__41___compression_on_special_remotes_like___34__directory__34__.mdwn index cf5f9d1ea4..c88d0ebb18 100644 --- a/doc/todo/option_for___40__fast__41___compression_on_special_remotes_like___34__directory__34__.mdwn +++ b/doc/todo/option_for___40__fast__41___compression_on_special_remotes_like___34__directory__34__.mdwn @@ -4,3 +4,5 @@ there are use-cases in which it would come in handy to have an option for a spec For example, I use git annex for very large scientific tomographic datasets and files originating from their processing like segmentations, distance maps, skeletons. 
While compressing the raw data makes little sense, compression e.g. segmentations and skeletons has a huge impact on the effective files size. Since compressing files of a few GBs to TBs is time consuming, I prefer to have an uncompressed version in the working tree (so I do not use file formats that are using compression by default e.g. .nii.gz) but it would be very helpful to have the option to push precious or older versions to a remote that then uses compression. Using encryption for this is a bit of an overkill and takes considerably longer than compressing with e.g. `pbzip`. A compressed file system for this purpose is no option, because the special remote is supposed to live on a restrictive archive server. Though, I guess, it would be possible to write a special remote wrapper for this, I wonder if this might qualify as an officially supported option to the already existing special remotes like "directory" or "rsync". E.g. in conjunction to `encryption` something like `compression` with possible values like `pbzip`, `bzip`, `pigz` and `gzip`. + +[[!tag confirmed]] diff --git a/doc/todo/option_to_add_user-specified_string_to_key.mdwn b/doc/todo/option_to_add_user-specified_string_to_key.mdwn index 140a0a0f8f..1b392c7cad 100644 --- a/doc/todo/option_to_add_user-specified_string_to_key.mdwn +++ b/doc/todo/option_to_add_user-specified_string_to_key.mdwn @@ -11,3 +11,5 @@ Cleaner would be to add a field to the key, as in MD5E-s0-uUSERKEYSTRING--d41d8c This enables attaching metadata not to file contents, but to the file itself; or partitioning keys (and therefore key metadata) into namespaces. The downside is some loss of deduplication. This loss may be acceptable. The loss can be mitigated for local repo and non-special remotes: after storing an object with e.g. MD5 d41d8cd98f00b204e9800998ecf8427e under .git/annex/objects, check if there is a symlink .git/annex/contenthash/d41d8cd98f00b204e9800998ecf8427e ; if not, make this a symlink to the object just stored; if yes, erase the object just stored, and hardlink the symlink's target instead. + +[[!tag unlikely moreinfo]] diff --git a/doc/todo/option_to_add_user-specified_string_to_key/comment_5_f2701eabebd95db02497ee6191897198._comment b/doc/todo/option_to_add_user-specified_string_to_key/comment_5_f2701eabebd95db02497ee6191897198._comment new file mode 100644 index 0000000000..cdff4fb16a --- /dev/null +++ b/doc/todo/option_to_add_user-specified_string_to_key/comment_5_f2701eabebd95db02497ee6191897198._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-01-30T18:58:13Z" + content=""" +Is there any reason to leave this todo open since [[external_backends]] +would presumably let it be implemented? +"""]] diff --git a/doc/todo/option_to_block_git_from_pushing_references_to_not-yet-saved_contents.mdwn b/doc/todo/option_to_block_git_from_pushing_references_to_not-yet-saved_contents.mdwn index 21b6e8d105..d90d8b1b56 100644 --- a/doc/todo/option_to_block_git_from_pushing_references_to_not-yet-saved_contents.mdwn +++ b/doc/todo/option_to_block_git_from_pushing_references_to_not-yet-saved_contents.mdwn @@ -1,3 +1,5 @@ One way I've lost data is to git-annex-add it in an untrusted temp clone of a repo, then commit and push the git branch, but forget to git-annex-copy the annexed contents referenced by that branch to a GloballyAvailable, (semi-)trusted remote. Then, when the temp clone is gone, the branch pushed to the repo is referencing permanently dead files. 
Maybe, git-annex-init could install a pre-push hook to check for this, and abort the push if it happens? Basically, to ensure that whatever data is referenced by pushed branches will actually be at least potentially get-table. Even if the current repo is not temp/untrusted, when sharing data with someone, you may want to ensure that any annexed files referenced by a pushed branch are actually potentially available. + +[[!tag moreinfo]] diff --git a/doc/todo/option_to_put_temp_files_on_a_RAM_disk.mdwn b/doc/todo/option_to_put_temp_files_on_a_RAM_disk.mdwn new file mode 100644 index 0000000000..04a612dcf6 --- /dev/null +++ b/doc/todo/option_to_put_temp_files_on_a_RAM_disk.mdwn @@ -0,0 +1,5 @@ +Add an option to give git-annex a path to a RAM disk, and an option to set the maximum space to be used there. git-annex often knows the size of the files it is downloading, since it's part of the key, so can determine in advance if a tempfile of that size would fit on the RAM disk. One could instead symlink `.git/annex/tmp/` to a RAM disk, but this could cause memory overflow if a large file is transferred. + +Related: [[todo/keep_git-annex_branch_checked_out__63__]], [[todo/transitive_transfers]] + +[[!tag unlikely]] diff --git a/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_1_28a5627604d2d4b25c51779a7216931d._comment b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_1_28a5627604d2d4b25c51779a7216931d._comment new file mode 100644 index 0000000000..0e044d5008 --- /dev/null +++ b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_1_28a5627604d2d4b25c51779a7216931d._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="https://christian.amsuess.com/chrysn" + nickname="chrysn" + avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc" + subject="Use of the RAM disk" + date="2020-01-28T14:03:15Z" + content=""" +What benefit would that give? + +When the transfer is complete, the file will be moved over to `.git/annex/objects`. On the same file system, that's a simple operation; across file systems, that's effectively a copy. + +In both cases, the file gets written to disk once. In the original case, it's up to the operating system when to start writing the data to disk (that is, unless the file is flushed by git-annex, which I don't have reason to assume it does). With a RAM disk inbetween, the file would be copied only when it's transferred completely (and then needs to be moved once more to not show up as an incomplete file at its final location). With the original setup, if the operating system has RAM to spare, it can do roughly that already (not start writing until the file is closed). When it's under pressure, it will flush the file out as soon as possible. + +Is there any performance issue you see that'd be solved using the RAM disk? If so, that might be indicative of something git-annex can do without starting to mount around (eg. remove any syncs / flushes that sneaked into the tempfile saving process, or use fallocate to tell the OS of the size to come). 
+"""]] diff --git a/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_2_1df752ac3b9cb2cc0e4a7dd4af71897f._comment b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_2_1df752ac3b9cb2cc0e4a7dd4af71897f._comment new file mode 100644 index 0000000000..5840e4f35d --- /dev/null +++ b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_2_1df752ac3b9cb2cc0e4a7dd4af71897f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="use of RAM disk" + date="2020-01-28T17:23:28Z" + content=""" +You're right in general. There may be cases though, where a temp file doesn't just get moved into [[`.git/annex/objects`|internals]]: e.g. when [[chunking]] is used along with parallel downloads, chunks might go into separate temp files before being merged. I was also thinking of use cases from [[todo/let_external_remotes_declare_support_for_named_pipes]], like [[todo/git-annex-cat]], where key contents is processed but not saved. +"""]] diff --git a/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_3_12a1b6f9fd616f5c498d5aff1cf1bcb6._comment b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_3_12a1b6f9fd616f5c498d5aff1cf1bcb6._comment new file mode 100644 index 0000000000..a7c70bfbb7 --- /dev/null +++ b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_3_12a1b6f9fd616f5c498d5aff1cf1bcb6._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="https://christian.amsuess.com/chrysn" + nickname="chrysn" + avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc" + subject="Re: use of RAM disk" + date="2020-01-29T07:49:08Z" + content=""" +The chunks case should fold into the original one if git-annex merges the chunks using [ioctl_ficlonerange](https://manpages.debian.org/buster/manpages-dev/ioctl_ficlonerange.2.en.html), but admittedly that is a) not portable (but neither is mounting a RAM-disk) and b) will only work on some file systems. + +I don't understand the applications in named pipes well enough to comment there (will have to read up a bit). + +But more generally, I'd gut-feeling-expect that if all is properly advertised (possibly by a fcntl, but [RWH_WRITE_LIFE_SHORT](https://manpages.debian.org/buster/manpages-dev/fcntl.2.en.html) doesn't quite seem to be it) and no fsyncs are sent (like [eatmydata](https://www.flamingspork.com/projects/libeatmydata/) does), any file should behave like that until a file system action is performed that forces it to be committed to disk -- or the kernel decides that it'd better use that RAM for something else, but that's what it can probably do best. + +I'm not sure the approach of screening (and possibly patching) data producers to not fsync (on some systems, closing might be an issue too, and that's where it gets more complex) is better than putting things to a RAM disk, I just think it's an alternative worth exploring. 
+"""]] diff --git a/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_4_8c6aa3f5aee359f2f161b6664cdb5c32._comment b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_4_8c6aa3f5aee359f2f161b6664cdb5c32._comment new file mode 100644 index 0000000000..65f4961309 --- /dev/null +++ b/doc/todo/option_to_put_temp_files_on_a_RAM_disk/comment_4_8c6aa3f5aee359f2f161b6664cdb5c32._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-30T16:44:52Z" + content=""" +When git-annex downloads chunks, it downloads one chunk at a time +(no parallelisation downloads of chunks of the same key) to either a temp +file or a memory buffer, decrypts if necessary, and then appends the +chunk to the destination file. + +Since chunks are often stored entirely in ram, the chunk size is typically +a small fraction of ram. It seems unlikely to me that the kernel would +often decide to unncessarily flush a small write to a temp file out to disk +and drop it from the cache when the very next operation after writing the +file is reading it back in. + +chrysn's analysis seems right. + +Also, this smells of premature optimisation, and tying it to features that +have not even been agreed on, let alone implemented, makes it kind of super +low priority? +"""]] diff --git a/doc/todo/p2p_protocol_flag_days.mdwn b/doc/todo/p2p_protocol_flag_days.mdwn index 6dc90727e5..93902a9090 100644 --- a/doc/todo/p2p_protocol_flag_days.mdwn +++ b/doc/todo/p2p_protocol_flag_days.mdwn @@ -20,3 +20,5 @@ At some point in the future, once all git-annex and git-annex-shell can be assumed to be upgraded to 6.20180312, this fallback can be removed. It will allows removing a lot of code from git-annex-shell and a lot of fallback code from Remote.Git. + +[[!tag confirmed]] diff --git a/doc/todo/parallel_possibilities/comment_2_47499773db465e8b759d7c96cd7713a7._comment b/doc/todo/parallel_possibilities/comment_2_47499773db465e8b759d7c96cd7713a7._comment new file mode 100644 index 0000000000..ad2e5e9b2e --- /dev/null +++ b/doc/todo/parallel_possibilities/comment_2_47499773db465e8b759d7c96cd7713a7._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-01-30T19:24:47Z" + content=""" +How would running parallel commands with xargs be better than the current +-J? +"""]] diff --git a/doc/todo/patch_generation_with_annexed_files.mdwn b/doc/todo/patch_generation_with_annexed_files.mdwn index b0c8560d6b..dfe86e5de9 100644 --- a/doc/todo/patch_generation_with_annexed_files.mdwn +++ b/doc/todo/patch_generation_with_annexed_files.mdwn @@ -117,3 +117,5 @@ to a change to the master branch. But room needs to be left to add this kind of thing. Ie, what git-annex adds to the git patch needs to have its own expansion point. + +[[!tag needsthought]] diff --git a/doc/todo/per-branch_git-annex_branch.mdwn b/doc/todo/per-branch_git-annex_branch.mdwn index 542bdef177..059144fa63 100644 --- a/doc/todo/per-branch_git-annex_branch.mdwn +++ b/doc/todo/per-branch_git-annex_branch.mdwn @@ -1 +1,3 @@ Currently, if I do some work on an experimental branch, creating some annexed files, then abandon the branch, information about keys created on the experimental branch will remain in the git-annex branch. This breaks git's normal notion of lightweight branching, where you can work on an experimental branch and, if you later decide to abandon that work, it'll be as if the experimental branch never existed. 
Maybe, it would make sense to have, for each branch mybranch, a corresponding branch git-annex-b/mybranch , which would hold the state of the git-annex branch reflecting work on mybranch? Then, if you decide to merge mybranch into master, git-annex-b/mybranch would get union-merged into the git-annex branch (or into git-annex-b/master). But if you decide to abandon/delete mybranch, git-annex-b/mybranch can be abandoned/deleted with no trace left in the main git-annex branch. + +> [[wontfix|done]] --[[Joey]] diff --git a/doc/todo/per-branch_git-annex_branch/comment_1_a6218eeed79ba2e068b28535e781bf79._comment b/doc/todo/per-branch_git-annex_branch/comment_1_a6218eeed79ba2e068b28535e781bf79._comment new file mode 100644 index 0000000000..5c5353b2e4 --- /dev/null +++ b/doc/todo/per-branch_git-annex_branch/comment_1_a6218eeed79ba2e068b28535e781bf79._comment @@ -0,0 +1,28 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-30T18:10:49Z" + content=""" +That won't work, and here's why: + +You're in master, and you git checkout -b tmp + +Now you're in tmp, and you git-annex move foo --from origin + +Now you git checkout master. You delete tmp and tmp/git-annex. + +Except, foo has been moved from origin to the local repo. So now the local +repo doesn't know it contains foo, at least until git-annex fsck notices +it's there. Worse, no other repo knows where foo went, only that it was +deleted from origin. + +Notice also that, even if you keep tmp around, tmp/git-annex must never get +pushed, unless tmp get merged back into master. So even without deleting +tmp, you get into this situation where other clones don't know where the +file went. + +--- + +git-annex v0 behaved just like this, and it quickly became apparent that it +was not a good idea due to this kind of scenario. +"""]] diff --git a/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys.mdwn b/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys.mdwn index c9ff47cded..c24e0c12e9 100644 --- a/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys.mdwn +++ b/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys.mdwn @@ -1 +1,3 @@ Right now, when computing a WORM key from a relative path or a URL key from a URL, if the original string is longer than a SHA256 checksum, its tail is replaced with its md5. Unfortunately, this eats up the file extension(s) at the end, causing the issues that \*E backends solve. It would be better to keep the tail of the path and replace the start or the middle with the md5, preserving extensions (as configured in annex.maxextensionlength) the same way \*E backends do. Maybe also, add a config option for the length beyond which the replacement-with-checksum happens? + +[[!tag confirmed]] diff --git a/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys/comment_1_3cf91bca8e01f98644b0d25a45ecf092._comment b/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys/comment_1_3cf91bca8e01f98644b0d25a45ecf092._comment new file mode 100644 index 0000000000..03b9ab9f67 --- /dev/null +++ b/doc/todo/preserve_file_extensions_in_WORM_and_URL_keys/comment_1_3cf91bca8e01f98644b0d25a45ecf092._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-30T18:43:19Z" + content=""" +I think it would be safe to make this change for WORM keys, which +certianly don't need to generate the same key for 2 files with the same +name. 
+ +Less sure about URL keys, if two git-annex addurls versions pick different +keys for the same url, then there would be a merge conflict, where +currently there is not. I think I've addurled the same url in different +clones of a repo before, probably. Although addurl with and without --fast +or --relaxed also causes that problem and maybe it's not worth worrying +about it. +"""]] diff --git a/doc/todo/provide_machine_readable___40__--json__63____41___version_of_initremote_--whatelse.mdwn b/doc/todo/provide_machine_readable___40__--json__63____41___version_of_initremote_--whatelse.mdwn new file mode 100644 index 0000000000..8e32ba0d48 --- /dev/null +++ b/doc/todo/provide_machine_readable___40__--json__63____41___version_of_initremote_--whatelse.mdwn @@ -0,0 +1,59 @@ +
+ATM it is a formatted text (click to expand) + +```shell +$> git annex initremote myrsync type=rsync --whatelse +shellescape + avoid usual shell escaping (not recommended) + (yes or no) +rsyncurl + (required) url or hostname:/directory for rsync to use +chunk + size of chunks (eg, 1MiB) +encryption + how to encrypt data stored in the special remote + (hybrid or none or pubkey or shared or sharedpubkey) +embedcreds + embed credentials into git repository + (yes or no) +mac + how to encrypt filenames used on the remote + (HMACSHA1 or HMACSHA224 or HMACSHA256 or HMACSHA384 or HMACSHA512) +keyid + gpg key id +keyid+ + add additional gpg key +keyid- + remove gpg key +exporttree + export trees of files to this remote + (yes or no) +importtree + import trees of files from this remote + (yes or no) +``` +
+ +which would make it necessary to establish a possibly fragile parsing by any tool which would like to programmatically obtain/use/expose those options. + +It would be great if there was a way to trigger such listing be output in more friendly for machines form? e.g. a json dictionary alike + +```json +{ + "rsyncurl": { + "required": True, + "description": "url or hostname:/directory for rsync to use" + }, + "shellescape": { + "description": "avoid usual shell escaping (not recommended)", + "choices": ["yes", "no"] + }, + ... +} +``` + +Looking at the [protocol](https://git-annex.branchable.com/design/external_special_remote_protocol/) I see no indication of "required" or "choices" to be actually explicitly provided by the remote fields, so I guess just supposed to be included in the text, so may be given current state of things, aforementioned dictionary would be simply ```{"NAME": "DESCRIPTION"}```, which someone makes this proposed TODO less valuable. + + +[[!meta author=yoh]] +[[!tag projects/datalad]] diff --git a/doc/todo/provide_machine_readable___40__--json__63____41___version_of_initremote_--whatelse/comment_1_5ed9864ef616d9e9b77a3d62561363fd._comment b/doc/todo/provide_machine_readable___40__--json__63____41___version_of_initremote_--whatelse/comment_1_5ed9864ef616d9e9b77a3d62561363fd._comment new file mode 100644 index 0000000000..f5065c5461 --- /dev/null +++ b/doc/todo/provide_machine_readable___40__--json__63____41___version_of_initremote_--whatelse/comment_1_5ed9864ef616d9e9b77a3d62561363fd._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-02-17T16:06:26Z" + content=""" +My design process for this feature included almost getting stuck on wanting +some kind of types for the values, and way to track which options are +required, or exclusive of other options, or dependencies of other options, +etc. All stuff that eg, an applicative option parser can support quite +well, but it would complicate the external protocol enourmously, if it +could be represented at all in it. So I had to eliminate all that. + +I think it's fairly uncommon for tab completion to do anything special +about required parameters, or even mutually exclusive options (although +git-annex tab completion does handle the latter), and while I can imagine +a gui interface marking an input field as required, it seems +that would be the least of its problems if it doesn't know what kind of +control to use for the field? + +It would be easy to add --whatelse-json, but it would be limited to the +name, a description of the purpose of the field, and maybe a description +of the expected value or list of valid values. +I'm unsure about the utility of that.. +"""]] diff --git a/doc/todo/provide_windows_build_with_MagicMime.mdwn b/doc/todo/provide_windows_build_with_MagicMime.mdwn new file mode 100644 index 0000000000..4e8c82abd6 --- /dev/null +++ b/doc/todo/provide_windows_build_with_MagicMime.mdwn @@ -0,0 +1,8 @@ +I forgot already why windows build is without having MagicMime support added, and failed to find a reason in the archives. +But [there is demand](https://github.com/datalad/datalad/pull/3956#pullrequestreview-335604805) from Windows users! ;) + +Without such functionality we cannot consistently (cross-platform) use git-annex for many DataLad datasets, where text files (well -- soon "non-binary files") are configured to be committed directly to git. 
+ +[[!meta author=yoh]] +[[!tag projects/datalad]] +[[!tag unlikely]] diff --git a/doc/todo/provide_windows_build_with_MagicMime/comment_1_11adb1623314e8a43c50704c5e6e62d4._comment b/doc/todo/provide_windows_build_with_MagicMime/comment_1_11adb1623314e8a43c50704c5e6e62d4._comment new file mode 100644 index 0000000000..a86e88b100 --- /dev/null +++ b/doc/todo/provide_windows_build_with_MagicMime/comment_1_11adb1623314e8a43c50704c5e6e62d4._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-06T16:55:36Z" + content=""" +It's a C library and it's not vendored into the haskell library as is often +the case, so it has be be linked in. I don't know what would be involved in +getting it installed on windows and bundled with git-annex in a usable state, +and I'm a bit afraid to find out. + +WSL can be used to run linux git-annex on windows, so that's a way already +available to use MagicMime. + +(The entire future of the windows port is currently TBD, it seems likely +that it will go away and WSL be used, aside from the current bugs in WSL +that make sqlite crash.) +"""]] diff --git a/doc/todo/provide_windows_build_with_MagicMime/comment_2_8f2d350c853f8827e1d5f95897dc2bd8._comment b/doc/todo/provide_windows_build_with_MagicMime/comment_2_8f2d350c853f8827e1d5f95897dc2bd8._comment new file mode 100644 index 0000000000..e04f76ca28 --- /dev/null +++ b/doc/todo/provide_windows_build_with_MagicMime/comment_2_8f2d350c853f8827e1d5f95897dc2bd8._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 2" + date="2020-01-06T19:27:23Z" + content=""" +FWIW: + +- I am not yet sure if WSL would ever become a panacea. So far in my attempts I kept running into issues which related to how you (user) expose Windows file system(s) inside WSL, and there might be no \"one setup which fits all the use cases\". + +- There seems to be efforts like https://github.com/nscaife/file-windows which seems to promise building Windows binaries of file and libmagic for Windows - may be it could be used here. + +- IMHO an ideal way for git-annex deployment on windows would have been a conda-forge build of it. Unfortunately it would not facilitate solution here since I do not see [windows build for libmagic](https://anaconda.org/search?q=libmagic). (meanwhile [filed an issue](https://github.com/conda-forge/libmagic-feedstock/issues/11), if resolved - could give an additional motivation to approach [git-annex-feedstock/issues/15](https://github.com/conda-forge/git-annex-feedstock/issues/15) +"""]] diff --git a/doc/todo/provide_windows_build_with_MagicMime/comment_3_15509110c5900aee2cc977372da16f0b._comment b/doc/todo/provide_windows_build_with_MagicMime/comment_3_15509110c5900aee2cc977372da16f0b._comment new file mode 100644 index 0000000000..9921aa6436 --- /dev/null +++ b/doc/todo/provide_windows_build_with_MagicMime/comment_3_15509110c5900aee2cc977372da16f0b._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="michael.hanke@c60e12358aa3fc6060531bdead1f530ac4d582ec" + nickname="michael.hanke" + avatar="http://cdn.libravatar.org/avatar/f881df265a423e4f24eff27c623148fd" + subject="WSL 1/2 experience" + date="2020-01-08T15:20:04Z" + content=""" +I looked into WSL 1 and 2 in the context (and with the hope) of trying to avoid having windows-specific code and documentation in and for datalad. 
My conclusion was that neither of them is practical (maybe: yet), because with a unix-like default setup of a repository (lock, symlinks) no repository content was accessible from outside the WSL environment. I think the main motivation for running on Windows is using Windows apps. And that specifically is not possible/easily achievable for any random app. +"""]] diff --git a/doc/todo/provide_windows_build_with_MagicMime/comment_4_4727291eefa07a40da13dace75a2e255._comment b/doc/todo/provide_windows_build_with_MagicMime/comment_4_4727291eefa07a40da13dace75a2e255._comment new file mode 100644 index 0000000000..2f1b5b9260 --- /dev/null +++ b/doc/todo/provide_windows_build_with_MagicMime/comment_4_4727291eefa07a40da13dace75a2e255._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="git-annex in docker on Windows" + date="2020-01-08T16:16:31Z" + content=""" +Maybe, git-annex on Windows could be run in a Docker container, operating on Windows files via bind mounts? A conda-forge package could be made that translates git-annex command invocations into 'docker run' commands. +"""]] diff --git a/doc/todo/provide_windows_build_with_MagicMime/comment_5_4633d2a40bca1ba8fa93f7038569715f._comment b/doc/todo/provide_windows_build_with_MagicMime/comment_5_4633d2a40bca1ba8fa93f7038569715f._comment new file mode 100644 index 0000000000..b32a7200b7 --- /dev/null +++ b/doc/todo/provide_windows_build_with_MagicMime/comment_5_4633d2a40bca1ba8fa93f7038569715f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2020-01-08T18:26:17Z" + content=""" +I guess that `git annex adjust --unlock` would avoid whatever the problem +is with the symlinks. +"""]] diff --git a/doc/todo/publicurl_config_for_all_special_remotes.mdwn b/doc/todo/publicurl_config_for_all_special_remotes.mdwn index 1dc567500b..d9b45e3498 100644 --- a/doc/todo/publicurl_config_for_all_special_remotes.mdwn +++ b/doc/todo/publicurl_config_for_all_special_remotes.mdwn @@ -2,3 +2,4 @@ [[!meta author=yoh]] [[!tag projects/repronim]] +[[!tag needsthought]] diff --git a/doc/todo/publicurl_config_for_all_special_remotes/comment_3_42ac954b120205650a2a5f03872085c2._comment b/doc/todo/publicurl_config_for_all_special_remotes/comment_3_42ac954b120205650a2a5f03872085c2._comment new file mode 100644 index 0000000000..41bb7a369c --- /dev/null +++ b/doc/todo/publicurl_config_for_all_special_remotes/comment_3_42ac954b120205650a2a5f03872085c2._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-29T15:04:06Z" + content=""" +I'm not sure how to implement this in git-annex's Remote API. +retrieveKeyFile/retrieveExport would need to check it and download +the url, so that would need modifications of those methods of every +remote that implements this. And it would need to be possible to +enable the remote in readonly mode. + +It might be possible to use a mixin to modify a Remote to support this? 
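For comparison, what this todo asks to generalize is roughly what the S3 special remote's `public`/`publicurl` configuration already provides; a sketch (the bucket name and url are placeholders):

```shell
# Set up by someone with S3 credentials in their environment:
git annex initremote mys3 type=S3 encryption=none bucket=my-bucket \
	public=yes publicurl=https://my-bucket.s3.amazonaws.com/
# A clone without credentials can still enable it and download over http:
git annex enableremote mys3
git annex get somefile --from mys3
```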
+"""]] diff --git a/doc/todo/publicurl_config_for_all_special_remotes/comment_4_3c144c0323d49f7ca2d6b9971ad3160a._comment b/doc/todo/publicurl_config_for_all_special_remotes/comment_4_3c144c0323d49f7ca2d6b9971ad3160a._comment new file mode 100644 index 0000000000..f9e23994b0 --- /dev/null +++ b/doc/todo/publicurl_config_for_all_special_remotes/comment_4_3c144c0323d49f7ca2d6b9971ad3160a._comment @@ -0,0 +1,28 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-30T15:19:55Z" + content=""" +This will need the remote to provide a function `Key -> FilePath`, +in order to support whatever hash directories or filename mangling the +remote does. It might be better to generalize the function to +`Url -> Key -> Url` where the first url is the publicurl value. +(When exporttree=true, the function is probably not needed.) + +To support that function in external special remotes, the protocol would +need to be extended. Hmm, that means that, in order to get a file, the +external program would need to be installed, even though the actual file +download only needs http. Contrast with the current readonly mode that +doesn't need the external program to be installed since the url is recorded +on the git-annex branch. + +I think that the only built-in remotes that would make sense to support +this are rsync, directory[1], and webdav. s3 already supports it but could +be refactored. git remotes already support http access which is effectively +the same result, and git-lfs already supports unauthed downloads, assuming +the server allows it. + +[1] a bit problimatic because old versions used a different +hash directory than current versions, so unless it can return two urls, +things stored with an old version won't be accessible +"""]] diff --git a/doc/todo/reinit_current_repo_to_new_uuid.mdwn b/doc/todo/reinit_current_repo_to_new_uuid.mdwn index 6c8511d861..faa7810b0a 100644 --- a/doc/todo/reinit_current_repo_to_new_uuid.mdwn +++ b/doc/todo/reinit_current_repo_to_new_uuid.mdwn @@ -1 +1,3 @@ If a git-annex repo is copied (e.g. by creating an AWS volume from a snapshot), there is a possibility of different repo copies with the same UUID. It would help if there was an option to [[`git-annex-reinit`|git-annex-reinit]] that would create a new uuid for the current repo. + +[[!tag moreinfo]] diff --git a/doc/todo/reinit_current_repo_to_new_uuid/comment_3_94434f7e76df4a25d922eed28ca6b559._comment b/doc/todo/reinit_current_repo_to_new_uuid/comment_3_94434f7e76df4a25d922eed28ca6b559._comment new file mode 100644 index 0000000000..5c0a85d0c8 --- /dev/null +++ b/doc/todo/reinit_current_repo_to_new_uuid/comment_3_94434f7e76df4a25d922eed28ca6b559._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-30T17:36:09Z" + content=""" +I don't want to complicate the location logs with time-dependent sameas +hacks. + +Is this repo that's been copied a special remote? fsck --fast --from would +then not be very fast since it has to talk to the special remote. A +dedicated command could be faster than that. + +If the repo is a git-annex repo though, I'd expect git annex fsck --fast +to be nearly optimal, the only extra work it does over such a dedicated +command, I think, is a stat of the object file to check if it's present. 
+"""]] diff --git a/doc/todo/reinit_current_repo_to_new_uuid/comment_4_9d2b98c15d78781f943f0f2ddf775706._comment b/doc/todo/reinit_current_repo_to_new_uuid/comment_4_9d2b98c15d78781f943f0f2ddf775706._comment new file mode 100644 index 0000000000..166df8e01e --- /dev/null +++ b/doc/todo/reinit_current_repo_to_new_uuid/comment_4_9d2b98c15d78781f943f0f2ddf775706._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2020-01-30T19:21:00Z" + content=""" +See [[todo/reinit_should_work_without_arguments]] for another argument for +the same thing. +"""]] diff --git a/doc/todo/reinit_should_work_without_arguments.mdwn b/doc/todo/reinit_should_work_without_arguments.mdwn index 22d3fbbd0a..0b347d7a4b 100644 --- a/doc/todo/reinit_should_work_without_arguments.mdwn +++ b/doc/todo/reinit_should_work_without_arguments.mdwn @@ -63,3 +63,6 @@ repositories: git annex sync Thanks for any feedback or comments... -- [[anarcat]] + +> [[done]], as duplicate of [[todo/reinit_current_repo_to_new_uuid]] +> --[[Joey]] diff --git a/doc/todo/reinit_should_work_without_arguments/comment_3_f4e790b5298b8fb1578ce86cd7fd060e._comment b/doc/todo/reinit_should_work_without_arguments/comment_3_f4e790b5298b8fb1578ce86cd7fd060e._comment new file mode 100644 index 0000000000..e3d833e66f --- /dev/null +++ b/doc/todo/reinit_should_work_without_arguments/comment_3_f4e790b5298b8fb1578ce86cd7fd060e._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-30T19:19:53Z" + content=""" +The same thing is being also dicussed at [[todo/reinit_current_repo_to_new_uuid]] +so I'm closing this todo in favor of that one. +"""]] diff --git a/doc/todo/remote-ready-command.mdwn b/doc/todo/remote-ready-command.mdwn index b4cc086769..50873d5524 100644 --- a/doc/todo/remote-ready-command.mdwn +++ b/doc/todo/remote-ready-command.mdwn @@ -17,3 +17,5 @@ git annex find . --in here --and --not --in kbfs | while read filename ``` but this means that every `git annex copy` command creates a new commit per file transferred, rather than a single commit at the end of the transfer. This may not seem like a big deal, but multiplying that over hundreds of files, it adds up to quite a bit of wasted disk space. (I'll also be looking into ways to squash or prune such commits, but it'd be nice to not have to do that.) + +[[!tag confirmed needsthought]] diff --git a/doc/todo/remove_legacy_import_directory_interface.mdwn b/doc/todo/remove_legacy_import_directory_interface.mdwn index d8f8a316c5..b2ef604f73 100644 --- a/doc/todo/remove_legacy_import_directory_interface.mdwn +++ b/doc/todo/remove_legacy_import_directory_interface.mdwn @@ -45,3 +45,5 @@ cases, convert to the new interface, and keep others using the old interface. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/setpresentkey_option_to_record_chunked_state.mdwn b/doc/todo/setpresentkey_option_to_record_chunked_state.mdwn new file mode 100644 index 0000000000..72bea1b1a3 --- /dev/null +++ b/doc/todo/setpresentkey_option_to_record_chunked_state.mdwn @@ -0,0 +1,3 @@ +In [[git-annex-setpresentkey]], could an option be added to record in [[`aaa/bbb/*.log.cnk`|internals]] that the key contents is present in chunked state, with a given number of chunks of a given size? 
+ +[[!tag moreinfo]] diff --git a/doc/todo/setpresentkey_option_to_record_chunked_state/comment_1_99bf00b8f78d75888dc7bc69d36cd112._comment b/doc/todo/setpresentkey_option_to_record_chunked_state/comment_1_99bf00b8f78d75888dc7bc69d36cd112._comment new file mode 100644 index 0000000000..a422ae3c66 --- /dev/null +++ b/doc/todo/setpresentkey_option_to_record_chunked_state/comment_1_99bf00b8f78d75888dc7bc69d36cd112._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-01-29T14:39:06Z" + content=""" +I would rather not complicate setpresentkey with things not related to +presence information. + +This seems pretty deep into messing with git-annex's internals to seem +plausible to me as a separate command. At some point, you may as well just +write data to the branch yourself. What is the use case? +"""]] diff --git a/doc/todo/setpresentkey_option_to_record_chunked_state/comment_2_42e9309fff6a71c1a0a625dd2bc6aae2._comment b/doc/todo/setpresentkey_option_to_record_chunked_state/comment_2_42e9309fff6a71c1a0a625dd2bc6aae2._comment new file mode 100644 index 0000000000..c53025642f --- /dev/null +++ b/doc/todo/setpresentkey_option_to_record_chunked_state/comment_2_42e9309fff6a71c1a0a625dd2bc6aae2._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="use case to record chunked state" + date="2020-01-29T17:48:40Z" + content=""" +\"What is the use case?\" -- to register with git-annex the presence of a file in a remote, that is already split into chunks, without downloading the file. +\"I would rather not complicate setpresentkey with things not related to presence information\" -- makes sense. +\"you may as well just write data to the branch yourself\" -- yes; just wasn't sure if that might break some invariant, e.g. between the branch and the SQL databases derived from it. + +Don't worry about this todo, I'll figure it out. +"""]] diff --git a/doc/todo/setpresentkey_option_to_record_chunked_state/comment_3_97ed902a268ebad53f534dd48a6f5fc9._comment b/doc/todo/setpresentkey_option_to_record_chunked_state/comment_3_97ed902a268ebad53f534dd48a6f5fc9._comment new file mode 100644 index 0000000000..ebcf5df6cf --- /dev/null +++ b/doc/todo/setpresentkey_option_to_record_chunked_state/comment_3_97ed902a268ebad53f534dd48a6f5fc9._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-30T16:41:42Z" + content=""" +That's the use case I'd guessed, but it seems things would have to line up +exactly for it to work. Ie, I think git-annex requires all the chunks to be +the same size, and may fail in unexpected ways if they are not. + +You do not need to worry about any internal caching when modifying the +git-annex branch, because updates to the git-annex branch are always being +merged from remotes, so any such caching has to detect changes and update +itself. +"""]] diff --git a/doc/todo/shorter_keys_through_better_encoding.mdwn b/doc/todo/shorter_keys_through_better_encoding.mdwn index c76598b73d..3b33d0cad8 100644 --- a/doc/todo/shorter_keys_through_better_encoding.mdwn +++ b/doc/todo/shorter_keys_through_better_encoding.mdwn @@ -2,3 +2,6 @@ The link targets of annexed files are currently very long. This creates proble Or, if you're tired of backend requests, maybe implement a scheme for external backends, like the one for external special remotes? 
For external backend EXTNNN the user would put a script git-annex-external-backend-NNN in the path; the script would support commands like calckey, examinekey . Then I could also implement e.g. canonicalizing backends that strip away variable but semantically irrelevant information before computing the checksum. + +[[!meta title="avoid duplicating key twice in symlink to object file"]] +[[!tag unlikely]] diff --git a/doc/todo/shorter_keys_through_better_encoding/comment_7_d499c66c9a1981bb14d0ce63f4983ea5._comment b/doc/todo/shorter_keys_through_better_encoding/comment_7_d499c66c9a1981bb14d0ce63f4983ea5._comment new file mode 100644 index 0000000000..97479bec0d --- /dev/null +++ b/doc/todo/shorter_keys_through_better_encoding/comment_7_d499c66c9a1981bb14d0ce63f4983ea5._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2020-01-30T18:49:47Z" + content=""" +Since there is a separate todo item [[external_backends]], let's not +discuss that idea here. + +key/f would have been a great idea to have had 10 years ago. +(Although it does mean that if the object file somehow gets moved out of +its directory, there's no indication in its name that it's a git-annex +object file) + +But if that's all this todo is about, we'd need some kind of transition +plan for existing repos with history containing symlinks to key/key. +I doubt there is a good way to make that transition. +"""]] diff --git a/doc/todo/simpler__44___trusted_export_remotes.mdwn b/doc/todo/simpler__44___trusted_export_remotes.mdwn index 13acd1a7ab..33944b11ee 100644 --- a/doc/todo/simpler__44___trusted_export_remotes.mdwn +++ b/doc/todo/simpler__44___trusted_export_remotes.mdwn @@ -1,3 +1,5 @@ Currently, some issues impede the use of export remotes: (1) they're untrusted, except for versioned ones -- and from those keys cannot be dropped; (2) using them is different than using normal remotes: one can't just copy or move keys to them, one has to first make a tree-ish. Maybe this could be fixed, as follows. To copy a key to an export remote, if the key is not yet present in it, put it under .keys/aaa/bbb/keyname on the remote. That is, take the tree-ish currently on the remote, merge .keys/aaa/bbb/keyname with it, and put that on the remote. To drop a key from an external remote, take the tree-ish currently on the remote, drop all instances of the key from it, and push the changed tree-ish to the remote. To git-annex-export add an option --add , which will add the tree-ish to the tree-ish currently on the remote, without losing any keys currently on the remote: take the tree-ish currently on the remote; overlay on it the treeish being exported; for any files that would be overwritten, if no copies of that key would be left, move it to .keys/aaa/bbb/keyname in the tree-ish that is then pushed to the remote. This way, can always just copy any tree to the remote, without worrying about losing data. 
+ +[[!tag needsthought]] diff --git a/doc/todo/simpler__44___trusted_export_remotes/comment_6_0d27b08792112cbee7a368bc94044bb4._comment b/doc/todo/simpler__44___trusted_export_remotes/comment_6_0d27b08792112cbee7a368bc94044bb4._comment new file mode 100644 index 0000000000..066a51aeee --- /dev/null +++ b/doc/todo/simpler__44___trusted_export_remotes/comment_6_0d27b08792112cbee7a368bc94044bb4._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 6""" + date="2020-01-30T18:05:29Z" + content=""" +I think that the --sameas feature could be used to implement those combo remotes? +"""]] diff --git a/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type.mdwn b/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type.mdwn index 8ad099701f..bcc9dc9ff2 100644 --- a/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type.mdwn +++ b/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type.mdwn @@ -8,4 +8,10 @@ manpage says and indeed I get asked for e.g. encryption to be explicitly specified (why not to default to none). But it would not even complain if I specify some bogus (mistyped etc) option. I do not see any way (tried different --help's etc) to figure out what options any given special remote actually supports without going online to e.g. https://git-annex.branchable.com/special_remotes/rsync/ for `rsync` special remote. It makes configuration of special remotes not a trivial matter for new users. IMHO there should be some way (mentioned in man of initremote and enableremote) to get information about supported by the remote options, e.g. `git annex getremoteopts TYPE` or alike. I am not sure how it should/would work for custom special remotes since I think protocol ATM doesn't support querying for a list of configuration options. May be `GETCONFIG` interface command could be extended to return a list with of options with description? or some new command be added? [[!meta author=yoh]] -[[!tag projects/repronim]] +[[!tag projects/datalad]] + +> [[done]] as eg `git annex initremote type=rsync --whatelse`. +> +> External special remotes that implement LISTCONFIGS can also be queried: +> +> git annex inittype type=external externaltype=foo --whatelse diff --git a/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type/comment_2_96fae68472f7f9ef720c27149556f7a0._comment b/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type/comment_2_96fae68472f7f9ef720c27149556f7a0._comment new file mode 100644 index 0000000000..47fd2773dc --- /dev/null +++ b/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type/comment_2_96fae68472f7f9ef720c27149556f7a0._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="yarikoptic" + avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4" + subject="comment 2" + date="2020-02-14T17:20:27Z" + content=""" +Sweet, thanks for the feature! + +I wondered, why `--whatelse` which has no semantic relation to \"parameter\" or \"config\", and not something like `--help-params`, `--help-configs`, or `--listconfigs` to relate it to either PARAMS (as --help reports configs) or LISTCONFIGS protocol command? 
+"""]] diff --git a/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type/comment_3_b75ffc63f0e9a85c3d1f01bdce21c97c._comment b/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type/comment_3_b75ffc63f0e9a85c3d1f01bdce21c97c._comment new file mode 100644 index 0000000000..34ccd48f6d --- /dev/null +++ b/doc/todo/some_way_to_get_a_list_of_options_for_a_special_remote_of_a_given_type/comment_3_b75ffc63f0e9a85c3d1f01bdce21c97c._comment @@ -0,0 +1,18 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-02-17T15:57:28Z" + content=""" +I stuggled with naming this option. I did consider something +with "config" (redundant with other configs), or "parameter" +or "option" (redundant with other options). It was +--describe-other-params for a while but that was too long to type. + +--help would be the best name, but tying it into the main option parser is +impractical; amoung other things it would make tab completion need to +somehow run external special remotes to get a list of their parameters! + +The mnemonic for --whatelse, such as it is: You've gotten at least as +far as type= (without which it won't work and will prompt for that), +and want to know what else you can configure. +"""]] diff --git a/doc/todo/speculate-can-get___58___extension_of_speculate-present.mdwn b/doc/todo/speculate-can-get___58___extension_of_speculate-present.mdwn index c6d28f2755..4d920d2123 100644 --- a/doc/todo/speculate-can-get___58___extension_of_speculate-present.mdwn +++ b/doc/todo/speculate-can-get___58___extension_of_speculate-present.mdwn @@ -2,3 +2,5 @@ Add `remote..annex-speculate-can-get` config setting for non-special remot Then one can make a quick clone of the current repo, and instead of re-configuring all its remotes in the new clone, just configure the origin to be a `speculate-can-get` remote. This would also be useful when you have unconnected but related repos, and want to occasionally share files between them without merging their histories. + +[[!tag needsthought]] diff --git a/doc/todo/sqlite_database_improvements.mdwn b/doc/todo/sqlite_database_improvements.mdwn index b21e007563..1de200a2f1 100644 --- a/doc/todo/sqlite_database_improvements.mdwn +++ b/doc/todo/sqlite_database_improvements.mdwn @@ -145,6 +145,7 @@ remaining todo: > Use scanUnlockedFiles to repopulate the Associated table. > > But that does not repopulate the Content table. Doing so needs +<<<<<<< HEAD > to iterate over the unlocked files, filter out any that are modified, > and record the InodeCaches of the unmodified ones. Seems that it would > have to use git's index to know which files are modified. diff --git a/doc/todo/stop_using_createDirectoryIfMissing_True.mdwn b/doc/todo/stop_using_createDirectoryIfMissing_True.mdwn index 70b1b2ec90..3faca053f3 100644 --- a/doc/todo/stop_using_createDirectoryIfMissing_True.mdwn +++ b/doc/todo/stop_using_createDirectoryIfMissing_True.mdwn @@ -26,3 +26,5 @@ a new empty directory in its place and start putting files in there. What's needed is an action that creates directories only up to a given point, which can be either .git/annex or the top of the worktree depending on what's being done. --[[Joey]] + +[[!tag confirmed]] diff --git a/doc/todo/support_for_storing_xattrs.mdwn b/doc/todo/support_for_storing_xattrs.mdwn index 6edf0a9d26..4bdcfefe6c 100644 --- a/doc/todo/support_for_storing_xattrs.mdwn +++ b/doc/todo/support_for_storing_xattrs.mdwn @@ -13,3 +13,5 @@ At least `curl --xattr` saves `xdg.origin.url`. 
Perhaps `git-annex-metadata` could be leveraged to automatically store and restore xattrs? Might even work that addition of xattrs would always have to be done through a git-annex command, but restoration would be done automatically if git-annex noticed there are xattrs stored in metadata, and the file system is mounted with `user_xattr`. The `user` namespace is used for user xattrs and thus for "proposed metadata attributes" above. These attributes are valid git-annex metadata fields as-is. + +[[!tag unlikely]] diff --git a/doc/todo/support_ssh__58____47____47___or_sftp__58____47____47___urls_via___34__built-in__34___ssh_support.mdwn b/doc/todo/support_ssh__58____47____47___or_sftp__58____47____47___urls_via___34__built-in__34___ssh_support.mdwn index 683744d847..9523279557 100644 --- a/doc/todo/support_ssh__58____47____47___or_sftp__58____47____47___urls_via___34__built-in__34___ssh_support.mdwn +++ b/doc/todo/support_ssh__58____47____47___or_sftp__58____47____47___urls_via___34__built-in__34___ssh_support.mdwn @@ -37,3 +37,4 @@ I wondered if it would be feasible for git annex natively support ssh (scp) and/ [[!meta author=yoh]] [[!tag projects/datalad]] +[[!tag unlikely moreinfo]] diff --git a/doc/todo/symlinks_for_not-present_unlocked_files.mdwn b/doc/todo/symlinks_for_not-present_unlocked_files.mdwn index 44657f3824..15e6d38d86 100644 --- a/doc/todo/symlinks_for_not-present_unlocked_files.mdwn +++ b/doc/todo/symlinks_for_not-present_unlocked_files.mdwn @@ -38,3 +38,5 @@ turn out to have missing content. So for this to really be useful, the branch needs to automatically get updated. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/symlinks_to_symlinks_to_the_annex.mdwn b/doc/todo/symlinks_to_symlinks_to_the_annex.mdwn index 06f3b3da2e..d0d60f0286 100644 --- a/doc/todo/symlinks_to_symlinks_to_the_annex.mdwn +++ b/doc/todo/symlinks_to_symlinks_to_the_annex.mdwn @@ -1,3 +1,5 @@ If file A is annexed and dropped, and B is a relative symlink to A, then git annex get B should result in A being fetched, but currently doesn't. This would especially help if B is deep within some dir 'mydir', and you do git annex get mydir: annexed files under mydir get fetched, but not annexed files elsewhere in the repository to which symlinks under mydir point. So such symlinks under mydir will continue to remain broken. 
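A minimal reproduction of the symlink-to-symlink scenario described above, assuming another repository still holds a copy so the drop succeeds (file names are illustrative):

    git annex add A
    ln -s A B
    git add B
    git commit -m 'B is a relative symlink to annexed file A'
    git annex drop A     # succeeds because a copy exists elsewhere
    git annex get B      # currently does nothing; the todo asks for this to fetch A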
+ +[[!tag unlikely]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__.mdwn b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__.mdwn index 531b81848a..0e0dc0c5aa 100644 --- a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__.mdwn +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__.mdwn @@ -2,3 +2,5 @@ As we briefly discussed via email, it would be nice if sync could sync only some [[!meta author=yoh]] [[!tag projects/datalad]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_10_06fd59c8bdd451b5608e1e0191f4d5cb._comment b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_10_06fd59c8bdd451b5608e1e0191f4d5cb._comment new file mode 100644 index 0000000000..d40c0915d2 --- /dev/null +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_10_06fd59c8bdd451b5608e1e0191f4d5cb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="Ilya_Shlyakhter" + avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0" + subject="thanks" + date="2020-02-18T20:00:11Z" + content=""" +\"--no-content combined with --only-annex. Now implemented\" -- thanks a lot, I was also looking for that. +"""]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_5_8f19d38c815dd3051004301a15657cf6._comment b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_5_8f19d38c815dd3051004301a15657cf6._comment new file mode 100644 index 0000000000..47e2061fd4 --- /dev/null +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_5_8f19d38c815dd3051004301a15657cf6._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="Dan" + avatar="http://cdn.libravatar.org/avatar/986de9e060699ae70ff7c31342393adc" + subject="Still wanted (update with example)" + date="2020-02-13T20:08:45Z" + content=""" +I see this page recently was edited (when todo's were tagged) and so I wanted to chime in that this is still a feature I'm looking for, and I have a much less hypothetical use case for it. + +I'm a PhD student working on a research project where I supervise several undergraduates. We have a git repository that manages all of our code, and I let git-annex manage the large datafiles (also in the same repository) on which we run our code. The main repository is hosted on GitHub, and my students have read-only access to it. They've each made forks to which they have write access. We use a special remote that we all have write-access to, with wanted set to standard and group set to archive, so that it gets all of the content and distributes it as needed (the data is massive so git-annex is vital here since the student laptops can't realistically download it all at any one time). + +They use pull requests to the main GitHub repository to integrate their code changes, but we need a way to get the content of the git-annex branches in their forks (which are pushed to from their local repos) into the git-annex branch in the main GitHub repository. The natural solution seems for me (who has read/write access to the main repo and the fork) to do this, essentially pulling in git-annex branches from their forks to my local repo, and then pushing it to the main repo on GitHub. 
It'd also be nice if I can then push this back to all of their forks, too. I can do this manually, but I think I'd need to actually check out the git-annex branch (or stuff it in another worktree) and then do lots of work manually (or automate it in a script). + +First I tried `git annex sync --no-commit --no-push --no-pull` which (somewhat to my surprise) *did* pull the git-annex branches from their forks into my local repo, but didn't push `git-annex` back anywhere, and it neither pushed nor pulled `master`. So this was a good start, but I wanted to also push *only* the git-annex branch to the main repo (and ideally to their forks, too). So then I (foolishly) started dropping flags, and ended up inadvertently pulling their work-in-progress `master` branches into the mainline and pushing this super-merged thing back to all of them. I was able to do some resetting and quick force-pushes before anyone noticed, but I should've known better :) + +Throughout this process I'm trying to teach them how to use git-annex (it's pretty clearly the right tool for the job :) but need to be really careful with what `git annex sync` commands I encourage them to run since I don't want the, + +I'd love it if there was like a `--git-annex-branch-only` option that I could pass that would then do all the pushing/pulling goodness of `git annex sync` but *without* touching `master` (or whatever branch happens to be checked out). I could then teach the students to always use this flag to avoid actually introducing changes to their master branch (they're still learning git, too, so they'd have a hard time recovering from this). Even better if this was configurable, and something I could stick in the `git-annex-config` options so that when they clone the repo this setting would propagate to them along with the git-annex branch. + +Is something like this in the pipeline? Also, is there a simpler workaround I can do for now that doesn't involve tons of (manual) merges and pushes? + +Thanks so much for such an excellent tool; if we didn't have this, we'd essentially just give up on version control for our scientific data, which would be a real bummer. +"""]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_6_15e336ea35f0233e22f7c5b656b616d2._comment b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_6_15e336ea35f0233e22f7c5b656b616d2._comment new file mode 100644 index 0000000000..bb5af0160b --- /dev/null +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_6_15e336ea35f0233e22f7c5b656b616d2._comment @@ -0,0 +1,56 @@ +[[!comment format=mdwn + username="joey" + subject="""Re: Still wanted (update with example)""" + date="2020-02-17T17:15:10Z" + content=""" +@Dan, thanks for explaining your use case. + +In particular, I see why you don't want to pull their master +branches with the unfinished whatever, but do want to pull +their git-annex branch, and probably fetch their feature branches +too. + +I'm still unclear on why, after merging someone's feature +branch into your branch (master I suppose), you would not +want sync to push that updated branch back to origin? Is the issue not +about pushing master to origin, but that you don't want it to push +master to their forks? But if their forks contain other changes in +their master branches, it would not overwrite the changes.
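For comparison, a minimal sketch of the manual propagation Dan describes above, using only the git-annex branch; the remote names alice, bob, and origin are assumptions:

    git fetch alice git-annex
    git fetch bob git-annex
    # git-annex automatically folds fetched git-annex branches into its own
    # branch the next time it runs; `git annex merge` makes that happen now
    git annex merge
    git push origin git-annex
    git push alice git-annex
    git push bob git-annex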
+ +It does seem like setting remote.name.fetch would work in your use case, +but I also understand why you might not want to use it -- refspecs are +hard! -- and when you're dealing with feature branches that might be named +anything, it's hard to write a refspec that does what you want, other than +one that fetches everything and merges nothing. + +So I do see the appeal of a git-annex sync --only-annex that separates +concerns, letting you use whatever git commands you normally would to +commit and pull and push everything, except for the git-annex branch. + +And, that name implies it also syncs the annexed content, so no need to +remember to use --content with it. (I want --content to be sync default, +but there are backcompat issues with that so annex.synccontent is only an +option.) + +Soo, I'm leaning toward adding that option and not some other --branches +option that lists branches to sync or whatever. + +---- + +And, since `git-annex config` can set repo-wide annex.synccontent and +annex.autocommit that change the behavior of `git-annex sync`, +it could make sense to also have a setting that enables --only-annex +by default. I don't know if I'd encourage setting that in your repo though; +it might teach the students a non-standard git-annex behavior. +Re that, it would be helpful if you could finish this interrupted +thought of yours: + +> Throughout this process I'm trying to teach them how to use git-annex (it's +> pretty clearly the right tool for the job :) but need to be really careful +> with what `git annex sync` commands I encourage them to run since I don't +> want the, + +Because I'm not yet seeing how any use of git-annex sync by the students +could be problematic; it won't be able to push their master branch +to your repo or anything. +"""]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_7_757f97ac10035ec6b28720c0570b19f0._comment b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_7_757f97ac10035ec6b28720c0570b19f0._comment new file mode 100644 index 0000000000..b38f8bab06 --- /dev/null +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_7_757f97ac10035ec6b28720c0570b19f0._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 7""" + date="2020-02-17T19:07:47Z" + content=""" +Implemented --only-annex. + +I'm going to close this todo, but do follow up if that does not adequately +cover your use case. +"""]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_8_6c683fcba5d69a4c0051eb42b8c558a0._comment b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_8_6c683fcba5d69a4c0051eb42b8c558a0._comment new file mode 100644 index 0000000000..cd07856915 --- /dev/null +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_8_6c683fcba5d69a4c0051eb42b8c558a0._comment @@ -0,0 +1,48 @@ +[[!comment format=mdwn + username="Dan" + avatar="http://cdn.libravatar.org/avatar/986de9e060699ae70ff7c31342393adc" + subject="An overdue and overlong reply" + date="2020-02-17T22:59:19Z" + content=""" +It looks like this functionality was implemented before I could get my comment written, but I thought it might be useful to post it anyway. It seems like the implementing changes are now in master, so if I build from source I'll get these new features, right?
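For reference, a minimal sketch of how the options discussed in this thread are meant to be used; `annex.synconlyannex` is my assumption for the name of the repo-wide setting:

    git annex sync --only-annex                  # pull/push only the git-annex branch, plus annexed content
    git annex sync --only-annex --no-content     # only the git-annex branch, no content transfers
    git annex config --set annex.synconlyannex true   # assumed repo-wide default, propagated on clone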
I assume they'll also make it into the next release of git-annex (at which point I'll version bump at homebrew, which is what I'm having my students use to install git-annex). + +Thanks for your thoughtful response. I also agree that having an --only-annex option is perfectly satisfactory and more nuanced --branch-to-sync options are probably overkill. As to whether --only-annex should imply --content, I'm more agnostic and defer to your wisdom. However, if I call git-annex with --only-annex --no-content, will it push/pull the git-annex branches and leave the content alone? From looking at your commit message, it sounds like there is now a --not-only-annex option which can override a configured only-annex property, but it's not clear how --no-content might enter the picture. + +Let me try to finish my dangling thought from the last comment thread. For clarity, I'll introduce some labels for repositories and assume the only people working on this project are me (Dan) and two students, Alice and Bob. Let Dan-local refer to the repository on my laptop (and similar for {Alice,Bob}-local), let Dan-github refer to my repo on GitHub, and {Alice,Bob}-github refer to my students' forks. Dan has push access to {Dan,Alice,Bob}-github. Alice and Bob can fetch from {Dan,Alice,Bob}-github, but can *only* push to their own github repositories (Alice can push to Alice-github, Bob to Bob-github). + +Without an --only-annex option, I have two primary concerns. The first is the thought I left dangling, which I'll now complete: +Throughout this process I'm trying to teach them how to use git-annex (it's pretty clearly the right tool for the job :) ) but need to be really careful with what git annex sync commands I encourage them to run since I don't want them to inadvertently pull changes into their local branches (especially integrating changes from one another) and then wind up being confused as to how things got there. Like many newcomers to git, they're still at the rote learning stage where they are memorizing commands to type and are still developing a mental model of what's happening when they fetch/pull/push. For this reason, I think that avoiding their local branches changing as a side-effect of a git-annex command (i.e., by specifying this option in the config that travels in the git-annex branch) will make it easier for them to understand base git. There's some risk that they'll learn bad git-annex habits from this and be surprised at all the things git annex sync does when they use it elsewhere, but for now it seems easier to help them understand git but use git-annex mechanically, and once they're comfortable with that I can help them to understand what git-annex is actually doing and the nuances of git annex sync. + +The second problem is that because I'm the only one with push access to Dan-GitHub, everything has to get there either via a pull request (which I can accept after review) or I need to push it there myself via Dan-local. In particular, to keep the git-annex branch in Dan-GitHub up to date, I need to be integrating {Alice,Bob}-github/git-annex (or perhaps synced/git-annex?) into Dan-local/git-annex and then push it to Dan-GitHub/git-annex. I can do this manually, but it's a lot of typing (especially if there are many more students than just Alice and Bob), so git annex sync seems like a nice way to accomplish this.
However, it has the side effect of *also* pulling in {Alice,Bob}-GitHub/{synced/master,master} and then pushing that up to Dan-GitHub/synced/master, and if Alice and Bob are also running git annex sync, changes from Alice will show up in Bob-local/master and vice versa. Moreover, if they're also pushing e.g. Alice-local/master -> Alice-GitHub/master, their pull requests will suddenly get very noisy as they'll incorporate more than just their own changes, and for them to remedy this it will require careful use of git reset which is a dangerous command for them to run at this stage of their learning. + +Git that I am, after running git annex sync I saw that my Dan-local/master was now ahead of Dan-GitHub/master, and I foolishly pushed, which now plopped half-baked code from Alice and Bob into the primary branch of our primary repository on github. +It also had the unfortunate side-effect of closing out open pull requests from Alice and Bob (since github saw that their changes were now reachable from Dan-Github/master). I did some resetting, git annex sync --cleanup, and some force pushes to clean everything up before Alice or Bob could fetch, so other than having to re-open their pull requests, this didn't screw them up too much. + + +Finally, I want to clarify my understanding of the synced/branch workflow, which seems clever but I never fully understood it. From some simple experimenting (I have not waded very far into the source code), it seems that if I just run git annex sync (with no flags and assuming I haven't configured anything to do otherwise), and assuming that BRANCH is checked out locally, it will do the following, *I think*: + +1. Stage and commit any changes in tracked files +1. Merge synced/BRANCH into BRANCH +2. Loop over remotes, for each + 2. Pull from the remote (seems like it just fetches all branches) + 2. Merge REMOTE/BRANCH into BRANCH + 3. If REMOTE/synced/BRANCH exists, merge it into BRANCH +7. Do octopus merge of all REMOTE/git-annex and REMOTE/synced/git-annex branches into local git-annex branch +4. Loop over remotes again, for each + 5. Push git-annex -> REMOTE/synced/git-annex + 6. Push git-annex -> REMOTE/git-annex + 6. Push BRANCH -> REMOTE/synced/BRANCH + + + + +I'm a little confused by what the synced/git-annex branches are for, but I suppose they're even less likely to ever be checked out than git-annex and provide a safeguard. I *think* they will be included in the octopus merge described above. + +Step 3.2 (merge REMOTE/BRANCH into BRANCH) was a surprise to me based on my reading of the git annex sync documentation since I expected it to only integrate changes from REMOTE/synced/BRANCH. + + +It seems like neither the sync documentation on [branchable](https://git-annex.branchable.com/sync/) nor what is obtained with `man git-annex-sync` enumerates all of these steps, although reading them together gives an almost complete picture of what is going on. Since the documentation suggests the end user can just run these steps manually as an alternative to using `git annex sync`, it seems like it'd be helpful to very concretely document what those steps are. +I'd be happy to take a crack at updating the documentation to be more thorough, but wanted to make sure I actually understand what is going on before doing so. + +Again, I want to re-articulate how much I enjoy git annex and how difficult it would be to do any sort of version control for our data without it.
I deeply appreciate the time and energy that you put into this very valuable and useful tool. +"""]] diff --git a/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_9_5bef9faa4df40b84c991b9ea5e1ab7a3._comment b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_9_5bef9faa4df40b84c991b9ea5e1ab7a3._comment new file mode 100644 index 0000000000..17f14eddd8 --- /dev/null +++ b/doc/todo/sync_--branches__to_sync_only_specified_branches___40__e.g._git-annex__41__/comment_9_5bef9faa4df40b84c991b9ea5e1ab7a3._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 9""" + date="2020-02-18T16:23:28Z" + content=""" +Indeed, I had missed the case of --no-content combined with --only-annex. +Now implemented. + +It will be in the next release, which has slipped one day due to the above. +;-) + +-- + +I've improved the documentation of synced/ branches on the git-annex-sync +man page, although users normally should not need to concern themselves +with them. + +I see where the man page confused you about REMOTE/synced/BRANCH, +that was some particularly poor wording and is fixed. + +The difficulty with documenting what git-annex sync does in extreme detail +is that there are quite a lot of little hacks like synced branches that +most users just don't need to know about, but help users in particular +situations (who also generally don't know about or even notice it either). + +Just for example, sync sometimes pulls from the same remote twice. Why +a second pull? Well, it knows it has spent a long time at the --content +step, and so pulling again before it pushes makes it much less likely that +the push will fail due to some other change having been made on the remote +in the meantime. If a user were manually pulling and pushing, they would +most likely pull again if their push failed due to such a situation, so +there's not much point documenting what sync does (which could also change +if I find a better approach). + +So I prefer to keep the description of sync as high level as possible. +"""]] diff --git a/doc/todo/to_and_from_multiple_remotes.mdwn b/doc/todo/to_and_from_multiple_remotes.mdwn index 1c8d1370fe..d8fc28acb2 100644 --- a/doc/todo/to_and_from_multiple_remotes.mdwn +++ b/doc/todo/to_and_from_multiple_remotes.mdwn @@ -65,3 +65,5 @@ action over each remote. Seems two runners are needed with different concurrency behavior, one that balances the load amoung remotes, and one that runs the same action against multiple remotes concurrently. + +[[!tag needsthought]] diff --git a/doc/todo/tracking_changes_to_metadata.mdwn b/doc/todo/tracking_changes_to_metadata.mdwn index ed961c0d1a..81cfb13a8a 100644 --- a/doc/todo/tracking_changes_to_metadata.mdwn +++ b/doc/todo/tracking_changes_to_metadata.mdwn @@ -10,3 +10,5 @@ This has made me think that it would be nice to track changes to metadata, the s From reading the documentation about [[tips/metadata_driven_views]], it seems almost like #1 would be possible using views and doing a `git commit` myself, but that still just generates an "update" message on the `git-annex` branch. Even if it did work, using `git annex view 'NowPlaying=*'` excludes all files that aren't tagged with any machine, which makes adding new files harder. What do you think? Is this abuse of the `git-annex` branch? Would this interfere with [[design/caching_database]]? 
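Since per-key metadata is stored in the git-annex branch, part of what the question above asks for is already recoverable with plain git; a minimal sketch, where the key name and the `*.log.met` path pattern are my assumptions about the log layout:

    key='SHA256E-s1048576--0123abcd...'            # illustrative key name
    git log --patch git-annex -- "*$key.log.met"   # shows each change to that key's metadata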
+ +[[!tag needsthought moreinfo]] diff --git a/doc/todo/transitive_transfers/comment_6_ddef2f127a49c2eb8a15b5848756d888._comment b/doc/todo/transitive_transfers/comment_6_ddef2f127a49c2eb8a15b5848756d888._comment new file mode 100644 index 0000000000..323dcbae9c --- /dev/null +++ b/doc/todo/transitive_transfers/comment_6_ddef2f127a49c2eb8a15b5848756d888._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="annex2384@290036d126d86bcec28ee2f2ead549de1f59e90e" + nickname="annex2384" + avatar="http://cdn.libravatar.org/avatar/ad36fdc55abd8b9913b774fcd0177709" + subject="Similar use case" + date="2020-02-04T03:48:39Z" + content=""" +I have a similar use case, wanting to sync files to my music player from a machine that doesn't store a local copy of my music collection. I'm using a directory special remote with exporttree=yes since the player uses a FAT filesystem. Spooling locally would be absolutely fine with me; I just don't want to fetch any content that's already on the player. I suppose I could do something clever with the preferred content expression to accomplish this, but it seems like it might be complicated. For example, I think I could put the laptop and player each in a \"playersync\" group, with the player additionally in a \"player\" group, set the groupwanted expression on the playersync group to the files I want on the player, set the player's wanted to groupwanted and the laptop's wanted to \"groupwanted and not inallgroup=player\". But that seems pretty convoluted. +"""]] diff --git a/doc/todo/unify_adjust_with_view.mdwn b/doc/todo/unify_adjust_with_view.mdwn index 6129a08b3f..8cf16161a0 100644 --- a/doc/todo/unify_adjust_with_view.mdwn +++ b/doc/todo/unify_adjust_with_view.mdwn @@ -5,3 +5,5 @@ metadata that also has all files unlocked. There's also probably a fair amount of overlap in their implementations. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/union_mounting/comment_4_2f02fe00a84bf94b7c8e437d8b80293f._comment b/doc/todo/union_mounting/comment_4_2f02fe00a84bf94b7c8e437d8b80293f._comment new file mode 100644 index 0000000000..009355299d --- /dev/null +++ b/doc/todo/union_mounting/comment_4_2f02fe00a84bf94b7c8e437d8b80293f._comment @@ -0,0 +1,67 @@ +[[!comment format=mdwn + username="chkno@50332f55d5ef2f4b7c6bec5253b853a8f2dc770e" + nickname="chkno" + avatar="http://cdn.libravatar.org/avatar/8194377c81da838dda761a5d93b9c25c" + subject="Interim shell script" + date="2020-02-04T07:26:10Z" + content=""" +Until this feature is available in git annex proper, here is a small shell script that uses [lndir](https://gitlab.freedesktop.org/xorg/util/lndir) to create a merged view of the .git/annex/objects areas of multiple git-annex repositories. + +* [union-link-annexes](https://chkno.net/union-link-annexes) + +Demo: + + $ git init repo1 + $ cd repo1 + $ git annex init repo1 + $ echo from1 > from1 + $ echo both > both + $ git annex add from1 both + $ git commit -m . + + $ cd .. + $ git clone repo1 repo2 + $ cd repo2 + $ echo from2 > from2 + $ git annex add from2 + $ git annex copy --from origin both + $ git annex sync + $ git annex list + here + |origin + || + XX both + _X from1 + X_ from2 + + $ cd ../repo1 + $ git annex sync + $ cd .. + + + $ union-link-annexes merged repo1 repo2 + + + $ grep . repo1/* + repo1/both:both + repo1/from1:from1 + grep: repo1/from2: No such file or directory + + $ grep . repo2/* + repo2/both:both + grep: repo2/from1: No such file or directory + repo2/from2:from2 + + $ grep .
merged/* + merged/both:both + merged/from1:from1 + merged/from2:from2 + + $ find merged -not -type d -printf '%p -> %l\n' + merged/both -> .git/annex/objects/XV/zk/SHA256E-s5--f6d... + merged/from1 -> .git/annex/objects/vf/8W/SHA256E-s6--16e... + merged/from2 -> .git/annex/objects/3M/P2/SHA256E-s6--21e... + merged/.git/annex/objects/vf/8W/SHA256E-s6--16e... -> ../../../../../../../repo1/.git/annex/objects/vf/8W/SHA256E-s6--16e... + merged/.git/annex/objects/XV/zk/SHA256E-s5--f6d... -> ../../../../../../../repo1/.git/annex/objects/XV/zk/SHA256E-s5--f6d... + merged/.git/annex/objects/3M/P2/SHA256E-s6--21e... -> ../../../../../../../repo2/.git/annex/objects/3M/P2/SHA256E-s6--21e... +"""]] diff --git a/doc/todo/union_mounting/comment_5_58c38383a7ac2df843772960fa10204f._comment b/doc/todo/union_mounting/comment_5_58c38383a7ac2df843772960fa10204f._comment new file mode 100644 index 0000000000..8f253a793c --- /dev/null +++ b/doc/todo/union_mounting/comment_5_58c38383a7ac2df843772960fa10204f._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="https://christian.amsuess.com/chrysn" + nickname="chrysn" + avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc" + subject="union mounting and hidemissing" + date="2020-02-04T09:01:52Z" + content=""" +One easy way to achieve this with git-annex as is would be actual operating-system union mounting. This'll work as long as all (but the lowest / most costly) checkouts [have their missing files hidden](https://git-annex.branchable.com/tips/hiding_missing_files/). Just be sure not to call `git-annex` on the resulting directory (and if there's any danger of it, you might want to consider whiteing out the git configuration in an additional top layer), but only inside one of the supplying repositories. +"""]] diff --git a/doc/todo/union_mounting/comment_6_65055977d7c8db3c9c29d90e033e5bb4._comment b/doc/todo/union_mounting/comment_6_65055977d7c8db3c9c29d90e033e5bb4._comment new file mode 100644 index 0000000000..650808c636 --- /dev/null +++ b/doc/todo/union_mounting/comment_6_65055977d7c8db3c9c29d90e033e5bb4._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="chkno@50332f55d5ef2f4b7c6bec5253b853a8f2dc770e" + nickname="chkno" + avatar="http://cdn.libravatar.org/avatar/8194377c81da838dda761a5d93b9c25c" + subject="Three union-mounting methods that *don't* work" + date="2020-02-08T06:21:03Z" + content=""" +Linux's in-tree union-mounting option overlayfs [does not support modifications to underlying filesystems while an overlayfs mount is active](https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt): + +> Changes to the underlying filesystems while part of a mounted overlay filesystem are not allowed. If the underlying filesystem is changed, the behavior of the overlay is undefined, though it will not result in a crash or deadlock. + +I.e., it's great for the LiveCD case of combining a read-only squashfs and a private tmpfs into something that behaves just like a normal filesystem, but it cannot be used to export a read-only view of multiple mutable resources. + +I can report that [aufs](http://aufs.sourceforge.net/) also doesn't work for this use case, at least as of 2014 when I last tried it. Writes to underlying filesystems cause kernel panics that bring the whole machine down. + +I can also report that [hanwen/go-fuse's unionfs](https://github.com/hanwen/go-fuse/blob/master/example/unionfs/main.go) doesn't work for this use case. 
Example problem: Growing files' sizes get stuck at the size they were the first time they were viewed through the union mount. +"""]] diff --git a/doc/todo/universal_batch_mode.mdwn b/doc/todo/universal_batch_mode.mdwn index 0e85b960a4..37ebc1e88e 100644 --- a/doc/todo/universal_batch_mode.mdwn +++ b/doc/todo/universal_batch_mode.mdwn @@ -1,3 +1,4 @@ It would help if there was a universal batch mode, where git-annex command lines are given as lines in an input file, and are executed as a batch. A batch could contain different git-annex commands (as opposed to different parameters for one command). git-annex could intelligently group, reorder and parallelize the execution, as long as the overall effect of the batch is unchanged. (I.e. commands affecting different keys/paths could be run in parallel; commands repeatedly doing the same thing could be collapsed; git command batching could span different git-annex commands; etc.) I find myself implementing something like that in python on top of git-annex, but it would be much more efficient and robust if supported natively. Maybe, the DataLad project would also find this useful? +[[!tag needsthought]] diff --git a/doc/todo/universal_batch_mode/comment_3_c8232195fa3db718dc2b246d528e6c00._comment b/doc/todo/universal_batch_mode/comment_3_c8232195fa3db718dc2b246d528e6c00._comment new file mode 100644 index 0000000000..5f694569ab --- /dev/null +++ b/doc/todo/universal_batch_mode/comment_3_c8232195fa3db718dc2b246d528e6c00._comment @@ -0,0 +1,46 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2020-01-06T18:53:02Z" + content=""" +Thanks for digging that up. + +Hmm, if the goal was to check each command for such problems when adding +--batch, it didn't stop `git-annex add --batch` from being added, despite +indeed having such a buffering behavior. You can currently shoot your foot +combining that with `git annex readpresentkey --batch`, the same way as you +could with a hypothetical universal batch mode that let you run add +followed by readpresentkey. + +I don't see a universal batch mode being really able to detect +and avoid such problems either. How is it supposed to know that an add of +"dir/" will among other things add the content of key FOO, which was not +present before, and so a readpresentkey FOO should be delayed until after +the add, and the add's buffer flushed. It would have to model the +behavior of commands and insert barriers/flush points, and the modeling +necessarily could not be that fine-grained, so it would need to flush +the add buffer every time before readpresentkey. But there are surely +ways to combine batch use of add and readpresentkey that you know won't be +affected by the add buffering, and that would make those unnecessarily slow. + +Anyway, looking at the implementation of --batch for different commands, +sometimes it's trivial enough to wish it were generalized, but other times +there is batch-specific behavior. `add --batch` errors out if --update +is also used. `checkpresentkey --batch` outputs status codes rather than +the command's normal behavior of exiting 1/0. So we need batch-specific +implementations. + +--- + +idea: What might be good is a mode that lets any batch-capable commands +be combined together, not trying to support every possible command, and +perhaps with some added commands that the user can use to flush buffers +etc between operations as desired. Eg: + + git annex batch <
But that was caused by an automatic upgrade to repo version 7, +> not by any particular recent version of git-annex. It was actually +> implemented in 2016 or something like that and repo versions v6 (and v7) +> always had that behavior. Warning about it back then would not have helped +> with people who encountered the change in behavior in 2019. +> And, before a user upgraded to v6/v7, `git add` did not run git-annex, so +> there is no way that git-annex could have made `git add` warn about a +> upcoming behavior change. +> +> The major version number of git-annex indicates what repository version +> number it supports. 8.20190912 would not support v8 repositories, so +> that's not the right version number for it. +> +> Intentional changes to git-annex behavior have been very scant if you +> look back over the changelog. If I were making one today, I'd maybe think +> about warning first for a while. But that also slows down the development +> velocity if something is dependent on that behavior change, since you +> then have to wait some period of time for users to see the warning. +> +> Anyway, I don't see any point leaving a todo item open about a +> hypothetical future change, and I'm not willing to commit to always doing +> it, so [[done]] I guess? --[[Joey]] diff --git a/doc/todo/way_to_untrust_without_unncessary_branch_update.mdwn b/doc/todo/way_to_untrust_without_unncessary_branch_update.mdwn index 2f54ce6f3f..04d9e28af4 100644 --- a/doc/todo/way_to_untrust_without_unncessary_branch_update.mdwn +++ b/doc/todo/way_to_untrust_without_unncessary_branch_update.mdwn @@ -11,3 +11,5 @@ runs it will pile up a change in the git-annex branch. So, suggest a flag to avoid generally unnecessary branch update. It would also make sense to add it to the other trust level commands, and possibly other config setting commands. --[[Joey]] + +[[!tag needsthought]] diff --git a/doc/todo/webapp_export_remote_configuration_interface.mdwn b/doc/todo/webapp_export_remote_configuration_interface.mdwn index d6870a68e2..1ab60d875d 100644 --- a/doc/todo/webapp_export_remote_configuration_interface.mdwn +++ b/doc/todo/webapp_export_remote_configuration_interface.mdwn @@ -9,3 +9,4 @@ The UI for S3, WebDAV, directory special remote setup could also have a way to make it an export, and configure the directory to export. This would complicate the UI, so needs thought. --[[Joey]] +[[!tag needsthought]] diff --git a/doc/todo/windows_support.mdwn b/doc/todo/windows_support.mdwn index f636da9f59..da049b2f10 100644 --- a/doc/todo/windows_support.mdwn +++ b/doc/todo/windows_support.mdwn @@ -29,19 +29,8 @@ seems unreliable/broken on Windows. to avoid this, but only ended up with encoding crashes, or worse mojibake than this. -* `md5FilePath` still uses the filesystem encoding, and so may produce the - wrong value on Windows. This would impact keys that contain problem characters - (probably coming from the filename extension), and might cause - interoperability problems when git-annex generates the hash directories of a - remote, for example a rsync remote. - -* `encodeW8` is used in Git.UnionMerge, and while I fixed the other calls to - encodeW8, which all involved ByteStrings reading from git and so can just - treat it as utf-8 on Windows (via `decodeBS`), in the union merge case, - the ByteString has no defined encoding. It may have been written on Unix - and contain keys with invalid unicode in them. On windows, the union - merge code should probably check if it's valid utf-8, and if not, - abort the merge. 
+ > This may be due to windows actually using utf-16, but git-annex uses + > utf-8 for filename encoding when on windows. * If interoperating with a git-annex repository from a unix system, it's possible for a key to contain some invalid utf-8, which means its filename diff --git a/doc/todo/wishlist__58___disable_automatic_commits/comment_4_e021de3647a00aa0259a247441de50eb._comment b/doc/todo/wishlist__58___disable_automatic_commits/comment_4_e021de3647a00aa0259a247441de50eb._comment new file mode 100644 index 0000000000..56be81e356 --- /dev/null +++ b/doc/todo/wishlist__58___disable_automatic_commits/comment_4_e021de3647a00aa0259a247441de50eb._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="AdamSpiers" + avatar="http://cdn.libravatar.org/avatar/ae41dba34ee6000056f00793c695be75" + subject="Would you accept a patch implementing an annex.autocommit gitattribute?" + date="2020-01-06T16:12:54Z" + content=""" +In a semi-mythical future where I find the time to learn Haskell, in principle would you consider accepting a patch implementing my above suggestion of a new `annex.autocommit` gitattribute which offers finer control of autocommit than the current binary toggle? It would be very useful to me, and maybe I'm not the only one. +"""]] diff --git a/doc/users/anarcat.mdwn b/doc/users/anarcat.mdwn index 7a911e78fe..7611e4b0b5 100644 --- a/doc/users/anarcat.mdwn +++ b/doc/users/anarcat.mdwn @@ -18,13 +18,13 @@ My todos ... same. [[!inline pages="todo/* and !todo/done and !link(todo/done) and -link(users/anarcat)" sort=mtime feeds=no actions=yes archive=yes show=0]] +link(users/anarcat)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] Done ---- [[!inline pages="todo/* and !todo/done and link(todo/done) and -link(users/anarcat)" feeds=no actions=yes archive=yes show=0]] +link(users/anarcat)" feeds=no actions=yes archive=yes show=0 template=buglist]] My bugs ======= diff --git a/doc/users/dandi.mdwn b/doc/users/dandi.mdwn index 1dbb61c97a..b1a1773bf9 100644 --- a/doc/users/dandi.mdwn +++ b/doc/users/dandi.mdwn @@ -1,19 +1 @@ -TODOs for DANDI -=============== - -[[!inline pages="todo/* and !todo/done and !link(todo/done) and project(dandi)" sort=mtime feeds=no actions=yes archive=yes show=0]] - -Done ----- - -[[!inline pages="todo/* and !todo/done and link(todo/done) and project(dandi)" feeds=no actions=yes archive=yes show=0]] - -My bugs -======= - -[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and project(dandi)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] - -Fixed ------ - -[[!inline pages="bugs/* and !bugs/done and link(bugs/done) and project(dandi)" feeds=no actions=yes archive=yes show=0 template=buglist]] +Moved to [[projects/dandi]] diff --git a/doc/users/mih.mdwn b/doc/users/mih.mdwn index 1a0327a400..8b3670bf12 100644 --- a/doc/users/mih.mdwn +++ b/doc/users/mih.mdwn @@ -2,13 +2,13 @@ My todos ======== [[!inline pages="todo/* and !todo/done and !link(todo/done) and -author(mih)" sort=mtime feeds=no actions=yes archive=yes show=0]] +author(mih)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] Done ---- [[!inline pages="todo/* and !todo/done and link(todo/done) and -author(mih)" feeds=no actions=yes archive=yes show=0]] +author(mih)" feeds=no actions=yes archive=yes show=0 template=buglist]] My bugs ======= diff --git a/doc/users/parhuzamos.mdwn b/doc/users/parhuzamos.mdwn index df53229403..32bd05275a 100644 --- a/doc/users/parhuzamos.mdwn +++ b/doc/users/parhuzamos.mdwn @@ -5,4 +5,4 @@ Todos: -
publish git-annex scripts [[!inline pages="todo/* and !todo/done and !link(todo/done) and -link(users/parhuzamos)" sort=mtime feeds=no actions=yes archive=yes show=0]] +link(users/parhuzamos)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] diff --git a/doc/users/timothe.mdwn b/doc/users/timothe.mdwn new file mode 100644 index 0000000000..3ddbfa54fb --- /dev/null +++ b/doc/users/timothe.mdwn @@ -0,0 +1,5 @@ +Coucou Mijhito ! + +Oui, c'est bien ici que le brainstorm se passe ! + +Tim diff --git a/doc/users/yarikoptic.mdwn b/doc/users/yarikoptic.mdwn index cb4f6ff9e4..0f93005ba9 100644 --- a/doc/users/yarikoptic.mdwn +++ b/doc/users/yarikoptic.mdwn @@ -12,4 +12,4 @@ To discover more, visit # TODO/BUGs pages which should likely be tagged with one of the projects -[[!inline pages="(todo/* or bugs/*) and (author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle)) and !tagged(projects/*)" sort=mtime feeds=no actions=yes archive=yes show=0]] +[[!inline pages="(todo/* or bugs/*) and (author(yoh) or author(mih) or author(ben) or author(yarikoptic) or author(kyle)) and !tagged(projects/*)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] diff --git a/doc/users/yoh.mdwn b/doc/users/yoh.mdwn index e32e7be67f..fca9c5553b 100644 --- a/doc/users/yoh.mdwn +++ b/doc/users/yoh.mdwn @@ -2,13 +2,13 @@ My todos ======== [[!inline pages="todo/* and !todo/done and !link(todo/done) and -author(yoh)" sort=mtime feeds=no actions=yes archive=yes show=0]] +author(yoh)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]] Done ---- [[!inline pages="todo/* and !todo/done and link(todo/done) and -author(yoh)" feeds=no actions=yes archive=yes show=0]] +author(yoh)" feeds=no actions=yes archive=yes show=0 template=buglist]] My bugs ======= diff --git a/git-annex.cabal b/git-annex.cabal index 2602907595..d72ceee7b8 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -1,11 +1,11 @@ Name: git-annex -Version: 8.20200101 +Version: 8.20200221 Cabal-Version: >= 1.8 License: AGPL-3 Maintainer: Joey Hess Author: Joey Hess Stability: Stable -Copyright: 2010-2019 Joey Hess +Copyright: 2010-2020 Joey Hess License-File: COPYRIGHT Homepage: http://git-annex.branchable.com/ Build-type: Custom @@ -296,7 +296,7 @@ source-repository head custom-setup Setup-Depends: base (>= 4.11.1.0), hslogger, split, unix-compat, process, filepath, exceptions, bytestring, directory, IfElse, data-default, - filepath-bytestring (>= 1.4.2.1.0), + filepath-bytestring (>= 1.4.2.1.1), utf8-string, transformers, Cabal Executable git-annex @@ -315,13 +315,13 @@ Executable git-annex case-insensitive, random, dlist, - unix-compat, + unix-compat (>= 0.5), SafeSemaphore, async, directory (>= 1.2), disk-free-space, filepath, - filepath-bytestring (>= 1.4.2.1.0), + filepath-bytestring (>= 1.4.2.1.1), IfElse, hslogger, monad-logger, @@ -394,7 +394,6 @@ Executable git-annex if (os(windows)) Build-Depends: Win32 (>= 2.6.1.0), - unix-compat (>= 0.5), setenv, process (>= 1.6.2.0), silently (>= 1.2.5.1) @@ -996,8 +995,10 @@ Executable git-annex Types.MetaData Types.Mime Types.NumCopies + Types.ProposedAccepted Types.RefSpec Types.Remote + Types.RemoteConfig Types.RemoteState Types.RepoVersion Types.ScheduledActivity diff --git a/standalone/linux/skel/runshell b/standalone/linux/skel/runshell index 0c39199193..7ef160135c 100755 --- a/standalone/linux/skel/runshell +++ b/standalone/linux/skel/runshell @@ -212,8 +212,8 @@ case "$os" in useproot=1 # Store ssh connection caching sockets outside of 
sdcard. - GIT_ANNEX_TMP_DIR="$TMPDIR" - export GIT_ANNEX_TMP_DIR + GIT_ANNEX_SSH_SOCKET_DIR="$TMPDIR" + export GIT_ANNEX_SSH_SOCKET_DIR GIT_ANNEX_STANDLONE_ENV="PATH GCONV_PATH MANPATH LOCPATH" export GIT_ANNEX_STANDLONE_ENV