Merge branch 'master' into sqlite

This commit is contained in:
Joey Hess 2019-12-26 15:15:42 -04:00
commit 2b821eb225
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
43 changed files with 748 additions and 248 deletions

View file

@ -214,7 +214,7 @@ newState c r = do
new :: Git.Repo -> IO AnnexState
new r = do
r' <- Git.Config.read =<< Git.relPath r
let c = extractGitConfig r'
let c = extractGitConfig FromGitConfig r'
newState c =<< fixupRepo r' c
{- Performs an action in the Annex monad from a starting state,
@ -325,7 +325,7 @@ changeGitRepo r = do
r' <- liftIO $ adjuster r
changeState $ \s -> s
{ repo = r'
, gitconfig = extractGitConfig r'
, gitconfig = extractGitConfig FromGitConfig r'
}
{- Adds an adjustment to the Repo data. Adjustments persist across reloads

View file

@ -20,8 +20,12 @@ module Annex.FileMatcher (
preferredContentParser,
ParseToken,
parsedToMatcher,
mkLargeFilesParser,
mkMatchExpressionParser,
largeFilesMatcher,
AddUnlockedMatcher,
addUnlockedMatcher,
checkAddUnlockedMatcher,
module Types.FileMatcher
) where
import qualified Data.Map as M
@ -30,12 +34,14 @@ import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
import qualified Annex
import Types.FileMatcher
import Types.GitConfig
import Config.GitConfig
import Git.FilePath
import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
import qualified Git.Config
#ifdef WITH_MAGICMIME
import Annex.Magic
#endif
@ -173,8 +179,8 @@ preferredContentTokens pcd = concat
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkLargeFilesParser = do
mkMatchExpressionParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkMatchExpressionParser = do
#ifdef WITH_MAGICMIME
magicmime <- liftIO initMagicMime
let mimer n f = ValueToken n (usev $ f magicmime)
@ -197,26 +203,57 @@ mkLargeFilesParser = do
]
#endif
return $ map parse . tokenizeMatcher
where
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
- to be added to the annex, rather than directly to git.
-
- annex.largefiles is configured in git config, or git attributes,
- or global git-annex config, in that order.
-}
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles
where
go (Just expr) = do
matcher <- mkmatcher expr
go (HasGitConfig (Just expr)) = do
matcher <- mkmatcher expr "git config"
return $ const $ return matcher
go Nothing = return $ \file -> do
go v = return $ \file -> do
expr <- checkAttr "annex.largefiles" file
if null expr || expr == unspecifiedAttr
then return matchAll
else mkmatcher expr
then case v of
HasGlobalConfig (Just expr') ->
mkmatcher expr' "git-annex config"
_ -> return matchAll
else mkmatcher expr "gitattributes"
mkmatcher expr = do
parser <- mkLargeFilesParser
either badexpr return $ parsedToMatcher $ parser expr
badexpr e = giveup $ "bad annex.largefiles configuration: " ++ e
mkmatcher expr cfgfrom = do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.largefiles configuration in " ++ cfgfrom ++ ": " ++ e
newtype AddUnlockedMatcher = AddUnlockedMatcher (FileMatcher Annex)
addUnlockedMatcher :: Annex AddUnlockedMatcher
addUnlockedMatcher = AddUnlockedMatcher <$>
(go =<< getGitConfigVal' annexAddUnlocked)
where
go (HasGitConfig (Just expr)) = mkmatcher expr "git config"
go (HasGlobalConfig (Just expr)) = mkmatcher expr "git annex config"
go _ = matchalways False
mkmatcher :: String -> String -> Annex (FileMatcher Annex)
mkmatcher expr cfgfrom = case Git.Config.isTrueFalse expr of
Just b -> matchalways b
Nothing -> do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.addunlocked configuration in " ++ cfgfrom ++ ": " ++ e
matchalways True = return $ MOp limitAnything
matchalways False = return $ MOp limitNothing
checkAddUnlockedMatcher :: AddUnlockedMatcher -> MatchInfo -> Annex Bool
checkAddUnlockedMatcher (AddUnlockedMatcher matcher) mi =
checkMatcher' matcher mi S.empty
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
simply = Right . Operation

View file

@ -25,6 +25,7 @@ module Annex.Ingest (
import Annex.Common
import Types.KeySource
import Types.FileMatcher
import Backend
import Annex.Content
import Annex.Perms
@ -45,6 +46,7 @@ import Utility.Metered
import Git.FilePath
import Annex.InodeSentinal
import Annex.AdjustedBranch
import Annex.FileMatcher
import Control.Exception (IOException)
@ -306,10 +308,10 @@ forceParams = ifM (Annex.getState Annex.force)
- unless symlinks are not supported. annex.addunlocked can override that.
- Also, when in an adjusted unlocked branch, always add files unlocked.
-}
addUnlocked :: Annex Bool
addUnlocked =
addUnlocked :: AddUnlockedMatcher -> MatchInfo -> Annex Bool
addUnlocked matcher mi =
((not . coreSymlinks <$> Annex.getGitConfig) <||>
(annexAddUnlocked <$> Annex.getGitConfig) <||>
(checkAddUnlockedMatcher matcher mi) <||>
(maybe False isadjustedunlocked . snd <$> getCurrentBranch)
)
where
@ -319,12 +321,13 @@ addUnlocked =
{- Adds a file to the work tree for the key, and stages it in the index.
- The content of the key may be provided in a temp file, which will be
- moved into place.
- moved into place. If no content is provided, adds an annex link but does
- not ingest the content.
-
- When the content of the key is not accepted into the annex, returns False.
-}
addAnnexedFile :: FilePath -> Key -> Maybe FilePath -> Annex Bool
addAnnexedFile file key mtmp = ifM addUnlocked
addAnnexedFile :: AddUnlockedMatcher -> FilePath -> Key -> Maybe FilePath -> Annex Bool
addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi)
( do
mode <- maybe
(pure Nothing)
@ -348,6 +351,24 @@ addAnnexedFile file key mtmp = ifM addUnlocked
Nothing -> return True
)
where
mi = case mtmp of
Just tmp -> MatchingFile $ FileInfo
{ currFile = toRawFilePath tmp
, matchFile = toRawFilePath file
}
-- Provide as much info as we can without access to the
-- file's content. It's better to provide wrong info
-- than for an operation to fail just because it can't
-- tell if a file should be unlocked or locked.
Nothing -> MatchingInfo $ ProvidedInfo
{ providedFilePath = Right file
, providedKey = Right key
, providedFileSize = Right $ fromMaybe 0 $
keySize `fromKey` key
, providedMimeType = Right "application/octet-stream"
, providedMimeEncoding = Right "binary"
}
linkunlocked mode = linkFromAnnex key file mode >>= \case
LinkAnnexFailed -> liftIO $
writePointerFile (toRawFilePath file) key mode

View file

@ -94,7 +94,7 @@ autoEnable = do
_ -> return ()
where
configured rc = fromMaybe False $
Git.Config.isTrue =<< M.lookup autoEnableField rc
Git.Config.isTrueFalse =<< M.lookup autoEnableField rc
canenable u = (/= DeadTrusted) <$> lookupTrust u
getenabledremotes = M.fromList
. map (\r -> (getcu r, r))

View file

@ -23,6 +23,20 @@ git-annex (7.20191219) UNRELEASED; urgency=medium
like get also sped up in cases where they have to check a lot of
files but only transfer a few files. Speedups range from 30-100%.
* Added build dependency on the filepath-bytestring library.
* Fixed an oversight that had always prevented annex.resolvemerge
from being honored, when it was configured by git-annex config.
* annex.largefiles can be configured by git-annex config,
to more easily set a default that will also be used by clones,
without needing to shoehorn the expression into the gitattributes file.
The git config and gitattributes override that.
* annex.addunlocked can be set to an expression with the same format used by
annex.largefiles, when you want to default to unlocking some files but
not others.
* annex.addunlocked can be configured by git-annex config.
* git-annex-config --set/--unset: No longer change the local git config
setting, except for in the special case of annex.securehashesonly.
* Improve file ordering behavior when one parameter is "." and other
parameters are other directories.
-- Joey Hess <id@joeyh.name> Wed, 18 Dec 2019 15:12:40 -0400

View file

@ -50,9 +50,10 @@ optParser desc = AddOptions
seek :: AddOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
matcher <- largeFilesMatcher
let gofile file = ifM (checkFileMatcher matcher (fromRawFilePath file) <||> Annex.getState Annex.force)
( start file
largematcher <- largeFilesMatcher
addunlockedmatcher <- addUnlockedMatcher
let gofile file = ifM (checkFileMatcher largematcher (fromRawFilePath file) <||> Annex.getState Annex.force)
( start file addunlockedmatcher
, ifM (annexAddSmallFiles <$> Annex.getGitConfig)
( startSmall file
, stop
@ -87,8 +88,8 @@ addFile file = do
Annex.Queue.addCommand "add" (ps++[Param "--"]) [fromRawFilePath file]
return True
start :: RawFilePath -> CommandStart
start file = do
start :: RawFilePath -> AddUnlockedMatcher -> CommandStart
start file addunlockedmatcher = do
mk <- liftIO $ isPointerFile file
maybe go fixuppointer mk
where
@ -101,7 +102,7 @@ start file = do
starting "add" (ActionItemWorkTreeFile file) $
if isSymbolicLink s
then next $ addFile file
else perform file
else perform file addunlockedmatcher
addpresent key =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Just s | isSymbolicLink s -> fixuplink key
@ -117,9 +118,10 @@ start file = do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
next $ addFile file
perform :: RawFilePath -> CommandPerform
perform file = withOtherTmp $ \tmpdir -> do
lockingfile <- not <$> addUnlocked
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
lockingfile <- not <$> addUnlocked addunlockedmatcher
(MatchingFile (FileInfo file file))
let cfg = LockDownConfig
{ lockingFile = lockingfile
, hardlinkFileTmpDir = Just tmpdir

View file

@ -94,16 +94,16 @@ parseDownloadOptions withfileoption = DownloadOptions
seek :: AddUrlOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
addunlockedmatcher <- addUnlockedMatcher
let go (o', u) = do
r <- Remote.claimingUrl u
if Remote.uuid r == webUUID || rawOption (downloadOptions o')
then void $ commandAction $ startWeb addunlockedmatcher o' u
else checkUrl addunlockedmatcher r o' u
forM_ (addUrls o) (\u -> go (o, u))
case batchOption o of
Batch fmt -> batchInput fmt (parseBatchInput o) go
NoBatch -> noop
where
go (o', u) = do
r <- Remote.claimingUrl u
if Remote.uuid r == webUUID || rawOption (downloadOptions o')
then void $ commandAction $ startWeb o' u
else checkUrl r o' u
parseBatchInput :: AddUrlOptions -> String -> Either String (AddUrlOptions, URLString)
parseBatchInput o s
@ -114,8 +114,8 @@ parseBatchInput o s
else Right (o { downloadOptions = (downloadOptions o) { fileOption = Just f } }, u)
| otherwise = Right (o, s)
checkUrl :: Remote -> AddUrlOptions -> URLString -> Annex ()
checkUrl r o u = do
checkUrl :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> Annex ()
checkUrl addunlockedmatcher r o u = do
pathmax <- liftIO $ fileNameLengthLimit "."
let deffile = fromMaybe (urlString2file u (pathdepthOption o) pathmax) (fileOption (downloadOptions o))
go deffile =<< maybe
@ -129,49 +129,49 @@ checkUrl r o u = do
next $ return False
go deffile (Right (UrlContents sz mf)) = do
let f = adjustFile o (fromMaybe (maybe deffile fromSafeFilePath mf) (fileOption (downloadOptions o)))
void $ commandAction $ startRemote r o f u sz
void $ commandAction $ startRemote addunlockedmatcher r o f u sz
go deffile (Right (UrlMulti l)) = case fileOption (downloadOptions o) of
Nothing ->
forM_ l $ \(u', sz, f) -> do
let f' = adjustFile o (deffile </> fromSafeFilePath f)
void $ commandAction $ startRemote r o f' u' sz
void $ commandAction $ startRemote addunlockedmatcher r o f' u' sz
Just f -> case l of
[] -> noop
((u',sz,_):[]) -> do
let f' = adjustFile o f
void $ commandAction $ startRemote r o f' u' sz
void $ commandAction $ startRemote addunlockedmatcher r o f' u' sz
_ -> giveup $ unwords
[ "That url contains multiple files according to the"
, Remote.name r
, " remote; cannot add it to a single file."
]
startRemote :: Remote -> AddUrlOptions -> FilePath -> URLString -> Maybe Integer -> CommandStart
startRemote r o file uri sz = do
startRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> FilePath -> URLString -> Maybe Integer -> CommandStart
startRemote addunlockedmatcher r o file uri sz = do
pathmax <- liftIO $ fileNameLengthLimit "."
let file' = joinPath $ map (truncateFilePath pathmax) $ splitDirectories file
startingAddUrl uri o $ do
showNote $ "from " ++ Remote.name r
showDestinationFile file'
performRemote r o uri file' sz
performRemote addunlockedmatcher r o uri file' sz
performRemote :: Remote -> AddUrlOptions -> URLString -> FilePath -> Maybe Integer -> CommandPerform
performRemote r o uri file sz = ifAnnexed (toRawFilePath file) adduri geturi
performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> FilePath -> Maybe Integer -> CommandPerform
performRemote addunlockedmatcher r o uri file sz = ifAnnexed (toRawFilePath file) adduri geturi
where
loguri = setDownloader uri OtherDownloader
adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize
checkexistssize key = return $ case sz of
Nothing -> (True, True, loguri)
Just n -> (True, n == fromMaybe n (fromKey keySize key), loguri)
geturi = next $ isJust <$> downloadRemoteFile r (downloadOptions o) uri file sz
geturi = next $ isJust <$> downloadRemoteFile addunlockedmatcher r (downloadOptions o) uri file sz
downloadRemoteFile :: Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
downloadRemoteFile r o uri file sz = checkCanAdd file $ do
downloadRemoteFile :: AddUnlockedMatcher -> Remote -> DownloadOptions -> URLString -> FilePath -> Maybe Integer -> Annex (Maybe Key)
downloadRemoteFile addunlockedmatcher r o uri file sz = checkCanAdd file $ do
let urlkey = Backend.URL.fromUrl uri sz
liftIO $ createDirectoryIfMissing True (parentDir file)
ifM (Annex.getState Annex.fast <||> pure (relaxedOption o))
( do
addWorkTree (Remote.uuid r) loguri file urlkey Nothing
addWorkTree addunlockedmatcher (Remote.uuid r) loguri file urlkey Nothing
return (Just urlkey)
, do
-- Set temporary url for the urlkey
@ -181,15 +181,15 @@ downloadRemoteFile r o uri file sz = checkCanAdd file $ do
let downloader = \dest p -> fst
<$> Remote.retrieveKeyFile r urlkey
(AssociatedFile (Just (toRawFilePath file))) dest p
ret <- downloadWith downloader urlkey (Remote.uuid r) loguri file
ret <- downloadWith addunlockedmatcher downloader urlkey (Remote.uuid r) loguri file
removeTempUrl urlkey
return ret
)
where
loguri = setDownloader uri OtherDownloader
startWeb :: AddUrlOptions -> URLString -> CommandStart
startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
startWeb :: AddUnlockedMatcher -> AddUrlOptions -> URLString -> CommandStart
startWeb addunlockedmatcher o urlstring = go $ fromMaybe bad $ parseURI urlstring
where
bad = fromMaybe (giveup $ "bad url " ++ urlstring) $
Url.parseURIRelaxed $ urlstring
@ -209,12 +209,12 @@ startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
( pure $ url2file url (pathdepthOption o) pathmax
, pure f
)
performWeb o urlstring file urlinfo
performWeb addunlockedmatcher o urlstring file urlinfo
performWeb :: AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
performWeb o url file urlinfo = ifAnnexed (toRawFilePath file) addurl geturl
performWeb :: AddUnlockedMatcher -> AddUrlOptions -> URLString -> FilePath -> Url.UrlInfo -> CommandPerform
performWeb addunlockedmatcher o url file urlinfo = ifAnnexed (toRawFilePath file) addurl geturl
where
geturl = next $ isJust <$> addUrlFile (downloadOptions o) url urlinfo file
geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file
addurl = addUrlChecked o url file webUUID $ \k ->
ifM (pure (not (rawOption (downloadOptions o))) <&&> youtubeDlSupported url)
( return (True, True, setDownloader url YoutubeDownloader)
@ -249,15 +249,15 @@ addUrlChecked o url file u checkexistssize key =
- different file, based on the title of the media. Unless the user
- specified fileOption, which then forces using the FilePath.
-}
addUrlFile :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
addUrlFile o url urlinfo file =
addUrlFile :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
addUrlFile addunlockedmatcher o url urlinfo file =
ifM (Annex.getState Annex.fast <||> pure (relaxedOption o))
( nodownloadWeb o url urlinfo file
, downloadWeb o url urlinfo file
( nodownloadWeb addunlockedmatcher o url urlinfo file
, downloadWeb addunlockedmatcher o url urlinfo file
)
downloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
downloadWeb o url urlinfo file =
downloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
downloadWeb addunlockedmatcher o url urlinfo file =
go =<< downloadWith' downloader urlkey webUUID url (AssociatedFile (Just (toRawFilePath file)))
where
urlkey = addSizeUrlKey urlinfo $ Backend.URL.fromUrl url Nothing
@ -272,7 +272,7 @@ downloadWeb o url urlinfo file =
normalfinish tmp = checkCanAdd file $ do
showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file)
finishDownloadWith tmp webUUID url file
finishDownloadWith addunlockedmatcher tmp webUUID url file
tryyoutubedl tmp
-- Ask youtube-dl what filename it will download
-- first, and check if that is already an annexed file,
@ -298,7 +298,7 @@ downloadWeb o url urlinfo file =
cleanuptmp
checkCanAdd dest $ do
showDestinationFile dest
addWorkTree webUUID mediaurl dest mediakey (Just mediafile)
addWorkTree addunlockedmatcher webUUID mediaurl dest mediakey (Just mediafile)
return $ Just mediakey
Right Nothing -> normalfinish tmp
Left msg -> do
@ -341,13 +341,13 @@ showDestinationFile file = do
- Downloads the url, sets up the worktree file, and returns the
- real key.
-}
downloadWith :: (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key)
downloadWith downloader dummykey u url file =
downloadWith :: AddUnlockedMatcher -> (FilePath -> MeterUpdate -> Annex Bool) -> Key -> UUID -> URLString -> FilePath -> Annex (Maybe Key)
downloadWith addunlockedmatcher downloader dummykey u url file =
go =<< downloadWith' downloader dummykey u url afile
where
afile = AssociatedFile (Just (toRawFilePath file))
go Nothing = return Nothing
go (Just tmp) = finishDownloadWith tmp u url file
go (Just tmp) = finishDownloadWith addunlockedmatcher tmp u url file
{- Like downloadWith, but leaves the dummy key content in
- the returned location. -}
@ -363,8 +363,8 @@ downloadWith' downloader dummykey u url afile =
then return (Just tmp)
else return Nothing
finishDownloadWith :: FilePath -> UUID -> URLString -> FilePath -> Annex (Maybe Key)
finishDownloadWith tmp u url file = do
finishDownloadWith :: AddUnlockedMatcher -> FilePath -> UUID -> URLString -> FilePath -> Annex (Maybe Key)
finishDownloadWith addunlockedmatcher tmp u url file = do
backend <- chooseBackend file
let source = KeySource
{ keyFilename = file
@ -374,7 +374,7 @@ finishDownloadWith tmp u url file = do
genKey source nullMeterUpdate backend >>= \case
Nothing -> return Nothing
Just (key, _) -> do
addWorkTree u url file key (Just tmp)
addWorkTree addunlockedmatcher u url file key (Just tmp)
return (Just key)
{- Adds the url size to the Key. -}
@ -384,8 +384,8 @@ addSizeUrlKey urlinfo key = alterKey key $ \d -> d
}
{- Adds worktree file to the repository. -}
addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
addWorkTree u url file key mtmp = case mtmp of
addWorkTree :: AddUnlockedMatcher -> UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
addWorkTree addunlockedmatcher u url file key mtmp = case mtmp of
Nothing -> go
Just tmp -> do
-- Move to final location for large file check.
@ -407,15 +407,15 @@ addWorkTree u url file key mtmp = case mtmp of
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
setUrlPresent key url
logChange key u InfoPresent
ifM (addAnnexedFile file key mtmp)
ifM (addAnnexedFile addunlockedmatcher file key mtmp)
( do
when (isJust mtmp) $
logStatus key InfoPresent
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . nukeFile)) mtmp
)
nodownloadWeb :: DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownloadWeb o url urlinfo file
nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> FilePath -> Annex (Maybe Key)
nodownloadWeb addunlockedmatcher o url urlinfo file
| Url.urlExists urlinfo = if rawOption o
then nomedia
else either (const nomedia) usemedia
@ -426,20 +426,20 @@ nodownloadWeb o url urlinfo file
where
nomedia = do
let key = Backend.URL.fromUrl url (Url.urlSize urlinfo)
nodownloadWeb' url key file
nodownloadWeb' addunlockedmatcher url key file
usemedia mediafile = do
let dest = if isJust (fileOption o)
then file
else takeFileName mediafile
let mediaurl = setDownloader url YoutubeDownloader
let mediakey = Backend.URL.fromUrl mediaurl Nothing
nodownloadWeb' mediaurl mediakey dest
nodownloadWeb' addunlockedmatcher mediaurl mediakey dest
nodownloadWeb' :: URLString -> Key -> FilePath -> Annex (Maybe Key)
nodownloadWeb' url key file = checkCanAdd file $ do
nodownloadWeb' :: AddUnlockedMatcher -> URLString -> Key -> FilePath -> Annex (Maybe Key)
nodownloadWeb' addunlockedmatcher url key file = checkCanAdd file $ do
showDestinationFile file
liftIO $ createDirectoryIfMissing True (parentDir file)
addWorkTree webUUID url file key Nothing
addWorkTree addunlockedmatcher webUUID url file key Nothing
return (Just key)
url2file :: URI -> Maybe Int -> Int -> FilePath

View file

@ -5,6 +5,8 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.Config where
import Command
@ -54,12 +56,14 @@ seek :: Action -> CommandSeek
seek (SetConfig ck@(ConfigKey name) val) = commandAction $
startingUsualMessages (decodeBS' name) (ActionItemOther (Just (fromConfigValue val))) $ do
setGlobalConfig ck val
setConfig ck (fromConfigValue val)
when (needLocalUpdate ck) $
setConfig ck (fromConfigValue val)
next $ return True
seek (UnsetConfig ck@(ConfigKey name)) = commandAction $
startingUsualMessages (decodeBS' name) (ActionItemOther (Just "unset")) $do
unsetGlobalConfig ck
unsetConfig ck
when (needLocalUpdate ck) $
unsetConfig ck
next $ return True
seek (GetConfig ck) = commandAction $
startingCustomOutput (ActionItemOther Nothing) $ do
@ -67,3 +71,7 @@ seek (GetConfig ck) = commandAction $
Nothing -> return ()
Just (ConfigValue v) -> liftIO $ S8.putStrLn v
next $ return True
needLocalUpdate :: ConfigKey -> Bool
needLocalUpdate (ConfigKey "annex.securehashesonly") = True
needLocalUpdate _ = False

View file

@ -27,7 +27,6 @@ import Annex.Content
import Annex.Transfer
import Annex.CatFile
import Annex.FileMatcher
import Types.FileMatcher
import Annex.RemoteTrackingBranch
import Logs.Location
import Logs.Export

View file

@ -41,7 +41,7 @@ start = do
stop
guardTest :: Annex ()
guardTest = unlessM (fromMaybe False . Git.Config.isTrue' <$> getConfig key mempty) $
guardTest = unlessM (fromMaybe False . Git.Config.isTrueFalse' <$> getConfig key mempty) $
giveup $ unlines
[ "Running fuzz tests *writes* to and *deletes* files in"
, "this repository, and pushes those changes to other"

View file

@ -102,7 +102,8 @@ seek o@(LocalImportOptions {}) = startConcurrency commandStages $ do
unless (null inrepops) $ do
giveup $ "cannot import files from inside the working tree (use git annex add instead): " ++ unwords inrepops
largematcher <- largeFilesMatcher
(commandAction . startLocal largematcher (duplicateMode o))
addunlockedmatcher <- addUnlockedMatcher
(commandAction . startLocal addunlockedmatcher largematcher (duplicateMode o))
`withPathContents` importFiles o
seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
r <- getParsed (importFromRemote o)
@ -114,8 +115,8 @@ seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
(importToSubDir o)
seekRemote r (importToBranch o) subdir
startLocal :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
startLocal largematcher mode (srcfile, destfile) =
startLocal :: AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
startLocal addunlockedmatcher largematcher mode (srcfile, destfile) =
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
( starting "import" (ActionItemWorkTreeFile (toRawFilePath destfile))
pickaction
@ -208,7 +209,11 @@ startLocal largematcher mode (srcfile, destfile) =
warning $ "not overwriting existing " ++ destfile ++ " " ++ why
stop
lockdown a = do
lockingfile <- not <$> addUnlocked
let mi = MatchingFile $ FileInfo
{ currFile = toRawFilePath srcfile
, matchFile = toRawFilePath destfile
}
lockingfile <- not <$> addUnlocked addunlockedmatcher mi
-- Minimal lock down with no hard linking so nothing
-- has to be done to clean up from it.
let cfg = LockDownConfig

View file

@ -38,6 +38,7 @@ import Annex.YoutubeDl
import Types.MetaData
import Logs.MetaData
import Annex.MetaData
import Annex.FileMatcher
import Command.AddUrl (addWorkTree)
cmd :: Command
@ -62,11 +63,12 @@ optParser desc = ImportFeedOptions
seek :: ImportFeedOptions -> CommandSeek
seek o = do
addunlockedmatcher <- addUnlockedMatcher
cache <- getCache (templateOption o)
forM_ (feedUrls o) (getFeed o cache)
forM_ (feedUrls o) (getFeed addunlockedmatcher o cache)
getFeed :: ImportFeedOptions -> Cache -> URLString -> CommandSeek
getFeed opts cache url = do
getFeed :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> URLString -> CommandSeek
getFeed addunlockedmatcher opts cache url = do
showStart' "importfeed" (Just url)
downloadFeed url >>= \case
Nothing -> showEndResult =<< feedProblem url
@ -77,7 +79,7 @@ getFeed opts cache url = do
[] -> debugfeedcontent feedcontent "bad feed content; no enclosures to download"
l -> do
showEndOk
ifM (and <$> mapM (performDownload opts cache) l)
ifM (and <$> mapM (performDownload addunlockedmatcher opts cache) l)
( clearFeedProblem url
, void $ feedProblem url
"problem downloading some item(s) from feed"
@ -153,8 +155,8 @@ downloadFeed url
, return Nothing
)
performDownload :: ImportFeedOptions -> Cache -> ToDownload -> Annex Bool
performDownload opts cache todownload = case location todownload of
performDownload :: AddUnlockedMatcher -> ImportFeedOptions -> Cache -> ToDownload -> Annex Bool
performDownload addunlockedmatcher opts cache todownload = case location todownload of
Enclosure url -> checkknown url $
rundownload url (takeWhile (/= '?') $ takeExtension url) $ \f -> do
r <- Remote.claimingUrl url
@ -171,7 +173,7 @@ performDownload opts cache todownload = case location todownload of
-- don't use youtube-dl
, rawOption = True
}
maybeToList <$> addUrlFile dlopts url urlinfo f
maybeToList <$> addUrlFile addunlockedmatcher dlopts url urlinfo f
else do
res <- tryNonAsync $ maybe
(error $ "unable to checkUrl of " ++ Remote.name r)
@ -181,10 +183,10 @@ performDownload opts cache todownload = case location todownload of
Left _ -> return []
Right (UrlContents sz _) ->
maybeToList <$>
downloadRemoteFile r (downloadOptions opts) url f sz
downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url f sz
Right (UrlMulti l) -> do
kl <- forM l $ \(url', sz, subf) ->
downloadRemoteFile r (downloadOptions opts) url' (f </> fromSafeFilePath subf) sz
downloadRemoteFile addunlockedmatcher r (downloadOptions opts) url' (f </> fromSafeFilePath subf) sz
return $ if all isJust kl
then catMaybes kl
else []
@ -273,7 +275,7 @@ performDownload opts cache todownload = case location todownload of
[] -> ".m"
s -> s
ok <- rundownload linkurl ext $ \f -> do
addWorkTree webUUID mediaurl f mediakey (Just mediafile)
addWorkTree addunlockedmatcher webUUID mediaurl f mediakey (Just mediafile)
return [mediakey]
return (Just ok)
-- youtude-dl didn't support it, so
@ -285,16 +287,16 @@ performDownload opts cache todownload = case location todownload of
return Nothing
return (fromMaybe False r)
where
downloadlink = performDownload opts cache todownload
downloadlink = performDownload addunlockedmatcher opts cache todownload
{ location = Enclosure linkurl }
addmediafast linkurl mediaurl mediakey =
ifM (pure (not (rawOption (downloadOptions opts)))
<&&> youtubeDlSupported linkurl)
( rundownload linkurl ".m" $ \f -> do
addWorkTree webUUID mediaurl f mediakey Nothing
addWorkTree addunlockedmatcher webUUID mediaurl f mediakey Nothing
return [mediakey]
, performDownload opts cache todownload
, performDownload addunlockedmatcher opts cache todownload
{ location = Enclosure linkurl }
)

View file

@ -9,7 +9,6 @@ module Command.MatchExpression where
import Command
import Annex.FileMatcher
import Types.FileMatcher
import Utility.DataUnits
import Utility.Matcher
import Annex.UUID
@ -75,7 +74,7 @@ optParser desc = MatchExpressionOptions
seek :: MatchExpressionOptions -> CommandSeek
seek o = do
parser <- if largeFilesExpression o
then mkLargeFilesParser
then mkMatchExpressionParser
else do
u <- getUUID
pure $ preferredContentParser $ preferredContentTokens $ PCD

View file

@ -1,6 +1,6 @@
{- git-annex configuration
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -15,22 +15,26 @@ import Logs.Config
{- Gets a specific setting from GitConfig. If necessary, loads the
- repository-global defaults when the GitConfig does not yet
- have a value. -}
- have a value.
-
- Note: Be sure to add the config value to mergeGitConfig.
-}
getGitConfigVal :: (GitConfig -> Configurable a) -> Annex a
getGitConfigVal f = do
v <- f <$> Annex.getGitConfig
case v of
HasConfig c -> return c
DefaultConfig _ -> do
r <- Annex.gitRepo
m <- loadGlobalConfig
let globalgc = extractGitConfig (r { config = m })
-- This merge of the repo-global config and the git
-- config makes all repository-global default
-- values populate the GitConfig with HasConfig
-- values, so it will only need to be done once.
Annex.changeGitConfig (\gc -> mergeGitConfig gc globalgc)
v' <- f <$> Annex.getGitConfig
case v' of
HasConfig c -> return c
DefaultConfig d -> return d
getGitConfigVal f = getGitConfigVal' f >>= \case
HasGlobalConfig c -> return c
DefaultConfig d -> return d
HasGitConfig c -> return c
getGitConfigVal' :: (GitConfig -> Configurable a) -> Annex (Configurable a)
getGitConfigVal' f = (f <$> Annex.getGitConfig) >>= \case
DefaultConfig _ -> do
r <- Annex.gitRepo
m <- loadGlobalConfig
let globalgc = extractGitConfig FromGlobalConfig (r { config = m })
-- This merge of the repo-global config and the git
-- config makes all repository-global default
-- values populate the GitConfig with HasGlobalConfig
-- values, so it will only need to be done once.
Annex.changeGitConfig (\gc -> mergeGitConfig gc globalgc)
f <$> Annex.getGitConfig
c -> return c

View file

@ -137,7 +137,7 @@ applyCommitModeForCommitTree :: CommitMode -> [CommandParam] -> Repo -> [Command
applyCommitModeForCommitTree commitmode ps r
| commitmode == ManualCommit =
case Git.Config.getMaybe "commit.gpgsign" r of
Just s | Git.Config.isTrue' s == Just True ->
Just s | Git.Config.isTrueFalse' s == Just True ->
Param "-S":ps
_ -> ps'
| otherwise = ps'

View file

@ -156,12 +156,12 @@ parse s
. map (\(k,v) -> (ConfigKey k, [ConfigValue (S.drop 1 v)]))
. map (S.break (== c))
{- Checks if a string from git config is a true value. -}
isTrue :: String -> Maybe Bool
isTrue = isTrue' . ConfigValue . encodeBS'
{- Checks if a string from git config is a true/false value. -}
isTrueFalse :: String -> Maybe Bool
isTrueFalse = isTrueFalse' . ConfigValue . encodeBS'
isTrue' :: ConfigValue -> Maybe Bool
isTrue' (ConfigValue s)
isTrueFalse' :: ConfigValue -> Maybe Bool
isTrueFalse' (ConfigValue s)
| s' == "true" = Just True
| s' == "false" = Just False
| otherwise = Nothing
@ -177,7 +177,7 @@ boolConfig' True = "true"
boolConfig' False = "false"
isBare :: Repo -> Bool
isBare r = fromMaybe False $ isTrue' =<< getMaybe coreBare r
isBare r = fromMaybe False $ isTrueFalse' =<< getMaybe coreBare r
coreBare :: ConfigKey
coreBare = "core.bare"

View file

@ -46,6 +46,7 @@ unsetGlobalConfig name = do
setGlobalConfig' name (ConfigValue mempty)
-- Reads the global config log every time.
-- It's more efficient to use Config.GitConfig.
getGlobalConfig :: ConfigKey -> Annex (Maybe ConfigValue)
getGlobalConfig name = M.lookup name <$> loadGlobalConfig

View file

@ -42,7 +42,6 @@ import Types.Group
import Types.Remote (RemoteConfig)
import Logs.Group
import Logs.Remote
import Types.FileMatcher
import Types.StandardGroups
import Limit

View file

@ -18,7 +18,7 @@ import Types.CleanupActions
import Types.UrlContents
import qualified Git
import Config
import Git.Config (isTrue, boolConfig)
import Git.Config (isTrueFalse, boolConfig)
import Git.Env
import Remote.Helper.Special
import Remote.Helper.ExportImport
@ -157,7 +157,7 @@ externalSetup _ mu _ c gc = do
(c', _encsetup) <- encryptionSetup c gc
c'' <- case M.lookup "readonly" c of
Just v | isTrue v == Just True -> do
Just v | isTrueFalse v == Just True -> do
setConfig (remoteConfig (fromJust (lookupName c)) "readonly") (boolConfig True)
return c'
_ -> do

View file

@ -857,13 +857,13 @@ mkState r u gc = do
where
go
| remoteAnnexCheckUUID gc = return
(return True, return (r, extractGitConfig r))
(return True, return (r, extractGitConfig FromGitConfig r))
| otherwise = do
rv <- liftIO newEmptyMVar
let getrepo = ifM (liftIO $ isEmptyMVar rv)
( do
r' <- tryGitConfigRead False r
let t = (r', extractGitConfig r')
let t = (r', extractGitConfig FromGitConfig r')
void $ liftIO $ tryPutMVar rv t
return t
, liftIO $ readMVar rv

View file

@ -96,7 +96,7 @@ getDifferences :: Git.Repo -> Differences
getDifferences r = mkDifferences $ S.fromList $
mapMaybe getmaybe [minBound .. maxBound]
where
getmaybe d = case Git.Config.isTrue' =<< Git.Config.getMaybe (differenceConfigKey d) r of
getmaybe d = case Git.Config.isTrueFalse' =<< Git.Config.getMaybe (differenceConfigKey d) r of
Just True -> Just d
_ -> Nothing

View file

@ -12,7 +12,7 @@ import Utility.Split
import Types.Key
import Key
import Data.Time.Clock
import Git.Config (isTrue, boolConfig)
import Git.Config (isTrueFalse, boolConfig)
import Control.Applicative
import Prelude
@ -71,7 +71,7 @@ toAutoUpgrade :: Maybe String -> AutoUpgrade
toAutoUpgrade Nothing = AskUpgrade
toAutoUpgrade (Just s)
| s == "ask" = AskUpgrade
| isTrue s == Just True = AutoUpgrade
| isTrueFalse s == Just True = AutoUpgrade
| otherwise = NoAutoUpgrade
fromAutoUpgrade :: AutoUpgrade -> String

View file

@ -9,6 +9,7 @@
module Types.GitConfig (
Configurable(..),
ConfigSource(..),
GitConfig(..),
extractGitConfig,
mergeGitConfig,
@ -46,13 +47,17 @@ import qualified Data.Set as S
-- | A configurable value, that may not be fully determined yet because
-- the global git config has not yet been loaded.
data Configurable a
= HasConfig a
-- ^ Value is fully determined.
= HasGitConfig a
-- ^ The git config has a value.
| HasGlobalConfig a
-- ^ The global config has a value (and the git config does not).
| DefaultConfig a
-- ^ A default value is known, but not all config sources
-- have been read yet.
deriving (Show)
data ConfigSource = FromGitConfig | FromGlobalConfig
{- Main git-annex settings. Each setting corresponds to a git-config key
- such as annex.foo -}
data GitConfig = GitConfig
@ -80,7 +85,7 @@ data GitConfig = GitConfig
, annexYoutubeDlOptions :: [String]
, annexAriaTorrentOptions :: [String]
, annexCrippledFileSystem :: Bool
, annexLargeFiles :: Maybe String
, annexLargeFiles :: Configurable (Maybe String)
, annexGitAddToAnnex :: Bool
, annexAddSmallFiles :: Bool
, annexFsckNudge :: Bool
@ -97,7 +102,7 @@ data GitConfig = GitConfig
, annexVerify :: Bool
, annexPidLock :: Bool
, annexPidLockTimeout :: Seconds
, annexAddUnlocked :: Bool
, annexAddUnlocked :: Configurable (Maybe String)
, annexSecureHashesOnly :: Bool
, annexRetry :: Maybe Integer
, annexRetryDelay :: Maybe Seconds
@ -116,8 +121,8 @@ data GitConfig = GitConfig
, gpgCmd :: GpgCmd
}
extractGitConfig :: Git.Repo -> GitConfig
extractGitConfig r = GitConfig
extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig
extractGitConfig configsource r = GitConfig
{ annexVersion = RepoVersion <$> getmayberead (annex "version")
, annexUUID = maybe NoUUID toUUID $ getmaybe (annex "uuid")
, annexNumCopies = NumCopies <$> getmayberead (annex "numcopies")
@ -151,7 +156,8 @@ extractGitConfig r = GitConfig
, annexYoutubeDlOptions = getwords (annex "youtube-dl-options")
, annexAriaTorrentOptions = getwords (annex "aria-torrent-options")
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
, annexLargeFiles = getmaybe (annex "largefiles")
, annexLargeFiles = configurable Nothing $
fmap Just $ getmaybe (annex "largefiles")
, annexGitAddToAnnex = getbool (annex "gitaddtoannex") True
, annexAddSmallFiles = getbool (annex "addsmallfiles") True
, annexFsckNudge = getbool (annex "fscknudge") True
@ -171,7 +177,8 @@ extractGitConfig r = GitConfig
, annexPidLock = getbool (annex "pidlock") False
, annexPidLockTimeout = Seconds $ fromMaybe 300 $
getmayberead (annex "pidlocktimeout")
, annexAddUnlocked = getbool (annex "addunlocked") False
, annexAddUnlocked = configurable Nothing $
fmap Just $ getmaybe (annex "addunlocked")
, annexSecureHashesOnly = getbool (annex "securehashesonly") False
, annexRetry = getmayberead (annex "retry")
, annexRetryDelay = Seconds
@ -201,7 +208,7 @@ extractGitConfig r = GitConfig
}
where
getbool k d = fromMaybe d $ getmaybebool k
getmaybebool k = Git.Config.isTrue' =<< getmaybe' k
getmaybebool k = Git.Config.isTrueFalse' =<< getmaybe' k
getmayberead k = readish =<< getmaybe k
getmaybe = fmap fromConfigValue . getmaybe'
getmaybe' k = Git.Config.getMaybe k r
@ -209,7 +216,9 @@ extractGitConfig r = GitConfig
getwords k = fromMaybe [] $ words <$> getmaybe k
configurable d Nothing = DefaultConfig d
configurable _ (Just v) = HasConfig v
configurable _ (Just v) = case configsource of
FromGitConfig -> HasGitConfig v
FromGlobalConfig -> HasGlobalConfig v
annex k = ConfigKey $ "annex." <> k
@ -221,13 +230,17 @@ mergeGitConfig :: GitConfig -> GitConfig -> GitConfig
mergeGitConfig gitconfig repoglobals = gitconfig
{ annexAutoCommit = merge annexAutoCommit
, annexSyncContent = merge annexSyncContent
, annexResolveMerge = merge annexResolveMerge
, annexLargeFiles = merge annexLargeFiles
, annexAddUnlocked = merge annexAddUnlocked
}
where
merge f = case f gitconfig of
HasConfig v -> HasConfig v
HasGitConfig v -> HasGitConfig v
DefaultConfig d -> case f repoglobals of
HasConfig v -> HasConfig v
DefaultConfig _ -> HasConfig d
HasGlobalConfig v -> HasGlobalConfig v
_ -> HasGitConfig d
HasGlobalConfig v -> HasGlobalConfig v
{- Per-remote git-annex settings. Each setting corresponds to a git-config
- key such as <remote>.annex-foo, or if that is not set, a default from
@ -343,7 +356,7 @@ extractRemoteGitConfig r remotename = do
}
where
getbool k d = fromMaybe d $ getmaybebool k
getmaybebool k = Git.Config.isTrue' =<< getmaybe' k
getmaybebool k = Git.Config.isTrueFalse' =<< getmaybe' k
getmayberead k = readish =<< getmaybe k
getmaybe = fmap fromConfigValue . getmaybe'
getmaybe' k = mplus (Git.Config.getMaybe (key k) r)

View file

@ -113,7 +113,10 @@ prop_upFrom_basics dir
- are all equivilant.
-}
dirContains :: FilePath -> FilePath -> Bool
dirContains a b = a == b || a' == b' || (addTrailingPathSeparator a') `isPrefixOf` b'
dirContains a b = a == b
|| a' == b'
|| (addTrailingPathSeparator a') `isPrefixOf` b'
|| a' == "." && normalise ("." </> b') == b'
where
a' = norm a
b' = norm b

View file

@ -74,3 +74,4 @@ Thanks for having a look.
[0]: https://github.com/datalad/datalad/issues/3890
[[!meta author=kyle]]
[[!tag projects/datalad]]

View file

@ -0,0 +1,97 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2019-12-26T16:56:38Z"
content="""
The title makes it sound like a work tree file gets replaced with a
dangling pointer file, which is not the case. A worktree file that was
not annexed is is being added to the annex, if you choose to commit that
state.
For whatever reason, git becomes confused about whether this file is
modified. I seem to recall that git distrusts information it recorded in
its own index if the mtime of the index file is too close to the
mtime recorded inside it, or something like that. (Likely as a
workaround for mtime granularity issues with various filesystems.) Whatever
the reason, git-annex is not involved in it; it will happen sometimes even
when git-annex has not initialized the repo and is not being used.
It's not normally a problem that git gets confused or distrusts its
index or whatever, since all it does is stat the file, or
feed it through the clean filter again, and if the file is not
modified, nothing changes.
Why does the clean filter decide to add the file to annex in this case?
Well, because this is all happening inside this:
git -c annex.largefiles=anything annex add -- file-annex
And there you've told it to add all files to the annex with
annex.largefiles=anything. So it does.
To complete the description of what happens:
`git-annex add` runs `git add` on the `file-annex` symlink it's adding.
`git add file-annex`, for whatever reason, decides to run the clean filter on
file-git.
The annex.largefiles=anything gets inherited through this chain of calls.
While the resulting "change" does not get staged by `git add`
(it was never asked to operate on that file), the clean filter
duly ingests the content into the annex, and remembers its inode.
So when the clean filter later gets run by `git status`, it sees an inode
it knows it saw before, and assumes it should remain annexed.
(This is why the commit that checks for known inodes was fingered by the
bisection.)
---
Note that, you can accomplish the same thing without setting
annex.largefiles, assuming a current version of git-annex:
git add file-git
git annex add file-annex
I think the only reason for setting annex.largefiles in either of the two
places you did is if there's a default value that you want to
temporarily override?
----
Also, just touching file-git before the annex.largefiles=anything
operation causes the same problem, again git-annex add runs git add
file-annex, which runs the clean filter on file-git, which this time
is legitimately modified.
---
Possible ways to improve this short of improving git's behavior:
`git annex` could set annex.gitaddtoannex=false when it runs `git add`.
Since git-annex never relies on `git add` adding files to the annex,
that seems entirely safe to always do (perhaps even when running all git
commands aside from git-annex commands of course). But, that would
not help with a variant where rather than `git-annex add`,
this is run:
git -c annex.largefiles=anything add file-annex
The clean filter could delay long enough that git stops distrusting
its index based on timestamps. A 1 second sleep if the file's mtime
is too close to the current time works; I prototyped a patch doing that.
But, that does not deal with the case
mentioned above where file-git gets touched or legitimately modified.
The clean filter could check if the file is already
in the index but is not annexed, and avoid converting it to annexed.
But that would prevent legitimate conversions from git to annexed
as well, which rely on the same kind of use of annex.largefiles.
Temporary overrides of annex.largefiles could be ignored by the clean
filter. Same problem as previous.
So, I think that fixing this will involve adding a new interface for
converting between git and annexed files that does not involve
-c annex.largefiles. That plus having the clean filter check for
non-annexed files seems like the best approach.
"""]]

View file

@ -0,0 +1,22 @@
### Please describe the problem.
[Original report](https://git-annex.branchable.com/bugs/manages_to_incorrectly_add_to_annex_instead_of_git_based_on___34__mimetype__34___-_we_cannot_figure_it_out_why/) was about the change in behavior of git-annex (as a whole installed bundle) that .json files started to be added to git-annex instead of git, whenever .json files remained text files and .gitattributes was configured to add all files with mimetype of text to go into git.
It happened due to the fact that libmagic added handling for detecting .json files and reporting that they are `application/json` instead of `text/plain` as before.
Note that [initial demand/idea behind adding treatment of mime types](https://git-annex.branchable.com/devblog/day_360__annex.largefiles_mimetype/) was actually to provide automation for the most reasonable decision making on what goes into git and what annex, based on either a file a text file or binary.
The bug report referenced above was just marked "done" with a comment that "the magic database changing behavior is not a bug in git-annex" without actually addressing the underlying issue. I even somehow got a wrong impression that "we had it fixed" and was surprised to stumble into it again. I think that the issue should be properly addressed, ideally without requiring users to adjust their `.gitattribute` files (and introducing newer git annex version dependency), so that the desired behavior of having text files going into git, not git-annex, was maintained *even across changes in libmagic DB*.
One, IMHO the easiest way, now that `-k` (keep going) [issue was fixed in libmagic](https://bugs.astron.com/view.php?id=77), would be for git-annex to treat "mimetype" specification as "if any mimetype matches" and ask libmagic about all mimetypes of a file, e.g.:
[[!format sh """
$> file --mime-type -Lk 1.json
1.json: application/json\012- text/plain
"""]]
so that if any structured text file would soon acquire additional, more specific mimetype (e.g., `.md` could be reported as `application/markdown`, just not yet), previous specifications in .gitattributes would still work -- after all those files remain `text/plain` files!
If strict matching (not sure yet about a use case where it would really be needed) by the most specialized mime type is needed, additional "mimetypefirstguess" or alike could be added.
[[!meta author=yoh]]
[[!tag projects/datalad]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="oh hoh, there is mimeencoding now"
date="2019-12-20T19:54:04Z"
content="""
OH!! \"Much ado about nothing\". As [Joey reported in datalad issues](https://github.com/datalad/datalad/issues/3361#issuecomment-488018126) there is now handling of `mimeencoding=binary` as the ultimate decision maker. So we are probably doomed (unless Joey sees reason in the reasoning above and implements that as well) to do go through all datasets and autoadjust them to use `mimeencoding` instead of `mimetype`.
"""]]

View file

@ -0,0 +1,23 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2019-12-20T20:00:14Z"
content="""
Yes, mimeencoding=binary is intended for those cases where you just want a
robust (presumably) text/binary division.
The "any mimetype matches" approach seems like it could break things too.
Consider:
(not mimetype=text/plain and (mimetype=text/* or mimetype=application/json)) or mimetype=AI/buggy
Currently a shell script is found to be only text/x-shellscript,
so it would match the above. If git-annex were changed to consider
all reported mime types, the shell script, being also text/plain
would not match.
And then, once the mime database solves the halting problem and helpfully
starts flagging shell scripts as AI/buggy (all shell scripts are presumably
buggy so maybe that AI has an easy job), the behavior on the above example
would change for a third time, back to matching.
"""]]

View file

@ -0,0 +1,58 @@
### Please describe the problem.
When using [[submodules]], a `git annex init` in the submodule drops the `core.workdir` configuration from the git config around the time it replaces the plain `.git` file with a symlink.
The lack of that option rarely affects anything (the command-line git or -annex operations I've tried from inside the workdir went OK), but when libgit2 is used to load the submodule and discover its workdir, the missing option becomes apparent, and libgit2 reports an incorrect working directory.
### What steps will reproduce the problem?
As this is about a comparatively comprehensive setup, I've phrased it as a shell script:
[[!format sh """
git init inner-original
cd inner-original
touch inner-file
git add inner-file
git commit -m "initial inner check-in"
cd ..
git init outer
cd outer
git submodule add ../inner-original inner
cd ..
cp -a outer outer-with-annex
cp -a outer outer-with-ln
cd outer-with-annex/inner
git annex init
cd ../..
cd outer-with-ln/inner
ln -sf ../.git/modules/inner .git
cd ../..
"""]]
Now apparently, all those inner directories still work (eg. `git diff inner-file`).
However, `git config core.worktree` only reports on those that `git annex` did not touch.
The practical problem arising with this can most easily seen when executing the following Python snippet in any of the outer directories:
[[!format python """
import pygit2
r = pygit2.Repository(".")
r2 = r.lookup_submodule("inner").open()
print (r.workdir, r2.workdir)
"""]]
which reports the workdir of the outer correctly, but reports `$PATH/outer-with-annex/.git/modules/` (which is definitely not a workdir) for the annex.
When the option is restored, libgit2's discovery works, but git-annex stops working with "git-annex: /tmp/inner: changeWorkingDirectory: does not exist (No such file or directory)" -- apparently, git-annex does use the workdir if it is set, but uses different resolution rules than git.
### What version of git-annex are you using? On what operating system?
git-annex 7.20191114 as packaged in Debian as 7.20191114-1.
### Have you had any luck using git-annex before?
Luck had nothing to do with it -- it's by its design that it usually works :-)

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="https://christian.amsuess.com/chrysn"
nickname="chrysn"
avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc"
subject="Workaround"
date="2019-12-22T10:50:32Z"
content="""
As a workaround to keep both git submodule workdir discovery and git annex functional, the original git submodule core.worktree parameter can be restored, and overridden for git-annex calls using
[[!format sh \"\"\"
GIT_WORK_TREE=\"$PWD\" git annex whereis
\"\"\"]]
and similar.
"""]]

View file

@ -27,6 +27,29 @@ repository see the setting, and so git-annex only looks for these:
These settings can be overridden on a per-repository basis using
`git config`.
* `annex.largefiles`
Used to configure which files are large enough to be added to the annex.
It is an expression that matches the large files, eg
"include=*.mp3 or largerthan(500kb)".
See [[git-annex-matching-expression]](1) for details on the syntax.
This sets a default, which can be overridden by annex.largefiles
attributes in `.gitattributes` files, or by `git config`.
* `annex.addunlocked`
Commands like `git-annex add` default to adding files to the repository
in locked form. This can make them add the files in unlocked form,
the same as if [[git-annex-unlock]](1) were run on the files.
This can be set to "true" to add everything unlocked, or it can be a more
complicated expression that matches files by name, size, or content. See
[[git-annex-matching-expression]](1) for details.
This sets a default, which can be overridden by annex.addunlocked
in `git config`.
* `annex.autocommit`
Set to false to prevent the `git-annex assistant` and `git-annex sync`

View file

@ -59,6 +59,8 @@ For example, this will exit 0:
[[git-annex-preferred-content]](1)
[[git-annex-matching-expression]](1)
# AUTHOR
Joey Hess <id@joeyh.name>

View file

@ -0,0 +1,87 @@
# NAME
git-annex-matching-expression - specifying a set of files
# DESCRIPTION
The annex.largefiles and annex.addunlocked configurations both use
expressions that match some files in the working tree.
# SYNTAX
The format of these expressions is similar to
[[git-annex-preferred-content]](1) expressions.
For example:
largerthan=100kb and not (include=*.c or include=*.h)
This matches large files, but excludes C source files.
The following terms can be used:
* `include=glob` / `exclude=glob`
Specify files to include or exclude.
The glob can contain `*` and `?` to match arbitrary characters.
* `smallerthan=size` / `largerthan=size`
Matches only files smaller than, or larger than the specified size.
The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes"
* `mimetype=glob`
Looks up the MIME type of a file, and checks if the glob matches it.
For example, `"mimetype=text/*"` will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc.
The MIME types are the same that are displayed by running `file --mime-type`
This is only available to use when git-annex was built with the
MagicMime build flag.
* `mimeencoding=glob`
Looks up the MIME encoding of a file, and checks if the glob matches it.
For example, `"mimeencoding=binary"` will match many kinds of binary
files.
The MIME encodings are the same that are displayed by running `file --mime-encoding`
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything`
Matches any file.
* `nothing`
Matches no files. (Same as "not anything")
* `not expression`
Inverts what the expression matches.
* `and` / `or` / `( expression )`
These can be used to build up more complicated expressions.
# SEE ALSO
[[git-annex]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
<http://git-annex.branchable.com/>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -891,9 +891,13 @@ Like other git commands, git-annex is configured via `.git/config`.
Used to configure which files are large enough to be added to the annex.
It is an expression that matches the large files, eg
"*.mp3 or largerthan(500kb)"
"include=*.mp3 or largerthan(500kb)"
See [[git-annex-matching-expression]](1) for details on the syntax.
Overrides any annex.largefiles attributes in `.gitattributes` files.
To configure a default annex.largefiles for all clones of the repository,
this can be set in [[git-annex-config]](1).
This configures the behavior of both git-annex and git when adding
files to the repository. By default, `git-annex add` adds all files
@ -906,13 +910,10 @@ Like other git commands, git-annex is configured via `.git/config`.
`git annex import`, `git annex addurl`, `git annex importfeed`
and the assistant.
See <https://git-annex.branchable.com/tips/largefiles> for syntax
documentation and more.
* `annex.gitaddtoannex`
Setting this to false will prevent `git add` from honoring the
annex.largefiles configuration.
Setting this to false will prevent `git add` from adding
files to the annex, despite the annex.largefiles configuration.
* `annex.addsmallfiles`
@ -922,15 +923,23 @@ Like other git commands, git-annex is configured via `.git/config`.
* `annex.addunlocked`
Set to true to make commands like `git-annex add` that add files to the
repository add them in unlocked form. The default is for these commands
to add files in locked form.
Commands like `git-annex add` default to adding files to the repository
in locked form. This can make them add the files in unlocked form,
the same as if [[git-annex-unlock]](1) were run on the files.
This can be set to "true" to add everything unlocked, or it can be a more
complicated expression that matches files by name, size, or content. See
[[git-annex-matching-expression]](1) for details.
To configure a default annex.addunlocked for all clones of the repository,
this can be set in [[git-annex-config]](1).
(Using `git add` always adds files in unlocked form and it is not
affected by this setting.)
When a repository has core.symlinks set to false, it implicitly
sets annex.addunlocked to true.
When a repository has core.symlinks set to false, or has an adjusted
unlocked branch checked out, this setting is ignored, and files are
always added to the repository in unlocked form.
* `annex.numcopies`
@ -1695,9 +1704,11 @@ but the SHA256E backend for ogg files:
*.ogg annex.backend=SHA256E
There is a annex.largefiles attribute, which is used to configure which
files are large enough to be added to the annex.
See the documentation above of the annex.largefiles git config
and <https://git-annex.branchable.com/tips/largefiles> for details.
files are large enough to be added to the annex. Since attributes cannot
contain spaces, it is difficult to use for more complex annex.largefiles
settings. Setting annex.largefiles in [[git-annex-config]](1) is an easier
way to configure it across all clones of the repository.
See [[git-annex-matching-expression]](1) for details on the syntax.
The numcopies setting can also be configured on a per-file-type basis via
the `annex.numcopies` attribute in `.gitattributes` files. This overrides

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="michael.fsp@85f5ea833fb457cf7b0b0181c314888d5f32649c"
nickname="michael.fsp"
avatar="http://cdn.libravatar.org/avatar/f398e1ea7eed60be1d45caa3126fc691"
subject="windows installer has not been updated to version 7.20191218"
date="2019-12-21T18:17:19Z"
content="""
Just to let you know that the windows installer is still at version 7.20191115
Thanks for the awesome work!
"""]]

View file

@ -26,101 +26,63 @@ the assistant.
For example, let's make only files larger than 100 kb be added to the annex,
and never `*.c` and `*.h` source code files.
Write this to the `.gitattributes` file:
git config annex.largefiles 'largerthan=100kb and not (include=*.c or include=*.h)'
* annex.largefiles=(largerthan=100kb)
That is a local configuration, so will only apply to your clone of the
repository. To set a default that will apply to all clones, unless
overridden, do this instead:
git annex config --set annex.largefiles 'largerthan=100kb and not (include=*.c or include=*.h)'
There's one other way to configure the same thing, you can put this in
the `.gitattributes` file:
* annex.largefiles=largerthan=100kb
*.c annex.largefiles=nothing
*.h annex.largefiles=nothing
Or, set the git configuration instead:
git config annex.largefiles 'largerthan=100kb and not (include=*.c or include=*.h)'
The syntax in .gitattributes is a bit different, because the .gitattributes
matches files itself, and the values of attributes cannot contain spaces.
So using .gitattributes for this is not recommended (but it does work for
older versions of git-annex, where the `git annex config` setting does
not). Any .gitattributes setting overrides the `git annex config` setting,
but will be overridden by the `git config` setting.
Both of these settings do the same thing. Setting it in the
`.gitattributes` file makes any checkout of the repository share that
configuration, so is often a good choice. Setting the annex.largefiles git
configuration lets different checkouts behave differently. The git
configuration overrides the `.gitattributes` configuration.
Or, perhaps you just want all files to be added to the annex, no matter
what. Just write "* annex.largefiles=anything" to the `.gitattributes`
file, or run:
Another example. If you wanted `git add` to put all files the annex
in your local repository:
git config annex.largefiles anything
Or in all clones:
git annex config --set annex.largefiles anything
## syntax
The value of annex.largefiles is similar to a
[[preferred content expression|git-annex-preferred-content]].
The following terms can be used in annex.largefiles:
See [[git-annex-matching-expression]] for details about the syntax.
* `include=glob` / `exclude=glob`
## gitattributes format
Specify files to include or exclude.
Here's that example `.gitattributes` again:
The glob can contain `*` and `?` to match arbitrary characters.
* annex.largefiles=largerthan=100kb
*.c annex.largefiles=nothing
*.h annex.largefiles=nothing
* `smallerthan=size` / `largerthan=size`
The way that works is, `*.c` and `*.h` files have the annex.largefiles
attribute set to "nothing", and so those files are never treated as large
files. All other files use the other value, which checks the file size.
Matches only files smaller than, or larger than the specified size.
The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes"
* `mimetype=glob`
Looks up the MIME type of a file, and checks if the glob matches it.
For example, `"mimetype=text/*"` will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc.
The MIME types are the same that are displayed by running `file --mime-type`
This is only available to use when git-annex was built with the
MagicMime build flag.
* `mimeencoding=glob`
Looks up the MIME encoding of a file, and checks if the glob matches it.
For example, `"mimeencoding=binary"` will match many kinds of binary
files.
The MIME encodings are the same that are displayed by running `file --mime-encoding`
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything`
Matches any file.
* `nothing`
Matches no files. (Same as "not anything")
* `not expression`
Inverts what the expression matches.
* `and` / `or` / `( expression )`
These can be used to build up more complicated expressions.
The way the `.gitattributes` example above works is, `*.c` and `*.h` files
have the annex.largefiles attribute set to "nothing",
and so those files are never treated as large files. All other files use
the other value, which checks the file size.
Note that, since git attribute values cannot contain whitespace,
it's useful to instead parenthesize the terms of the annex.largefiles
attribute. This trick allows for more complicated expressions.
Since git attribute values cannot contain whitespace, when you need
a more complicated annex.largefiles expression, you can instead
parenthesize the terms of the annex.largefiles attribute.
For example, this is the same as the git config shown earlier, shoehorned
into a git attribute:
into a single git attribute:
* annex.largefiles=(largerthan=100kb)and(not((include=*.c)or(include=*.h)))
It's generally a better idea to use `git annex config` instead.
## temporarily override
If you've set up an annex.largefiles configuration but want to force a file to

View file

@ -3,3 +3,5 @@ Can the `annex.addunlocked` be extended to have the same syntax as `annex.largef
Basically, I want a reliable way to prevent inadvertently adding files as annexed unlocked files.
Related: [[forum/lets_discuss_git_add_behavior]]
> [[done]] --[[Joey]]

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2019-12-20T19:45:21Z"
content="""
Made annex.addunlocked support expressions like annex.largefiles.
And both of them can be set globally with `git annex config`. I did not
make annex.addunlocked be settable by git attribute, because my sense is
that `git annex config` covers that use case, or mostly so.
"""]]

View file

@ -0,0 +1,7 @@
I noticed that with the default SHA256E backend, `git annex reinject --known FILE` will fail if FILE has a different extension than it has in the annex. Presumably this is because `git annex calckey FILE` does not generate the same key, even though the file has the same checksum.
I think it would be better if `git annex reinject --known` would ignore the file extension when deciding whether a file is known. A case where that would be much better is caused by the fact that git-annex has changed how it determines a file's extension over time. E.g. if foo.bar.baz was added to the annex a long time ago, it might have a key like `SHA256E-s12--37833383383.baz`. Modern git-annex would calculate a key like `SHA256E-s12--37833383383.bar.baz` and so the reinject of the file using modern git-annex would fail.
This problem does not affect `git annex reinject` without `--known`.
--spwhitton

View file

@ -0,0 +1,28 @@
`git annex reinject --known` doesn't work in a bare repo.
spwhitton@iris:~/tmp>echo foo >bar
spwhitton@iris:~/tmp>mkdir baz
spwhitton@iris:~/tmp>cd baz
spwhitton@iris:~/tmp/baz>git init --bare
Initialized empty Git repository in /home/spwhitton/tmp/baz/
spwhitton@iris:~/tmp/baz>git annex init
init (scanning for unlocked files...)
ok
(recording state in git...)
spwhitton@iris:~/tmp/baz>git annex reinject --known ../bar
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
fatal: relative path syntax can't be used outside working tree.
git-annex: fd:15: hGetLine: end of file
Obviously this wasn't actually a file known to git-annex. But I get the same error in a non-dummy bare repo I am trying to reinject.
A workaround is to use `git worktree add` and run `git annex reinject` from there.

View file

@ -0,0 +1,17 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2019-12-19T20:37:18Z"
content="""
How about annex.dotfiles=true enables annexing dotfiles, but only
if annex.largefiles is set to something. Leave it up
to the user to configure annex.largefiles according to their use case.
And if the user neglects to annex.largefiles, this avoids blowing their foot
off by default.
annex.dotfiles could certainly go in the global `git-annex config`;
annex.largefiles would then make sense to be set in .gitattributes,
or also add support for storing in in `git-annex config` (which avoids
the syntatic hacks needed to shoehorn it into .gitattributes, and makes it
be repo-global the same as the annex.dotfiles config).
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="spwhitton"
avatar="http://cdn.libravatar.org/avatar/9c3f08f80e67733fd506c353239569eb"
subject="comment 4"
date="2019-12-21T23:03:46Z"
content="""
Hmm, what would the default value of `annex.dotfiles` be? If the default is false (so that there is no behavioural change unless the user explicitly requests it), then why not have it take effect even if annex.largefiles has not been set?
"""]]