Merge branch 'master' into import-from-s3

This commit is contained in:
Joey Hess 2019-05-01 14:30:52 -04:00
commit 700a3f2787
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
29 changed files with 426 additions and 43 deletions

View file

@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
mkLargeFilesParser :: Annex (String -> [ParseResult]) mkLargeFilesParser :: Annex (String -> [ParseResult])
mkLargeFilesParser = do mkLargeFilesParser = do
magicmime <- liftIO initMagicMimeType magicmime <- liftIO initMagicMime
let parse = parseToken $ commonTokens
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ] let mimer n f = ValueToken n (usev $ f magicmime)
#else #else
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ] let mimer n = ValueToken n (const $ Left "\""++n++"\" not supported; not built with MagicMime support")
#endif
let parse = parseToken $ commonTokens ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType
, mimer "mimeencoding" $
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
]
#else
[ mimer "mimetype"
, mimer "mimeencoding"
,
]
#endif #endif
return $ map parse . tokenizeMatcher return $ map parse . tokenizeMatcher
where
{- Generates a matcher for files large enough (or meeting other criteria) {- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -} - to be added to the annex, rather than directly to git. -}

View file

@ -10,8 +10,10 @@
module Annex.Magic ( module Annex.Magic (
Magic, Magic,
MimeType, MimeType,
initMagicMimeType, MimeEncoding,
initMagicMime,
getMagicMimeType, getMagicMimeType,
getMagicMimeEncoding,
) where ) where
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
@ -21,25 +23,37 @@ import Common
#else #else
type Magic = () type Magic = ()
#endif #endif
import Types.Mime
initMagicMimeType :: IO (Maybe Magic) initMagicMime :: IO (Maybe Magic)
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
initMagicMimeType = catchMaybeIO $ do initMagicMime = catchMaybeIO $ do
m <- magicOpen [MagicMimeType] m <- magicOpen [MagicMime]
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
Nothing -> magicLoadDefault m Nothing -> magicLoadDefault m
Just d -> magicLoad m Just d -> magicLoad m
(d </> "magic" </> "magic.mgc") (d </> "magic" </> "magic.mgc")
return m return m
#else #else
initMagicMimeType = return Nothing initMagicMime = return Nothing
#endif #endif
type MimeType = String getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
#ifdef WITH_MAGICMIME
getMagicMime m f = Just . parse <$> magicFile m f
where
parse s =
let (mimetype, rest) = separate (== ';') s
in case rest of
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
(mimetype, mimeencoding)
_ -> (mimetype, "")
#else
getMagicMime _ _ = return Nothing
#endif
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType) getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
#ifdef WITH_MAGICMIME getMagicMimeType m f = fmap fst <$> getMagicMime m f
getMagicMimeType m f = Just <$> magicFile m f
#else getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
getMagicMimeType _ _ = return Nothing getMagicMimeEncoding m f = fmap snd <$> getMagicMime m f
#endif

View file

@ -24,6 +24,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
of an old remote for something new. of an old remote for something new.
* Drop support for building with aws older than 0.14. * Drop support for building with aws older than 0.14.
* info: Show when a remote is configured with importtree. * info: Show when a remote is configured with importtree.
* Added mimeencoding= term to annex.largefiles expressions.
This is probably mostly useful to match non-text files with eg
"mimeencoding=binary"
* git-annex matchexpression: Added --mimeencoding option.
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400 -- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400

View file

@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
( long "largefiles" ( long "largefiles"
<> help "parse as annex.largefiles expression" <> help "parse as annex.largefiles expression"
) )
<*> (addkeysize <$> dataparser) <*> (MatchingInfo . addkeysize <$> dataparser)
where where
dataparser = MatchingInfo dataparser = ProvidedInfo
<$> optinfo "file" (strOption <$> optinfo "file" (strOption
( long "file" <> metavar paramFile ( long "file" <> metavar paramFile
<> help "specify filename to match against" <> help "specify filename to match against"
@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
( long "mimetype" <> metavar paramValue ( long "mimetype" <> metavar paramValue
<> help "specify mime type to match against" <> help "specify mime type to match against"
)) ))
<*> optinfo "mimeencoding" (strOption
( long "mimeencoding" <> metavar paramValue
<> help "specify mime encoding to match against"
))
optinfo datadesc mk = (Right <$> mk) optinfo datadesc mk = (Right <$> mk)
<|> (pure $ Left $ missingdata datadesc) <|> (pure $ Left $ missingdata datadesc)
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data" missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, use its size. -- When a key is provided, make its size also be provided.
addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of addkeysize p = case providedKey p of
Just sz -> MatchingInfo f (Right k) (Right sz) m Right k -> case keySize k of
Nothing -> i Just sz -> p { providedFileSize = Right sz }
addkeysize i = i Nothing -> p
Left _ -> p
seek :: MatchExpressionOptions -> CommandSeek seek :: MatchExpressionOptions -> CommandSeek
seek o = do seek o = do

View file

@ -50,7 +50,7 @@ data CredPairStorage = CredPairStorage
- -
- The remote's configuration should have already had a cipher stored in it - The remote's configuration should have already had a cipher stored in it
- if that's going to be done, so that the creds can be encrypted using the - if that's going to be done, so that the creds can be encrypted using the
- cipher. The EncryptionIsSetup phantom type ensures that is the case. - cipher. The EncryptionIsSetup is witness to that being the case.
-} -}
setRemoteCredPair :: EncryptionIsSetup -> RemoteConfig -> RemoteGitConfig -> CredPairStorage -> Maybe CredPair -> Annex RemoteConfig setRemoteCredPair :: EncryptionIsSetup -> RemoteConfig -> RemoteGitConfig -> CredPairStorage -> Maybe CredPair -> Annex RemoteConfig
setRemoteCredPair encsetup c gc storage mcreds = case mcreds of setRemoteCredPair encsetup c gc storage mcreds = case mcreds of

View file

@ -90,20 +90,22 @@ matchGlobFile glob = go
where where
cglob = compileGlob glob CaseSensative -- memoized cglob = compileGlob glob CaseSensative -- memoized
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi) go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
go (MatchingKey _ (AssociatedFile Nothing)) = pure False go (MatchingKey _ (AssociatedFile Nothing)) = pure False
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
matchMagic :: Maybe Magic -> MkLimit Annex matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
matchMagic (Just magic) glob = Right $ const go matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
where where
cglob = compileGlob glob CaseSensative -- memoized cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _ _) = pure False go (MatchingKey _ _) = pure False
go (MatchingFile fi) = liftIO $ catchBoolIO $ go (MatchingFile fi) = liftIO $ catchBoolIO $
maybe False (matchGlob cglob) maybe False (matchGlob cglob)
<$> getMagicMimeType magic (currFile fi) <$> querymagic magic (currFile fi)
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval go (MatchingInfo p) =
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used" matchGlob cglob <$> getInfo (selectprovidedinfo p)
matchMagic limitname _ _ Nothing _ =
Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
{- Adds a limit to skip files not believed to be present {- Adds a limit to skip files not believed to be present
- in a specified repository. Optionally on a prior date. -} - in a specified repository. Optionally on a prior date. -}
@ -149,7 +151,7 @@ limitInDir dir = const go
go (MatchingFile fi) = checkf $ matchFile fi go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _ (AssociatedFile Nothing)) = return False go (MatchingKey _ (AssociatedFile Nothing)) = return False
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
go (MatchingInfo af _ _ _) = checkf =<< getInfo af go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
checkf = return . elem dir . splitPath . takeDirectory checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number {- Adds a limit to skip files not believed to have the specified number
@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
else case mi of else case mi of
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
MatchingKey _ _ -> approxNumCopies MatchingKey _ _ -> approxNumCopies
MatchingInfo _ _ _ _ -> approxNumCopies MatchingInfo {} -> approxNumCopies
us <- filter (`S.notMember` notpresent) us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key) <$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed return $ numcopies - length us >= needed
@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
limitUnused _ (MatchingInfo _ ak _ _) = do limitUnused _ (MatchingInfo p) = do
k <- getInfo ak k <- getInfo (providedKey p)
S.member k <$> unusedKeys S.member k <$> unusedKeys
{- Limit that matches any version of any file or key. -} {- Limit that matches any version of any file or key. -}
@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
where where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key _) = checkkey sz key go sz _ (MatchingKey key _) = checkkey sz key
go sz _ (MatchingInfo _ _ as _) = go sz _ (MatchingInfo p) =
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz) getInfo (providedFileSize p)
>>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do check fi sz Nothing = do
@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k _) = a k checkKey a (MatchingKey k _) = a k
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)

View file

@ -81,7 +81,7 @@ gen r u c gc = do
cst <- remoteCost gc expensiveRemoteCost cst <- remoteCost gc expensiveRemoteCost
info <- extractS3Info c info <- extractS3Info c
hdl <- mkS3HandleVar c gc u hdl <- mkS3HandleVar c gc u
magic <- liftIO initMagicMimeType magic <- liftIO initMagicMime
return $ new cst info hdl magic return $ new cst info hdl magic
where where
new cst info hdl magic = Just $ specialRemote c new cst info hdl magic = Just $ specialRemote c

View file

@ -1,6 +1,6 @@
{- git-annex file matcher types {- git-annex file matcher types
- -
- Copyright 2013-2016 Joey Hess <id@joeyh.name> - Copyright 2013-2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -9,6 +9,7 @@ module Types.FileMatcher where
import Types.UUID (UUID) import Types.UUID (UUID)
import Types.Key (Key, AssociatedFile) import Types.Key (Key, AssociatedFile)
import Types.Mime
import Utility.Matcher (Matcher, Token) import Utility.Matcher (Matcher, Token)
import Utility.FileSize import Utility.FileSize
@ -16,12 +17,11 @@ import Control.Monad.IO.Class
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
-- Information about a file or a key that can be matched on.
data MatchInfo data MatchInfo
= MatchingFile FileInfo = MatchingFile FileInfo
| MatchingKey Key AssociatedFile | MatchingKey Key AssociatedFile
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType) | MatchingInfo ProvidedInfo
type MimeType = String
data FileInfo = FileInfo data FileInfo = FileInfo
{ currFile :: FilePath { currFile :: FilePath
@ -30,6 +30,16 @@ data FileInfo = FileInfo
-- ^ filepath to match on; may be relative to top of repo or cwd -- ^ filepath to match on; may be relative to top of repo or cwd
} }
-- This is used when testing a matcher, with values to match against
-- provided by the user, rather than queried from files.
--
-- Every field is wrapped in OptInfo, so matchers read a field through
-- its accessor instead of depending on the position of a constructor
-- argument.
data ProvidedInfo = ProvidedInfo
{ providedFilePath :: OptInfo FilePath
-- ^ file path that filename-based terms are matched against
, providedKey :: OptInfo Key
-- ^ key that key-based terms are matched against
, providedFileSize :: OptInfo FileSize
-- ^ file size that size-based terms are matched against
, providedMimeType :: OptInfo MimeType
-- ^ value that the mimetype term is matched against
, providedMimeEncoding :: OptInfo MimeEncoding
-- ^ value that the mimeencoding term is matched against
}
type OptInfo a = Either (IO a) a type OptInfo a = Either (IO a) a
-- If the OptInfo is not available, accessing it may result in eg an -- If the OptInfo is not available, accessing it may result in eg an

12
Types/Mime.hs Normal file
View file

@ -0,0 +1,12 @@
{- git-annex mime types
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.Mime where
-- The MIME type of a file, represented as a plain String.
type MimeType = String
-- The MIME encoding of a file, represented as a plain String.
type MimeEncoding = String

View file

@ -0,0 +1,49 @@
This is a satellite issue to the one on [strange mimetype driven addition](http://git-annex.branchable.com/bugs/manages_to_incorrectly_add_to_annex_instead_of_git_based_on___34__mimetype__34___-_we_cannot_figure_it_out_why/?updated)
We do get the addition of the file reported twice (when it switches from annex to git?)
[[!format sh """
(git)smaug:/mnt/btrfs/scrap/tmp/SIMON[master]data_BIDS
$> rm -f TEST.txt; cat BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
TEST.txt: text/plain; charset=utf-8
add TEST.txt ok
(recording state in git...)
$> rm -f TEST.txt; sed -e 's,[{}],,g' BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
TEST.txt: text/plain; charset=utf-8
add TEST.txt (non-large file; adding content to git repository) ok
add TEST.txt (non-large file; adding content to git repository) ok
(recording state in git...)
"""]]
and here is that run with `--debug`:
[[!format sh """
$> rm -f TEST.txt; sed -e 's,[{}],,g' BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add --debug TEST.txt
TEST.txt: text/plain; charset=utf-8
[2019-04-26 09:57:45.451632679] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","symbolic-ref","-q","HEAD"]
[2019-04-26 09:57:45.455061047] process done ExitSuccess
[2019-04-26 09:57:45.455134123] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","show-ref","refs/heads/master"]
[2019-04-26 09:57:45.466478693] process done ExitSuccess
[2019-04-26 09:57:45.466612578] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","ls-files","--others","--exclude-standard","-z","--","TEST.txt"]
[2019-04-26 09:57:45.471421295] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","ls-files","--modified","-z","--","TEST.txt"]
[2019-04-26 09:57:45.479659995] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","check-attr","-z","--stdin","annex.backend","annex.numcopies","annex.largefiles","--"]
[2019-04-26 09:57:45.479997471] read: git ["--version"]
[2019-04-26 09:57:45.482476867] process done ExitSuccess
add TEST.txt (non-large file; adding content to git repository) ok
[2019-04-26 09:57:45.494729569] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","diff","--name-only","--diff-filter=T","-z","--","TEST.txt"]
[2019-04-26 09:57:45.498615035] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","cat-file","--batch"]
[2019-04-26 09:57:45.499023359] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","cat-file","--batch-check=%(objectname) %(objecttype) %(objectsize)"]
add TEST.txt (non-large file; adding content to git repository) ok
(recording state in git...)
[2019-04-26 09:57:45.509449016] feed: xargs ["-0","git","--git-dir=../.git","--work-tree=..","--literal-pathspecs","add","--"]
[2019-04-26 09:57:45.54937333] process done ExitSuccess
[2019-04-26 09:57:45.550142214] process done ExitSuccess
[2019-04-26 09:57:45.550690643] process done ExitSuccess
[2019-04-26 09:57:45.551282227] process done ExitSuccess
"""]]
annex 7.20190219+git191-g2d6a364d4-1~ndall+1

View file

@ -0,0 +1,66 @@
We have found a strange file which for some reason gets added to annex instead of git, although `file --mime` reports it to be a text file. Somehow the possible culprit (we also achieved a change in behavior via different means) is the `{}` characters.
Here is the sample of a BADFILE: http://www.onerussian.com/tmp/BADFILE.txt which gets added to annex instead of git:
[[!format sh """
$> wget http://www.onerussian.com/tmp/BADFILE.txt ; cat .gitattributes; file --mime BAD
...
BADFILE.txt 100%[=======================================================>] 289 --.-KB/s in 0s
* annex.backend=MD5E
* annex.largefiles=(not(mimetype=text/*))
**/.git* annex.largefiles=nothingBADFILE.txt: text/plain; charset=utf-8
add BADFILE.txt ok
(recording state in git...)
$> ls -l BADFILE.txt
lrwxrwxrwx 1 yoh yoh 120 Apr 26 09:43 BADFILE.txt -> .git/annex/objects/xw/3W/MD5E-s289--2aae5dfcc232055ba6c06270b6c6daf0.txt/MD5E-s289--2aae5dfcc232055ba6c06270b6c6daf0.txt
"""]]
So we tried to troubleshoot a bit, and here is an attempt with the `{}` chars removed versus not removed, which shows differing behavior:
[[!format sh """
(git)smaug:/mnt/btrfs/scrap/tmp/SIMON[master]data_BIDS
$> cat ../.gitattributes
* annex.backend=MD5E
* annex.largefiles=(not(mimetype=text/*))
**/.git* annex.largefiles=nothing%
$> git reset --hard; rm -f TEST.txt; sed -e 's,[{}],,g' BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
HEAD is now at f97185f badfile into git
TEST.txt: text/plain; charset=utf-8
add TEST.txt (non-large file; adding content to git repository) ok
(recording state in git...)
$> git reset --hard; rm -f TEST.txt; cat BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
HEAD is now at f97185f badfile into git
TEST.txt: text/plain; charset=utf-8
add TEST.txt ok
(recording state in git...)
$> git annex version
git-annex version: 7.20190219+git191-g2d6a364d4-1~ndall+1
build flags: Assistant Webapp Pairing S3(multipartupload)(storageclasses) WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite
dependency versions: aws-0.20 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.0.0 ghc-8.4.4 http-client-0.5.13.1 persistent-sqlite-2.8.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar hook external
operating system: linux x86_64
supported repository versions: 5 7
upgrade supported from repository versions: 0 1 2 3 4 5 6
local repository version: 5
$> apt-cache policy git-annex-standalone
git-annex-standalone:
Installed: 7.20190219+git191-g2d6a364d4-1~ndall+1
Candidate: 7.20190219+git191-g2d6a364d4-1~ndall+1
Version table:
*** 7.20190219+git191-g2d6a364d4-1~ndall+1 500
500 http://neuro.debian.net/debian stretch/main amd64 Packages
500 http://neurodebian.ovgu.de/debian stretch/main amd64 Packages
100 /var/lib/dpkg/status
"""]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2019-04-26T14:21:31Z"
content="""
On Debian unstable, file --mime says it's application/json.
Since git-annex-standalone bundles the magic database, when it's built on
unstable, it may not match the magic database of the OS, which seems to
explain it.
So I don't think this is a bug?
"""]]

View file

@ -0,0 +1,61 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="change in libmagic behavior"
date="2019-04-26T15:20:22Z"
content="""
Hi Joey, thanks for the quick reply. After I filed the issue I realized as well that it must be a difference in the libmagic1 library version and a change in its behavior (!), and indeed that is the case here:
[[!format sh \"\"\"
$> apt-cache policy libmagic1
libmagic1:
Installed: 1:5.30-1+deb9u2
Candidate: 1:5.30-1+deb9u2
Version table:
*** 1:5.30-1+deb9u2 100
100 http://debian.csail.mit.edu/debian stretch/main amd64 Packages
100 /var/lib/dpkg/status
1:5.30-1+deb9u1 100
100 http://security.debian.org stretch/updates/main amd64 Packages
$> file -L --mime BADFILE.txt
BADFILE.txt: text/plain; charset=utf-8
$> LD_PRELOAD=/usr/lib/git-annex.linux/usr/lib/x86_64-linux-gnu/libmagic.so.1 file -L --mime BADFILE.txt
file: compiled magic version [530] does not match with shared library magic version [535]
BADFILE.txt: application/json; charset=utf-8
\"\"\"]]
Standalone I believe was built on buster.
I am not sure how I didn't run into this change of behavior before!
Possibly interesting observation: making it invalid JSON (adding a trailing `,` to one field, etc.) makes it get detected as text again.
But altogether, with this new behavior of libmagic, it begs a new question:
**how (without listing all possible text-based file formats) could we instruct annex to treat them as text files?**
There is an `-e` option to `file` to exclude some tests, and I thought excluding `apptype` would help, but it does not
[[!format sh \"\"\"
-e, --exclude TEST exclude TEST from the list of test to be
performed for file. Valid tests are:
apptype, ascii, cdf, compress, elf, encoding,
soft, tar, json, text, tokens
$> file -e apptype --mime BADFILE.txt
BADFILE.txt: application/json; charset=utf-8
\"\"\"]]
There is also `-k, --keep-going don't stop at the first match` so I thought it might list some `text/` but I am not sure what it does:
[[!format sh \"\"\"
$> file -k --mime BADFILE.txt
BADFILE.txt: application/json\012- \012- ; charset=utf-8
$> python -c 'import magic; m=magic.Magic(mime=True, keep_going=True); print(m.from_file(\"BADFILE.txt\"))'
application/json\012- \012-
\"\"\"]]
but this might be just a bug in libmagic...?
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="update"
date="2019-04-29T13:41:31Z"
content="""
[I have asked on the file/libmagic mailing list](https://mailman.astron.com/pipermail/file/2019-April/000106.html) and filed a [bug report for -k bug](https://bugs.astron.com/view.php?id=77)
"""]]

View file

@ -0,0 +1,16 @@
I've been working on matching up the git history with the history from a
versioned S3 export. Got sidetracked for quite a while building an
efficient way to get the git history up to a certain depth (including all
sides of merge commits) without reading the entire `git log` output.
The history matching is mostly working now, but there's a problem when a
rename is exported to S3, because it's non-atomic on S3 and atomic in git,
and so the histories stop matching up. This is not fatal, just results
in an ugly git history with the right tree at the top of it. It's
not entirely wrong; the git repo and the S3 bucket did legitimately diverge
for a while, so shouldn't the merged history reflect that? The problem is
just that the divergence is not represented in the optimal way.
I hate giving up at the final hurdle, but I feel I need to think about this
some more, so merging `import-from-s3` is postponed for another day, or
likely until Monday.

View file

@ -0,0 +1,16 @@
I could not find a good solution to the S3 history matching problem, so I
think that was the wrong approach. Now I have what seems to be a better
approach implemented: When an import of history from S3 contains some trees
that differ from the trees that were exported to S3, all git-annex needs to
do is make git aware of that, and it can do so by making the remote
tracking branch contain a merge between what was exported to S3 and what
was imported from it.
That does mean that there can be some extra commits generated from an
import, with the same trees as commits that the user made, but a different
message. That seems acceptable. Less so is that repeated imports generate
different commits each time; I need to make it generate stable commits. I
should also add back detection of the simple fast-forward case which was
working but got broken today.
So still not done with this, but the end is in sight!

View file

@ -0,0 +1,19 @@
I'm having trouble trying to set up something like this:
* Setup a git-annex locally on my laptop with multiple special remotes.
* These are mainly free cloud services mounted with git-annex-remote-rclone
* Set the number of copies to 2 (so I can have some redundancy on my annexed files, and spread two different copies into two random special remotes)
* Use my local annex folder, but due to the (potentially) large size of it, be able to "git annex drop some/files" while maintaining the numcopies:2 setting
* Set it up so that once I copy/create a file in the annex folder, it is automatically replicated twice (since numcopies:2) to any of my special remotes.
So far, I have set up my local repo as repository group: client, and all my special remotes as "incremental backup" (since I do not want them to each hold a full backup of my whole annex, since it might be much larger than the available space provided by the cloud provider, Box for example).
I had numcopies set to 2, but this only copies one version of each file to another special remote (I assume the other copy is the one in my local repo)
Setting numcopies to 3, copies to two special remotes (plus my local copy) but "git annex drop some/file" does not work, complaining that only two other copies are available, so it is not working as expected.
I'm sure I'm missing something obvious here but how would I set this up so it works as automatically/transparent as possible?
Maybe I could utilize the "/archive" folder and move files there, but I hate destroying my directory structure by moving files there, and I would rather "drop" the files in place, and "get" them when I need them...
A big thanks to the collective git-annex brain out there... :)

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="Ilya_Shlyakhter"
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
subject="use git-annex-untrust"
date="2019-04-28T17:32:29Z"
content="""
`[[git annex untrust|git-annex-untrust]] here`
will tell git-annex not to count the local copy towards `numcopies`.
"""]]

View file

@ -0,0 +1,3 @@
Hi!
I'm using a rclone special remote for Wasabi, with hybrid encryption. I'd like to switch to S3 remote instead of rclone (as Wasabi is S3-compatible). Is it possible? I'd like to keep the same uploaded files, with the same encryption.

View file

@ -0,0 +1,3 @@
Hi!
I found more than one line per special remote in git-annex:remote.log file. Is it the same for you?

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="Ilya_Shlyakhter"
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
subject="format of log files on the git-annex branch"
date="2019-04-30T17:52:41Z"
content="""
I think only the line with the latest timestamp matters; this format lets [[git-annex-sync]] resolve merge conflicts on the git-annex branch by simply merging the set of lines in the two files. The [[internals]] webpage does refer repeatedly to \"one line per repository\". @joeyh can you clarify?
"""]]

View file

@ -48,6 +48,11 @@ For example, this will exit 0:
Tell what the mime type of the file is. Only needed when using Tell what the mime type of the file is. Only needed when using
--largefiles with a mimetype= expression. --largefiles with a mimetype= expression.
* `--mimeencoding=`
Tell what the mime encoding of the file is. Only needed when using
--largefiles with a mimeencoding= expression.
# SEE ALSO # SEE ALSO
[[git-annex]](1) [[git-annex]](1)

View file

@ -26,5 +26,5 @@ a key. Older versions of git-annex would parse keys with the fields in other
orders (although the name field must always come last), but the current orders (although the name field must always come last), but the current
version requires the fields come in the order shown above. version requires the fields come in the order shown above.
The `git annex examinekey` command can be used to extract information from The [[`git annex examinekey`|git-annex-examinekey]] command can be used to extract information from
a key. a key.

View file

@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
Looks up the MIME type of a file, and checks if the glob matches it. Looks up the MIME type of a file, and checks if the glob matches it.
For example, "mimetype=text/*" will match many varieties of text files, For example, `"mimetype=text/*"` will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile", including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc. etc.
@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
This is only available to use when git-annex was built with the This is only available to use when git-annex was built with the
MagicMime build flag. MagicMime build flag.
* `mimeencoding=glob`
Looks up the MIME encoding of a file, and checks if the glob matches it.
For example, `"mimeencoding=binary"` will match many kinds of binary
files.
The MIME encodings are the same as those displayed by running `file --mime-encoding`.
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything` * `anything`
Matches any file. Matches any file.

View file

@ -1,3 +1,17 @@
If git-tracked files are removed from the remote, they don't get synced over after a "git annex fsck" and "git annex export". If git-tracked files are removed from the remote, they don't get synced over after a "git annex fsck" and "git annex export".
Is there some way that they could make it to the remote? I'm imagining an rsync-like behavior to copy over files that have different time stamps or file sizes. Would such a feature be welcome in git annex? Is there some way that they could make it to the remote? I'm imagining an rsync-like behavior to copy over files that have different time stamps or file sizes. Would such a feature be welcome in git annex?
> Since git-annex 6.20180626, `git annex fsck --from` an exporttree=yes remote
> will notice if files on it have been deleted, and then
> `git annex sync --content` or `git-annex export` will re-upload them.
>
> But perhaps more interesting, if the remote is also configured with
> importtree=yes, `git-annex import` from it can now notice deletions
> as well as other changes to the content on the remote, and make a remote
> tracking branch in git reflecting the changes. You can then merge or
> revert the changes and export or sync can be used to put the deleted
> files back on the remote if desired.
>
> Only a subset of remotes support importtree, but the fsck method
> will work for all. So, this is [[done]]. --[[Joey]]

View file

@ -0,0 +1,5 @@
ATM there is no `--json-progress` in `git annex add` (only `--json`), so no feedback on ETA etc. can be provided to the user. It would be nice to have `--json-progress` there, similar to the one for `get` and `copy`.
Cheers!
[[!meta author=yoh]]

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2019-04-26T14:17:58Z"
content="""
First things first.. There is no progress of any kind for add of an
individual item.
This would need changes to the Backend interface so it can display progress
while hashing..
"""]]

View file

@ -0,0 +1 @@
Would it be hard to add a variation to checksumming [[backends]], that would change how the checksum is computed: instead of computing it on the whole file, it would first be computed on file chunks of a given size, and then the final checksum computed on the concatenation of the chunk checksums? You'd add a new [[key field|internals/key_format]], say cNNNNN, specifying the chunking size (the last chunk might be shorter). Then (1) for large files, checksum computation could be parallelized (there could be a config option specifying the default chunk size for newly added files); (2) I often have large files on a remote, for which I have md5 for each chunk, but not for the full file; this would enable me to register the location of these files with git-annex without downloading them, while still using a checksum-based key.

View file

@ -987,6 +987,7 @@ Executable git-annex
Types.LockCache Types.LockCache
Types.Messages Types.Messages
Types.MetaData Types.MetaData
Types.Mime
Types.NumCopies Types.NumCopies
Types.RefSpec Types.RefSpec
Types.Remote Types.Remote