Merge branch 'master' into import-from-s3
This commit is contained in:
commit
700a3f2787
29 changed files with 426 additions and 43 deletions
|
@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
|
|||
|
||||
mkLargeFilesParser :: Annex (String -> [ParseResult])
|
||||
mkLargeFilesParser = do
|
||||
magicmime <- liftIO initMagicMimeType
|
||||
let parse = parseToken $ commonTokens
|
||||
magicmime <- liftIO initMagicMime
|
||||
#ifdef WITH_MAGICMIME
|
||||
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
|
||||
let mimer n f = ValueToken n (usev $ f magicmime)
|
||||
#else
|
||||
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
|
||||
-- Built without MagicMime: each mime term is still recognised by name,
-- but using it always yields a parse error that names the term.
-- Left must be applied to the whole concatenated message; writing
-- `Left "\"" ++ n ++ ...` parses as `(Left "\"") ++ n ++ ...`, which
-- does not typecheck.
let mimer n = ValueToken n $ const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
||||
#endif
|
||||
let parse = parseToken $ commonTokens ++
|
||||
#ifdef WITH_MAGICMIME
|
||||
[ mimer "mimetype" $
|
||||
matchMagic "mimetype" getMagicMimeType providedMimeType
|
||||
, mimer "mimeencoding" $
|
||||
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
|
||||
]
|
||||
#else
|
||||
-- Built without MagicMime: register both mime terms so that using one
-- produces the "not supported" error from mimer. (No trailing comma;
-- a dangling ", ]" is a syntax error.)
[ mimer "mimetype"
|
||||
, mimer "mimeencoding"
|
||||
]
|
||||
#endif
|
||||
return $ map parse . tokenizeMatcher
|
||||
where
|
||||
|
||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||
- to be added to the annex, rather than directly to git. -}
|
||||
|
|
|
@ -10,8 +10,10 @@
|
|||
module Annex.Magic (
|
||||
Magic,
|
||||
MimeType,
|
||||
initMagicMimeType,
|
||||
MimeEncoding,
|
||||
initMagicMime,
|
||||
getMagicMimeType,
|
||||
getMagicMimeEncoding,
|
||||
) where
|
||||
|
||||
#ifdef WITH_MAGICMIME
|
||||
|
@ -21,25 +23,37 @@ import Common
|
|||
#else
|
||||
type Magic = ()
|
||||
#endif
|
||||
import Types.Mime
|
||||
|
||||
initMagicMimeType :: IO (Maybe Magic)
|
||||
initMagicMime :: IO (Maybe Magic)
|
||||
#ifdef WITH_MAGICMIME
|
||||
initMagicMimeType = catchMaybeIO $ do
|
||||
m <- magicOpen [MagicMimeType]
|
||||
initMagicMime = catchMaybeIO $ do
|
||||
m <- magicOpen [MagicMime]
|
||||
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
|
||||
Nothing -> magicLoadDefault m
|
||||
Just d -> magicLoad m
|
||||
(d </> "magic" </> "magic.mgc")
|
||||
return m
|
||||
#else
|
||||
initMagicMimeType = return Nothing
|
||||
initMagicMime = return Nothing
|
||||
#endif
|
||||
|
||||
type MimeType = String
|
||||
-- Looks up a file with magicFile and splits the result, which is
-- expected to look like "type; charset=encoding", into the mime type
-- and the mime encoding. When the part after the ';' does not start
-- with " charset=", the encoding is returned as "".
--
-- When not built with MagicMime support, this always returns Nothing.
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
|
||||
#ifdef WITH_MAGICMIME
|
||||
getMagicMime m f = Just . parse <$> magicFile m f
|
||||
where
|
||||
parse s =
|
||||
-- rest is what follows the first ';' (presumably with the ';'
-- itself dropped by separate, given the pattern matched below)
let (mimetype, rest) = separate (== ';') s
|
||||
in case rest of
|
||||
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
|
||||
(mimetype, mimeencoding)
|
||||
_ -> (mimetype, "")
|
||||
#else
|
||||
getMagicMime _ _ = return Nothing
|
||||
#endif
|
||||
|
||||
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
|
||||
#ifdef WITH_MAGICMIME
|
||||
getMagicMimeType m f = Just <$> magicFile m f
|
||||
#else
|
||||
getMagicMimeType _ _ = return Nothing
|
||||
#endif
|
||||
getMagicMimeType m f = fmap fst <$> getMagicMime m f
|
||||
|
||||
getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
|
||||
getMagicMimeEncoding m f = fmap snd <$> getMagicMime m f
|
||||
|
|
|
@ -24,6 +24,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
|
|||
of an old remote for something new.
|
||||
* Drop support for building with aws older than 0.14.
|
||||
* info: Show when a remote is configured with importtree.
|
||||
* Added mimeencoding= term to annex.largefiles expressions.
|
||||
This is probably mostly useful to match non-text files with eg
|
||||
"mimeencoding=binary"
|
||||
* git-annex matchexpression: Added --mimeencoding option.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400
|
||||
|
||||
|
|
|
@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
|
|||
( long "largefiles"
|
||||
<> help "parse as annex.largefiles expression"
|
||||
)
|
||||
<*> (addkeysize <$> dataparser)
|
||||
<*> (MatchingInfo . addkeysize <$> dataparser)
|
||||
where
|
||||
dataparser = MatchingInfo
|
||||
dataparser = ProvidedInfo
|
||||
<$> optinfo "file" (strOption
|
||||
( long "file" <> metavar paramFile
|
||||
<> help "specify filename to match against"
|
||||
|
@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
|
|||
( long "mimetype" <> metavar paramValue
|
||||
<> help "specify mime type to match against"
|
||||
))
|
||||
<*> optinfo "mimeencoding" (strOption
|
||||
( long "mimeencoding" <> metavar paramValue
|
||||
<> help "specify mime encoding to match against"
|
||||
))
|
||||
|
||||
optinfo datadesc mk = (Right <$> mk)
|
||||
<|> (pure $ Left $ missingdata datadesc)
|
||||
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
|
||||
-- When a key is provided, use its size.
|
||||
addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of
|
||||
Just sz -> MatchingInfo f (Right k) (Right sz) m
|
||||
Nothing -> i
|
||||
addkeysize i = i
|
||||
-- When a key is provided, make its size also be provided.
|
||||
addkeysize p = case providedKey p of
|
||||
Right k -> case keySize k of
|
||||
Just sz -> p { providedFileSize = Right sz }
|
||||
Nothing -> p
|
||||
Left _ -> p
|
||||
|
||||
seek :: MatchExpressionOptions -> CommandSeek
|
||||
seek o = do
|
||||
|
|
2
Creds.hs
2
Creds.hs
|
@ -50,7 +50,7 @@ data CredPairStorage = CredPairStorage
|
|||
-
|
||||
- The remote's configuration should have already had a cipher stored in it
|
||||
- if that's going to be done, so that the creds can be encrypted using the
|
||||
- cipher. The EncryptionIsSetup phantom type ensures that is the case.
|
||||
- cipher. The EncryptionIsSetup is witness to that being the case.
|
||||
-}
|
||||
setRemoteCredPair :: EncryptionIsSetup -> RemoteConfig -> RemoteGitConfig -> CredPairStorage -> Maybe CredPair -> Annex RemoteConfig
|
||||
setRemoteCredPair encsetup c gc storage mcreds = case mcreds of
|
||||
|
|
30
Limit.hs
30
Limit.hs
|
@ -90,20 +90,22 @@ matchGlobFile glob = go
|
|||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
|
||||
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af
|
||||
go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
|
||||
go (MatchingKey _ (AssociatedFile Nothing)) = pure False
|
||||
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
|
||||
|
||||
matchMagic :: Maybe Magic -> MkLimit Annex
|
||||
matchMagic (Just magic) glob = Right $ const go
|
||||
matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
|
||||
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
|
||||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingKey _ _) = pure False
|
||||
go (MatchingFile fi) = liftIO $ catchBoolIO $
|
||||
maybe False (matchGlob cglob)
|
||||
<$> getMagicMimeType magic (currFile fi)
|
||||
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval
|
||||
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used"
|
||||
<$> querymagic magic (currFile fi)
|
||||
go (MatchingInfo p) =
|
||||
matchGlob cglob <$> getInfo (selectprovidedinfo p)
|
||||
matchMagic limitname _ _ Nothing _ =
|
||||
Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
|
||||
|
||||
{- Adds a limit to skip files not believed to be present
|
||||
- in a specified repository. Optionally on a prior date. -}
|
||||
|
@ -149,7 +151,7 @@ limitInDir dir = const go
|
|||
go (MatchingFile fi) = checkf $ matchFile fi
|
||||
go (MatchingKey _ (AssociatedFile Nothing)) = return False
|
||||
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
|
||||
go (MatchingInfo af _ _ _) = checkf =<< getInfo af
|
||||
go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
|
||||
checkf = return . elem dir . splitPath . takeDirectory
|
||||
|
||||
{- Adds a limit to skip files not believed to have the specified number
|
||||
|
@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
|
|||
else case mi of
|
||||
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
|
||||
MatchingKey _ _ -> approxNumCopies
|
||||
MatchingInfo _ _ _ _ -> approxNumCopies
|
||||
MatchingInfo {} -> approxNumCopies
|
||||
us <- filter (`S.notMember` notpresent)
|
||||
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
||||
return $ numcopies - length us >= needed
|
||||
|
@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
|
|||
limitUnused :: MatchFiles Annex
|
||||
limitUnused _ (MatchingFile _) = return False
|
||||
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
|
||||
limitUnused _ (MatchingInfo _ ak _ _) = do
|
||||
k <- getInfo ak
|
||||
limitUnused _ (MatchingInfo p) = do
|
||||
k <- getInfo (providedKey p)
|
||||
S.member k <$> unusedKeys
|
||||
|
||||
{- Limit that matches any version of any file or key. -}
|
||||
|
@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
|
|||
where
|
||||
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
|
||||
go sz _ (MatchingKey key _) = checkkey sz key
|
||||
go sz _ (MatchingInfo _ _ as _) =
|
||||
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
|
||||
go sz _ (MatchingInfo p) =
|
||||
getInfo (providedFileSize p)
|
||||
>>= \sz' -> return (Just sz' `vs` Just sz)
|
||||
checkkey sz key = return $ keySize key `vs` Just sz
|
||||
check _ sz (Just key) = checkkey sz key
|
||||
check fi sz Nothing = do
|
||||
|
@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
|
|||
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
|
||||
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
|
||||
checkKey a (MatchingKey k _) = a k
|
||||
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak
|
||||
checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@ gen r u c gc = do
|
|||
cst <- remoteCost gc expensiveRemoteCost
|
||||
info <- extractS3Info c
|
||||
hdl <- mkS3HandleVar c gc u
|
||||
magic <- liftIO initMagicMimeType
|
||||
magic <- liftIO initMagicMime
|
||||
return $ new cst info hdl magic
|
||||
where
|
||||
new cst info hdl magic = Just $ specialRemote c
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex file matcher types
|
||||
-
|
||||
- Copyright 2013-2016 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -9,6 +9,7 @@ module Types.FileMatcher where
|
|||
|
||||
import Types.UUID (UUID)
|
||||
import Types.Key (Key, AssociatedFile)
|
||||
import Types.Mime
|
||||
import Utility.Matcher (Matcher, Token)
|
||||
import Utility.FileSize
|
||||
|
||||
|
@ -16,12 +17,11 @@ import Control.Monad.IO.Class
|
|||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
|
||||
-- Information about a file or a key that can be matched on.
|
||||
data MatchInfo
|
||||
= MatchingFile FileInfo
|
||||
| MatchingKey Key AssociatedFile
|
||||
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType)
|
||||
|
||||
type MimeType = String
|
||||
| MatchingInfo ProvidedInfo
|
||||
|
||||
data FileInfo = FileInfo
|
||||
{ currFile :: FilePath
|
||||
|
@ -30,6 +30,16 @@ data FileInfo = FileInfo
|
|||
-- ^ filepath to match on; may be relative to top of repo or cwd
|
||||
}
|
||||
|
||||
-- This is used when testing a matcher, with values to match against
|
||||
-- provided by the user, rather than queried from files.
|
||||
data ProvidedInfo = ProvidedInfo
|
||||
{ providedFilePath :: OptInfo FilePath
|
||||
, providedKey :: OptInfo Key
|
||||
, providedFileSize :: OptInfo FileSize
|
||||
, providedMimeType :: OptInfo MimeType
|
||||
, providedMimeEncoding :: OptInfo MimeEncoding
|
||||
}
|
||||
|
||||
type OptInfo a = Either (IO a) a
|
||||
|
||||
-- If the OptInfo is not available, accessing it may result in eg an
|
||||
|
|
12
Types/Mime.hs
Normal file
12
Types/Mime.hs
Normal file
|
@ -0,0 +1,12 @@
|
|||
{- git-annex mime types
|
||||
-
|
||||
- Copyright 2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Types.Mime where
|
||||
|
||||
type MimeType = String
|
||||
|
||||
type MimeEncoding = String
|
|
@ -0,0 +1,49 @@
|
|||
This is a satellite issue to the one on [strange mimetype driven addition](http://git-annex.branchable.com/bugs/manages_to_incorrectly_add_to_annex_instead_of_git_based_on___34__mimetype__34___-_we_cannot_figure_it_out_why/?updated)
|
||||
|
||||
We do get report on addition of the file twice (when it switches from annex to git?)
|
||||
|
||||
[[!format sh """
|
||||
(git)smaug:/mnt/btrfs/scrap/tmp/SIMON[master]data_BIDS
|
||||
$> rm -f TEST.txt; cat BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
|
||||
TEST.txt: text/plain; charset=utf-8
|
||||
add TEST.txt ok
|
||||
(recording state in git...)
|
||||
|
||||
$> rm -f TEST.txt; sed -e 's,[{}],,g' BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
|
||||
TEST.txt: text/plain; charset=utf-8
|
||||
add TEST.txt (non-large file; adding content to git repository) ok
|
||||
add TEST.txt (non-large file; adding content to git repository) ok
|
||||
(recording state in git...)
|
||||
|
||||
"""]]
|
||||
|
||||
and here is that run with `--debug`:
|
||||
|
||||
[[!format sh """
|
||||
$> rm -f TEST.txt; sed -e 's,[{}],,g' BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add --debug TEST.txt
|
||||
TEST.txt: text/plain; charset=utf-8
|
||||
[2019-04-26 09:57:45.451632679] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","symbolic-ref","-q","HEAD"]
|
||||
[2019-04-26 09:57:45.455061047] process done ExitSuccess
|
||||
[2019-04-26 09:57:45.455134123] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","show-ref","refs/heads/master"]
|
||||
[2019-04-26 09:57:45.466478693] process done ExitSuccess
|
||||
[2019-04-26 09:57:45.466612578] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","ls-files","--others","--exclude-standard","-z","--","TEST.txt"]
|
||||
[2019-04-26 09:57:45.471421295] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","ls-files","--modified","-z","--","TEST.txt"]
|
||||
[2019-04-26 09:57:45.479659995] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","check-attr","-z","--stdin","annex.backend","annex.numcopies","annex.largefiles","--"]
|
||||
[2019-04-26 09:57:45.479997471] read: git ["--version"]
|
||||
[2019-04-26 09:57:45.482476867] process done ExitSuccess
|
||||
add TEST.txt (non-large file; adding content to git repository) ok
|
||||
[2019-04-26 09:57:45.494729569] read: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","diff","--name-only","--diff-filter=T","-z","--","TEST.txt"]
|
||||
[2019-04-26 09:57:45.498615035] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","cat-file","--batch"]
|
||||
[2019-04-26 09:57:45.499023359] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pathspecs","cat-file","--batch-check=%(objectname) %(objecttype) %(objectsize)"]
|
||||
add TEST.txt (non-large file; adding content to git repository) ok
|
||||
(recording state in git...)
|
||||
[2019-04-26 09:57:45.509449016] feed: xargs ["-0","git","--git-dir=../.git","--work-tree=..","--literal-pathspecs","add","--"]
|
||||
[2019-04-26 09:57:45.54937333] process done ExitSuccess
|
||||
[2019-04-26 09:57:45.550142214] process done ExitSuccess
|
||||
[2019-04-26 09:57:45.550690643] process done ExitSuccess
|
||||
[2019-04-26 09:57:45.551282227] process done ExitSuccess
|
||||
|
||||
|
||||
"""]]
|
||||
|
||||
annex 7.20190219+git191-g2d6a364d4-1~ndall+1
|
|
@ -0,0 +1,66 @@
|
|||
We have found a strange file which for some reason gets added to annex instead of git, although `file --mime` reports it to be a text file. Somehow the possible culprit (we also achieved a change in behavior via different means) is the `{}`
|
||||
|
||||
Here is the sample of a BADFILE: http://www.onerussian.com/tmp/BADFILE.txt which gets added to annex instead of git:
|
||||
|
||||
[[!format sh """
|
||||
$> wget http://www.onerussian.com/tmp/BADFILE.txt ; cat .gitattributes; file --mime BAD
|
||||
...
|
||||
BADFILE.txt 100%[=======================================================>] 289 --.-KB/s in 0s
|
||||
|
||||
|
||||
* annex.backend=MD5E
|
||||
* annex.largefiles=(not(mimetype=text/*))
|
||||
**/.git* annex.largefiles=nothingBADFILE.txt: text/plain; charset=utf-8
|
||||
add BADFILE.txt ok
|
||||
(recording state in git...)
|
||||
|
||||
$> ls -l BADFILE.txt
|
||||
lrwxrwxrwx 1 yoh yoh 120 Apr 26 09:43 BADFILE.txt -> .git/annex/objects/xw/3W/MD5E-s289--2aae5dfcc232055ba6c06270b6c6daf0.txt/MD5E-s289--2aae5dfcc232055ba6c06270b6c6daf0.txt
|
||||
|
||||
"""]]
|
||||
|
||||
so we tried to troubleshoot a bit and here is attempt with removing `{}` chars vs without removing which shows differing behavior:
|
||||
|
||||
[[!format sh """
|
||||
(git)smaug:/mnt/btrfs/scrap/tmp/SIMON[master]data_BIDS
|
||||
$> cat ../.gitattributes
|
||||
|
||||
* annex.backend=MD5E
|
||||
* annex.largefiles=(not(mimetype=text/*))
|
||||
**/.git* annex.largefiles=nothing%
|
||||
|
||||
$> git reset --hard; rm -f TEST.txt; sed -e 's,[{}],,g' BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
|
||||
HEAD is now at f97185f badfile into git
|
||||
TEST.txt: text/plain; charset=utf-8
|
||||
add TEST.txt (non-large file; adding content to git repository) ok
|
||||
(recording state in git...)
|
||||
|
||||
$> git reset --hard; rm -f TEST.txt; cat BADFILE.txt >| TEST.txt; file --mime TEST.txt; git annex add TEST.txt
|
||||
HEAD is now at f97185f badfile into git
|
||||
TEST.txt: text/plain; charset=utf-8
|
||||
add TEST.txt ok
|
||||
(recording state in git...)
|
||||
|
||||
$> git annex version
|
||||
git-annex version: 7.20190219+git191-g2d6a364d4-1~ndall+1
|
||||
build flags: Assistant Webapp Pairing S3(multipartupload)(storageclasses) WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite
|
||||
dependency versions: aws-0.20 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.0.0 ghc-8.4.4 http-client-0.5.13.1 persistent-sqlite-2.8.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0
|
||||
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL
|
||||
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar hook external
|
||||
operating system: linux x86_64
|
||||
supported repository versions: 5 7
|
||||
upgrade supported from repository versions: 0 1 2 3 4 5 6
|
||||
local repository version: 5
|
||||
|
||||
$> apt-cache policy git-annex-standalone
|
||||
git-annex-standalone:
|
||||
Installed: 7.20190219+git191-g2d6a364d4-1~ndall+1
|
||||
Candidate: 7.20190219+git191-g2d6a364d4-1~ndall+1
|
||||
Version table:
|
||||
*** 7.20190219+git191-g2d6a364d4-1~ndall+1 500
|
||||
500 http://neuro.debian.net/debian stretch/main amd64 Packages
|
||||
500 http://neurodebian.ovgu.de/debian stretch/main amd64 Packages
|
||||
100 /var/lib/dpkg/status
|
||||
|
||||
"""]]
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-04-26T14:21:31Z"
|
||||
content="""
|
||||
On Debian unstable, file --mime says it's application/json.
|
||||
|
||||
Since git-annex-standalone bundles the magic database, when it's built on
|
||||
unstable, it may not match the magic database of the OS, which seems to
|
||||
explain it.
|
||||
|
||||
So I don't think this is a bug?
|
||||
"""]]
|
|
@ -0,0 +1,61 @@
|
|||
[[!comment format=mdwn
|
||||
username="yarikoptic"
|
||||
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
|
||||
subject="change in libmagic behavior"
|
||||
date="2019-04-26T15:20:22Z"
|
||||
content="""
|
||||
Hi Joey, thanks for the quick reply. After I filed an issue I did realize as well that it must be difference in libmagic1 library version and change in its behavior (!) and indeed it is the case here:
|
||||
|
||||
[[!format sh \"\"\"
|
||||
$> apt-cache policy libmagic1
|
||||
libmagic1:
|
||||
Installed: 1:5.30-1+deb9u2
|
||||
Candidate: 1:5.30-1+deb9u2
|
||||
Version table:
|
||||
*** 1:5.30-1+deb9u2 100
|
||||
100 http://debian.csail.mit.edu/debian stretch/main amd64 Packages
|
||||
100 /var/lib/dpkg/status
|
||||
1:5.30-1+deb9u1 100
|
||||
100 http://security.debian.org stretch/updates/main amd64 Packages
|
||||
|
||||
$> file -L --mime BADFILE.txt
|
||||
BADFILE.txt: text/plain; charset=utf-8
|
||||
|
||||
$> LD_PRELOAD=/usr/lib/git-annex.linux/usr/lib/x86_64-linux-gnu/libmagic.so.1 file -L --mime BADFILE.txt
|
||||
file: compiled magic version [530] does not match with shared library magic version [535]
|
||||
BADFILE.txt: application/json; charset=utf-8
|
||||
|
||||
\"\"\"]]
|
||||
|
||||
Standalone I believe was built on buster.
|
||||
I am not sure how I didn't run into this change of behavior before!
|
||||
|
||||
Possibly interesting observation: making it a not kosher json (adding trailing , into one field etc) makes it being detected as text again
|
||||
|
||||
But altogether, with this new behavior of libmagic, it begs a new question:
|
||||
|
||||
**how (without listing all possible text based file formats) we could instruct annex to treat them as text files?**
|
||||
|
||||
There is an `-e` option to `file` to exclude some tests, and I thought excluding `apptype` would help, but it does not
|
||||
[[!format sh \"\"\"
|
||||
-e, --exclude TEST exclude TEST from the list of test to be
|
||||
performed for file. Valid tests are:
|
||||
apptype, ascii, cdf, compress, elf, encoding,
|
||||
soft, tar, json, text, tokens
|
||||
|
||||
$> file -e apptype --mime BADFILE.txt
|
||||
BADFILE.txt: application/json; charset=utf-8
|
||||
\"\"\"]]
|
||||
|
||||
There is also `-k, --keep-going don't stop at the first match` so I thought it might list some `text/` but I am not sure what it does:
|
||||
|
||||
[[!format sh \"\"\"
|
||||
$> file -k --mime BADFILE.txt
|
||||
BADFILE.txt: application/json\012- \012- ; charset=utf-8
|
||||
|
||||
$> python -c 'import magic; m=magic.Magic(mime=True, keep_going=True); print(m.from_file(\"BADFILE.txt\"))'
|
||||
application/json\012- \012-
|
||||
\"\"\"]]
|
||||
|
||||
but this might be just a bug in libmagic...?
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="yarikoptic"
|
||||
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
|
||||
subject="update"
|
||||
date="2019-04-29T13:41:31Z"
|
||||
content="""
|
||||
[I have asked on the file/libmagic mailing list](https://mailman.astron.com/pipermail/file/2019-April/000106.html) and filed a [bug report for -k bug](https://bugs.astron.com/view.php?id=77)
|
||||
"""]]
|
16
doc/devblog/day_584__matching_S3_histories.mdwn
Normal file
16
doc/devblog/day_584__matching_S3_histories.mdwn
Normal file
|
@ -0,0 +1,16 @@
|
|||
I've been working on matching up the git history with the history from a
|
||||
versioned S3 export. Got sidetracked for quite a while building an
|
||||
efficient way to get the git history up to a certain depth (including all
|
||||
sides of merge commits) without reading the entire `git log` output.
|
||||
|
||||
The history matching is mostly working now, but there's a problem when a
|
||||
rename is exported to S3, because it's non-atomic on S3 and atomic in git,
|
||||
and so the histories stop matching up. This is not fatal, just results
|
||||
in an ugly git history with the right tree at the top of it. It's
|
||||
not entirely wrong; the git repo and the S3 bucket did legitimately diverge
|
||||
for a while, so shouldn't the merged history reflect that? The problem is
|
||||
just that the divergence is not represented in the optimal way.
|
||||
|
||||
I hate giving up at the final hurdle, but I feel I need to think about this
|
||||
some more, so merging `import-from-s3` is postponed for another day, or
|
||||
likely until Monday.
|
16
doc/devblog/day_585__not_matching_S3_histories.mdwn
Normal file
16
doc/devblog/day_585__not_matching_S3_histories.mdwn
Normal file
|
@ -0,0 +1,16 @@
|
|||
I could not find a good solution to the S3 history matching problem, so I
|
||||
think that was the wrong approach. Now I have what seems to be a better
|
||||
approach implemented: When an import of history from S3 contains some trees
|
||||
that differ from the trees that were exported to S3, all git-annex needs to
|
||||
do is make git aware of that, and it can do so by making the remote
|
||||
tracking branch contain a merge between what was exported to S3 and what
|
||||
was imported from it.
|
||||
|
||||
That does mean that there can be some extra commits generated from an
|
||||
import, with the same trees as commits that the user made, but a different
|
||||
message. That seems acceptable. Less so is that repeated imports generate
|
||||
different commits each time; I need to make it generate stable commits. I
|
||||
should also add back detection of the simple fast-forward case which was
|
||||
working but got broken today.
|
||||
|
||||
So still not done with this, but the end is in sight!
|
19
doc/forum/Assistance_required_for_a_specific_workflow.mdwn
Normal file
19
doc/forum/Assistance_required_for_a_specific_workflow.mdwn
Normal file
|
@ -0,0 +1,19 @@
|
|||
I'm having troubles trying to setup something like this:
|
||||
|
||||
* Setup a git-annex locally on my laptop with multiple special remotes.
|
||||
* These are mainly free cloud services mounted with git-annex-remote-rclone
|
||||
* Set the number of copies to 2 (so I can have some redundancy on my annexed files, and spread two different copies into two random special remotes)
|
||||
* Use my local annex folder, but due to the (potentially) large size of it, be able to "git annex drop some/files" while maintaining the numcopies:2 setting
|
||||
* Set it up so that once I copy/create a file in the annex folder, it is automatically replicated twice (since numcopies:2) to any of my special remotes.
|
||||
|
||||
So far, I have setup my local repo as repository group: client, and all my special remotes as "incremental backup" (since I did not want them to each hold a full backup of my whole annex, since it might be much larger than the available space provided by the cloud provider, Box for example).
|
||||
|
||||
I had numcopies set to 2, but this only copies one version of each file to another special remote (I assume the other copy is the one in my local repo)
|
||||
|
||||
Setting numcopies to 3, copies to two special remotes (plus my local copy) but "git annex drop some/file" does not work, complaining that only two other copies are available, so it is not working as expected.
|
||||
|
||||
I'm sure I'm missing something obvious here but how would I set this up so it works as automatically/transparent as possible?
|
||||
|
||||
Maybe I could utilize the "/archive" folder and move files there, but i hate destroying my directory structure by moving files there, and I would rather "drop" the files in place, and "get" them when i need them...
|
||||
|
||||
A big thanks to the collective git-annex brain out there... :)
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="use git-annex-untrust"
|
||||
date="2019-04-28T17:32:29Z"
|
||||
content="""
|
||||
`[[git annex untrust|git-annex-untrust]] here`
|
||||
|
||||
will tell git-annex not to count the local copy towards `numcopies`.
|
||||
"""]]
|
3
doc/forum/Replacing_a_special_remote_by_another_one.mdwn
Normal file
3
doc/forum/Replacing_a_special_remote_by_another_one.mdwn
Normal file
|
@ -0,0 +1,3 @@
|
|||
Hi!
|
||||
|
||||
I'm using a rclone special remote for Wasabi, with hybrid encryption. I'd like to switch to S3 remote instead of rclone (as Wasabi is S3-compatible). Is it possible? I'd like to keep the same uploaded files, with the same encryption.
|
|
@ -0,0 +1,3 @@
|
|||
Hi!
|
||||
|
||||
I found more than one line per special remote in git-annex:remote.log file. Is it the same for you?
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="format of log files on the git-annex branch"
|
||||
date="2019-04-30T17:52:41Z"
|
||||
content="""
|
||||
I think only the line with the latest timestamp matters; this format lets [[git-annex-sync]] resolve merge conflicts on the git-annex branch by simply merging the set of lines in the two files. The [[internals]] webpage does refer repeatedly to \"one line per repository\". @joeyh can you clarify?
|
||||
"""]]
|
|
@ -48,6 +48,11 @@ For example, this will exit 0:
|
|||
Tell what the mime type of the file is. Only needed when using
|
||||
--largefiles with a mimetype= expression.
|
||||
|
||||
* `--mimeencoding=`
|
||||
|
||||
Tell what the mime encoding of the file is. Only needed when using
|
||||
--largefiles with a mimeencoding= expression.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
|
|
@ -26,5 +26,5 @@ a key. Older versions of git-annex would parse keys with the fields in other
|
|||
orders (although the name field must always come last), but the current
|
||||
version requires the fields come in the order shown above.
|
||||
|
||||
The `git annex examinekey` command can be used to extract information from
|
||||
The [[`git annex examinekey`|git-annex-examinekey]] command can be used to extract information from
|
||||
a key.
|
||||
|
|
|
@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
|
|||
|
||||
Looks up the MIME type of a file, and checks if the glob matches it.
|
||||
|
||||
For example, "mimetype=text/*" will match many varieties of text files,
|
||||
For example, `"mimetype=text/*"` will match many varieties of text files,
|
||||
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
|
||||
etc.
|
||||
|
||||
|
@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
|
|||
This is only available to use when git-annex was built with the
|
||||
MagicMime build flag.
|
||||
|
||||
* `mimeencoding=glob`
|
||||
|
||||
Looks up the MIME encoding of a file, and checks if the glob matches it.
|
||||
|
||||
For example, `"mimeencoding=binary"` will match many kinds of binary
|
||||
files.
|
||||
|
||||
The MIME encodings are the same that are displayed by running `file --mime-encoding`.
|
||||
|
||||
This is only available to use when git-annex was built with the
|
||||
MagicMime build flag.
|
||||
|
||||
* `anything`
|
||||
|
||||
Matches any file.
|
||||
|
|
|
@ -1,3 +1,17 @@
|
|||
If git-tracked files are removed from the remote, they don't get synced over after a "git annex fsck" and "git annex export".
|
||||
|
||||
Is there some way that they could make it to the remote? I'm imagining an rsync-like behavior to copy over files that have different time stamps or file sizes. Would such a feature be welcome in git annex?
|
||||
|
||||
> Since git-annex 6.20180626, `git annex fsck --from` an exporttree=yes remote
|
||||
> will notice if files on it have been deleted, and then
|
||||
> `git annex sync --content` or `git-annex export` will re-upload them.
|
||||
>
|
||||
> But perhaps more interesting, if the remote is also configured with
|
||||
> importtree=yes, `git-annex import` from it can now notice deletions
|
||||
> as well as other changes to the content on the remote, and make a remote
|
||||
> tracking branch in git reflecting the changes. You can then merge or
|
||||
> revert the changes and export or sync can be used to put the deleted
|
||||
> files back on the remote if desired.
|
||||
>
|
||||
> Only a subset of remotes support importtree, but the fsck method
|
||||
> will work for all. So, this is [[done]]. --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
ATM there is no `--json-progress` in `git annex add` (only `--json`), so no feedback to the user could be provided on ETA etc. Would be nice to have `--json-progress` there to echo similar one for `get` and `copy`.
|
||||
|
||||
Cheers!
|
||||
|
||||
[[!meta author=yoh]]
|
|
@ -0,0 +1,11 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-04-26T14:17:58Z"
|
||||
content="""
|
||||
First things first.. There is no progress of any kind for add of an
|
||||
individual item.
|
||||
|
||||
This would need changes to the Backend interface so it can display progress
|
||||
while hashing..
|
||||
"""]]
|
1
doc/todo/key_checksum_from_chunk_checksums.mdwn
Normal file
1
doc/todo/key_checksum_from_chunk_checksums.mdwn
Normal file
|
@ -0,0 +1 @@
|
|||
Would it be hard to add a variation to checksumming [[backends]], that would change how the checksum is computed: instead of computing it on the whole file, it would first be computed on file chunks of given size, and then the final checksum computed on the concatenation of the chunk checksums? You'd add a new [[key field|internals/key_format]], say cNNNNN, specifying the chunking size (the last chunk might be shorter). Then (1) for large files, checksum computation could be parallelized (there could be a config option specifying the default chunk size for newly added files); (2) I often have large files on a remote, for which I have md5 for each chunk, but not for the full file; this would enable me to register the location of these files with git-annex without downloading them, while still using a checksum-based key.
|
|
@ -987,6 +987,7 @@ Executable git-annex
|
|||
Types.LockCache
|
||||
Types.Messages
|
||||
Types.MetaData
|
||||
Types.Mime
|
||||
Types.NumCopies
|
||||
Types.RefSpec
|
||||
Types.Remote
|
||||
|
|
Loading…
Reference in a new issue