Merge branch 'bs' into sqlite-bs

This commit is contained in:
Joey Hess 2019-12-18 15:14:44 -04:00
commit f6c18f6940
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
16 changed files with 263 additions and 48 deletions

View file

@ -133,7 +133,7 @@ makeinfos updated version = do
let infofile = f ++ ".info"
let d = GitAnnexDistribution
{ distributionUrl = mkUrl f
, distributionKey = k
, distributionKey = fromKey id k
, distributionVersion = bv
, distributionReleasedate = now
, distributionUrgentUpgrade = Just "6.20180626"
@ -221,14 +221,18 @@ virusFree f
buildrpms :: FilePath -> [(FilePath, Version)] -> Annex ()
buildrpms topdir l = do
liftIO $ createDirectoryIfMissing True rpmrepo
oldrpms <- map (rpmrepo </>) . filter (".rpm" `isSuffixOf`)
<$> liftIO (getDirectoryContents rpmrepo)
forM_ tarrpmarches $ \(tararch, rpmarch) ->
forM_ (filter (isstandalonetarball tararch . fst) l) $ \(tarball, v) ->
forM_ (filter (isstandalonetarball tararch . fst) l) $ \(tarball, v) -> do
liftIO $ mapM_ nukeFile (filter ((tararch ++ ".rpm") `isSuffixOf`) oldrpms)
void $ liftIO $ boolSystem script
[ Param rpmarch
, File tarball
, Param v
, File rpmrepo
]
void $ inRepo $ runBool [Param "annex", Param "get", File rpmrepo]
void $ liftIO $ boolSystem "createrepo" [File rpmrepo]
void $ inRepo $ runBool [Param "annex", Param "add", File rpmrepo]
where

View file

@ -16,18 +16,18 @@ git-annex (8.20191107) UNRELEASED; urgency=medium
-- Joey Hess <id@joeyh.name> Tue, 29 Oct 2019 15:13:03 -0400
git-annex (7.20191115) UNRELEASED; urgency=medium
git-annex (7.20191219) UNRELEASED; urgency=medium
* Optimised processing of many files, especially by commands like find
and whereis that only report on the state of the repository. Commands
like get also sped up in cases where they have to check a lot of
files but only transfer a few files. Speedups range from 30-100%.
* Sped up many git-annex commands that operate on many files, by
avoiding reserialization of keys.
find is 7% faster; whereis is 3% faster; and git-annex get when
all files are already present is 5% faster
* Stop displaying rsync progress, and use git-annex's own progress display
for local-to-local repo transfers.
* Added build dependency on the filepath-bytestring library.
-- Joey Hess <id@joeyh.name> Wed, 18 Dec 2019 15:12:40 -0400
git-annex (7.20191218) upstream; urgency=medium
* git-lfs: The url provided to initremote/enableremote will now be
stored in the git-annex branch, allowing enableremote to be used without
an url. initremote --sameas can be used to add additional urls.
@ -36,6 +36,13 @@ git-annex (7.20191115) UNRELEASED; urgency=medium
* sync, assistant: Pull and push from git-lfs remotes.
* Fix bug that made bare repos be treated as non-bare when --git-dir
was used.
* inprogress: Support --key.
* Sped up many git-annex commands that operate on many files, by
avoiding reserialization of keys.
find is 7% faster; whereis is 3% faster; and git-annex get when
all files are already present is 5% faster
* Stop displaying rsync progress, and use git-annex's own progress display
for local-to-local repo transfers.
* benchmark: Changed --databases to take a parameter specifying the size
of the database to benchmark.
* benchmark --databases: Display size of the populated database.
@ -44,7 +51,7 @@ git-annex (7.20191115) UNRELEASED; urgency=medium
* Windows: Fix handling of changes to time zone. (Used to work but was
broken in version 7.20181031.)
-- Joey Hess <id@joeyh.name> Fri, 15 Nov 2019 11:57:19 -0400
-- Joey Hess <id@joeyh.name> Wed, 18 Dec 2019 13:53:51 -0400
git-annex (7.20191114) upstream; urgency=medium

View file

@ -176,21 +176,26 @@ data KeyOptions
parseKeyOptions :: Parser KeyOptions
parseKeyOptions = parseAllOption
<|> parseBranchKeysOption
<|> flag' WantUnusedKeys
( long "unused" <> short 'U'
<> help "operate on files found by last run of git-annex unused"
)
<|> (WantSpecificKey <$> option (str >>= parseKey)
( long "key" <> metavar paramKey
<> help "operate on specified key"
))
<|> parseUnusedKeysOption
<|> parseSpecificKeyOption
parseUnusedKeysOption :: Parser KeyOptions
parseUnusedKeysOption = flag' WantUnusedKeys
( long "unused" <> short 'U'
<> help "operate on files found by last run of git-annex unused"
)
parseSpecificKeyOption :: Parser KeyOptions
parseSpecificKeyOption = WantSpecificKey <$> option (str >>= parseKey)
( long "key" <> metavar paramKey
<> help "operate on specified key"
)
parseBranchKeysOption :: Parser KeyOptions
parseBranchKeysOption =
WantBranchKeys <$> some (option (str >>= pure . Ref)
( long "branch" <> metavar paramRef
<> help "operate on files in the specified branch or treeish"
))
parseBranchKeysOption = WantBranchKeys <$> some (option (str >>= pure . Ref)
( long "branch" <> metavar paramRef
<> help "operate on files in the specified branch or treeish"
))
parseFailedTransfersOption :: Parser KeyOptions
parseFailedTransfersOption = flag' WantFailedTransfers

View file

@ -19,24 +19,24 @@ cmd = noCommit $ noMessages $ command "inprogress" SectionQuery
data InprogressOptions = InprogressOptions
{ inprogressFiles :: CmdParams
, allOption :: Bool
, keyOptions :: Maybe KeyOptions
}
optParser :: CmdParamsDesc -> Parser InprogressOptions
optParser desc = InprogressOptions
<$> cmdParams desc
<*> switch
( long "all"
<> short 'A'
<> help "access all files currently being downloaded"
)
<*> optional (parseAllOption <|> parseSpecificKeyOption)
seek :: InprogressOptions -> CommandSeek
seek o = do
ts <- map (transferKey . fst) <$> getTransfers
if allOption o
then forM_ ts $ commandAction . start'
else do
case keyOptions o of
Just WantAllKeys ->
forM_ ts $ commandAction . start'
Just (WantSpecificKey k)
| k `elem` ts -> commandAction (start' k)
| otherwise -> commandAction stop
_ -> do
let s = S.fromList ts
withFilesInGit
(commandAction . (whenAnnexed (start s)))

View file

@ -0,0 +1,76 @@
In datalad test builds with git-annex 7.20191114+git43-ge29663773, one
of the new test failures is due to an unexpectedly dirty repository
([related datalad issue][0]). The dirty status comes from a file that
was tracked in Git switching over to an annex pointer file. Here's a
script that distills enough of the test to trigger the failure on my
end.
[[!format sh """
#!/bin/sh
set -eu
assert_clean () {
if test -n "$(git status --porcelain)"
then
printf "\n\nUnexpectedly dirty:\n" >&2
git status >&2
git diff >&2
exit 1
fi
}
cd "$(mktemp -d --tmpdir gx-pointer-dirty-XXXXXXX)"
git init && git annex init
printf content-git >file-git
git -c annex.largefiles=nothing annex add -- file-git
git commit -m'file-git added'
assert_clean
printf content-annex >file-annex
git -c annex.largefiles=anything annex add -- file-annex
git commit -m'file-annex annexed'
assert_clean
"""]]
On Travis as well as my local machine, the failure is intermittent,
but seems to happen much more often than not. In the failing case,
the last assert_clean call shows:
```
Unexpectedly dirty:
On branch master
Changes not staged for commit:
modified: file-git
no changes added to commit
diff --git a/file-git b/file-git
index d1c416a..b41ca32 100644
--- a/file-git
+++ b/file-git
@@ -1 +1 @@
-content-git
\ No newline at end of file
+/annex/objects/SHA256E-s11--726732d25826965592478fcc7c145d5a10fa1aa70c49fe3a4f847174b6d8889c
```
I see the failure with git-annex built from the latest master
b962471c2 (2019-12-12). Bisecting against the git-annex repo (with a
commit being marked "bad" if there was a failure within ten runs of the
above script), points to ec08b66bd (shouldAnnex: check isInodeKnown,
2019-10-23) as the first bad commit. Just looking at the topic of
the commit, that result seems plausible to me.
### Other details
My git version is 2.24.1, and locally I'm building git-annex through guix.
On the failing Travis run, git-annex 7.20191114+git43-ge29663773 came
from neurodebian, and the git version was 2.24.0.
Hopefully the script above is sufficient to trigger the issue on your end.
Thanks for having a look.
[0]: https://github.com/datalad/datalad/issues/3890
[[!meta author=kyle]]

View file

@ -0,0 +1,51 @@
[[!comment format=mdwn
username="lykos@d125a37d89b1cfac20829f12911656c40cb70018"
nickname="lykos"
avatar="http://cdn.libravatar.org/avatar/085df7b04d3408ba23c19f9c49be9ea2"
subject="comment 3"
date="2019-12-12T21:11:58Z"
content="""
Apparently it happens when the remote returns PREPARE-FAILURE (in this case due to insufficient internet connection). So we get back to
[PREPARE-LOCAL](http://git-annex.branchable.com/design/external_special_remote_protocol/#comment-2eec51c9f774f577b8634e9cdc86cde3) and [external_remote_querying_transition](https://git-annex.branchable.com/todo/external_remote_querying_transition).
I'm going to change git-annex-remote-googledrive to defer network connection to when it's needed. But as external remotes are encouraged to establish network connections in PREPARE, something should be done on git-annex's side, too.
% git annex addurl --batch --with-files --debug
[url] [filename]
[2019-12-12 21:41:14.540686936] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"show-ref\",\"git-annex\"]
[2019-12-12 21:41:14.546632041] process done ExitSuccess
[2019-12-12 21:41:14.546795605] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"show-ref\",\"--hash\",\"refs/heads/git-annex\"]
[2019-12-12 21:41:14.551391222] process done ExitSuccess
[2019-12-12 21:41:14.551593051] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"log\",\"refs/heads/git-annex..802429a871b754d61047457aa9936bde404e3172\",\"--pretty=%H\",\"-n1\"]
[2019-12-12 21:41:14.553261192] process done ExitSuccess
[2019-12-12 21:41:14.553318623] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"log\",\"refs/heads/git-annex..0e8f6eb6cc45d9bb11f42fcf8590b3473e6f2623\",\"--pretty=%H\",\"-n1\"]
[2019-12-12 21:41:14.555647103] process done ExitSuccess
[2019-12-12 21:41:14.555710805] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"log\",\"refs/heads/git-annex..7271aaaa110b0f84b5e730673c66b176789ddcaf\",\"--pretty=%H\",\"-n1\"]
[2019-12-12 21:41:14.557235619] process done ExitSuccess
[2019-12-12 21:41:14.557298991] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"log\",\"refs/heads/git-annex..b9112bcb862041cf96f00e12c6dd33dc31fda5fa\",\"--pretty=%H\",\"-n1\"]
[2019-12-12 21:41:14.560071044] process done ExitSuccess
[2019-12-12 21:41:14.560126818] read: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"log\",\"refs/heads/git-annex..1598b00a78067edc438652f401498a25635cd5a9\",\"--pretty=%H\",\"-n1\"]
[2019-12-12 21:41:14.562966332] process done ExitSuccess
[2019-12-12 21:41:14.563218447] chat: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"cat-file\",\"--batch\"]
[2019-12-12 21:41:14.563535554] chat: git [\"--git-dir=../../../.git\",\"--work-tree=../../..\",\"--literal-pathspecs\",\"cat-file\",\"--batch-check=%(objectname) %(objecttype) %(objectsize)\"]
[2019-12-12 21:41:14.56562967] read: git [\"config\",\"--null\",\"--list\"]
[2019-12-12 21:41:14.566788099] process done ExitSuccess
[2019-12-12 21:41:14.567203332] read: git [\"config\",\"--null\",\"--list\"]
[2019-12-12 21:41:14.568286923] read: git [\"config\",\"--null\",\"--list\"]
[2019-12-12 21:41:14.569077497] chat: /home/silvio/.local/bin/git-annex-remote-googledrive []
[2019-12-12 21:41:14.944193013] git-annex-remote-googledrive[1] --> VERSION 1
[2019-12-12 21:41:14.944312387] git-annex-remote-googledrive[1] <-- EXTENSIONS INFO
[2019-12-12 21:41:14.944486597] git-annex-remote-googledrive[1] --> EXTENSIONS
[2019-12-12 21:41:14.944557495] git-annex-remote-googledrive[1] <-- EXPORTSUPPORTED
[2019-12-12 21:41:14.94467435] git-annex-remote-googledrive[1] --> EXPORTSUPPORTED-SUCCESS
[2019-12-12 21:41:14.944906493] chat: /home/silvio/.local/bin/git-annex-remote-googledrive []
[2019-12-12 21:41:15.333971094] git-annex-remote-googledrive[1] --> VERSION 1
[2019-12-12 21:41:15.334086558] git-annex-remote-googledrive[1] <-- EXTENSIONS INFO
[2019-12-12 21:41:15.334273359] git-annex-remote-googledrive[1] --> EXTENSIONS
[2019-12-12 21:41:15.334334558] git-annex-remote-googledrive[1] <-- PREPARE
[...]
[2019-12-12 21:41:43.372727829] git-annex-remote-googledrive[1] --> PREPARE-FAILURE ('Failed to connect with Google. Please check your internet connection.', ServerNotFoundError('Unable to find the server at www.googleapis.com'))
git-annex: ('Failed to connect with Google. Please check your internet connection.', ServerNotFoundError('Unable to find the server at www.googleapis.com'))
"""]]

View file

@ -40,6 +40,10 @@ some file formats can be usefully streamed in this way.
Rather than specifying a filename or path, this option can be
used to access all files that are currently being downloaded.
* `--key=keyname`
Access the file that is currently being downloaded for the specified key.
* file matching options
The [[git-annex-matching-options]](1)
@ -47,7 +51,7 @@ some file formats can be usefully streamed in this way.
# EXIT STATUS
If any of the requested files are not currently being downloaded,
If any of the requested items are not currently being downloaded,
the exit status will be 1.
# SEE ALSO

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="mike@4e7a118bd37129091199ae0fb28184999c5f4725"
nickname="mike"
avatar="http://cdn.libravatar.org/avatar/9751260f8a13fc27ad837eaf66fac5d7"
subject="repodata is outdated"
date="2019-12-18T10:06:08Z"
content="""
The repodata under https://downloads.kitenet.net/git-annex/linux/current/rpms/repodata/ is outdated (from September), latest RPM in https://downloads.kitenet.net/git-annex/linux/current/rpms/ is from November. I think I had an issue using git-annex-shell with the 201909 version, so this is somewhat relevant.
"""]]

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2019-12-18T18:28:51Z"
content="""
I've fixed it, should include the latest release now.
Please do file a bug report if you find problems with the rpms.
"""]]

View file

@ -1,14 +0,0 @@
git-annex 7.20191017 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* initremote: Added --sameas option, allows for two special remotes that
access the same data store.
* Note that due to complications of the sameas feature, any external
special remotes that try to send SETSTATE or GETSTATE during INITREMOTE
or EXPORTSUPPORTED will now get back an ERROR. That would be a very
hackish thing for an external special remote to do, needing some kind
of hard-coded key value to be used, so probably nothing will be affected.
* forget --drop-dead: Remove several classes of git-annex log files
when they become empty, further reducing the size of the git-annex branch.
* OSX: Deal with symbolic link problem that caused git to not be included in
the git-annex.dmg.
* Fix build with persistent-2.10."""]]

View file

@ -0,0 +1,24 @@
git-annex 7.20191218 released with [[!toggle text="these changes"]]
[[!toggleable text="""
* git-lfs: The url provided to initremote/enableremote will now be
stored in the git-annex branch, allowing enableremote to be used without
an url. initremote --sameas can be used to add additional urls.
* git-lfs: When there's a git remote with an url that's known to be
used for git-lfs, automatically enable the special remote.
* sync, assistant: Pull and push from git-lfs remotes.
* Fix bug that made bare repos be treated as non-bare when --git-dir
was used.
* inprogress: Support --key.
* Sped up many git-annex commands that operate on many files, by
avoiding reserialization of keys.
find is 7% faster; whereis is 3% faster; and git-annex get when
all files are already present is 5% faster
* Stop displaying rsync progress, and use git-annex's own progress display
for local-to-local repo transfers.
* benchmark: Changed --databases to take a parameter specifying the size
of the database to benchmark.
* benchmark --databases: Display size of the populated database.
* benchmark --databases: Improve the "addAssociatedFile (new)"
benchmark to really add new values, not overwriting old values.
* Windows: Fix handling of changes to time zone. (Used to work but was
broken in version 7.20181031.)"""]]

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="https://christian.amsuess.com/chrysn"
nickname="chrysn"
avatar="http://christian.amsuess.com/avatar/c6c0d57d63ac88f3541522c4b21198c3c7169a665a2f2d733b4f78670322ffdc"
subject="Would be useful"
date="2019-12-17T09:08:08Z"
content="""
A `git annex cat` would be useful for the very web server purpose you describe (WIP at https://gitlab.com/chrysn/annex-to-web, though I'm not sure it's going anywhere).
Unlike `git annex inprogress` that I (will) use for a workaround, this could take a `--skip` argument that usually just seeks into the file.
If the data is served from a remote that allows seeking access (eg. IPFS),
then that access could be prioritized and that part downloaded first.
(Implementing this would require another tmp pool for sparse files as they couldn't go with the `git annex inprogress` files for there is the expectation that those would grow to completion,
but anyway this would be an entry point for such a feature if it is ever added).
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 2"""
date="2019-12-18T17:49:47Z"
content="""
git-annex's API for getting object content from remotes involves a
destination file that is written to. That limits the efficiency of such a
command. There would need to be a separate API for streaming, which some
remotes will not have any hope of supporting.
"""]]

View file

@ -0,0 +1,10 @@
Unlike `whereis` and other subcommands, `inprogress` does not offer a `--key` argument to select files by key rather than checked-out name,
making it unusable in bare repositories.
Please consider adding a `--key` option there, which would display the single incomplete file corresponding to the key if one is in progress.
My use case is serving git-annexed files to the web from a bare repository (<https://gitlab.com/chrysn/annex-to-web>, see also [[todo/git-annex-cat]]), which would be especially useful with gitolite repositories as they are by design bare, and on devices where checkouts are cumbersome (cf. [[forum/Dealing_with_crippled_Android_file_system]]).
A workaround is running `git annex inprogress --all | grep $KEY`, but that's probably relying on an implementation detail that could be changed at any time (though it probably won't, so as to avoid race conditions as in `tail -f $(git annex inprogress file-thats-almost.done)`).
> [[done]] --[[Joey]]

View file

@ -0,0 +1,5 @@
I want to add some dotfiles in the root of my repository to git-annex as unlocked annexed files. So I edited `.git/info/attributes` to remove the line `.* !filter`, such that it only contains the line `* filter=annex`. This seems to be working fine.
I was thinking that it might make sense to have a `git annex config` option to tell git-annex not to add the `.* !filter` line to `.git/info/attributes` when initialising other clones of this repo. In the meantime, I've worked around it using a `post_checkout` hook in my `~/.mrconfig` which edits `.git/info/attributes`.
--spwhitton

View file

@ -1,5 +1,5 @@
Name: git-annex
Version: 8.20191121
Version: 8.20191219
Cabal-Version: >= 1.8
License: AGPL-3
Maintainer: Joey Hess <id@joeyh.name>