Merge branch 'master' into sameas
This commit is contained in:
commit
37f725a9f7
72 changed files with 1120 additions and 65 deletions
|
@ -9,15 +9,20 @@
|
|||
|
||||
module Annex.Content.PointerFile where
|
||||
|
||||
#if ! defined(mingw32_HOST_OS)
|
||||
import System.Posix.Files
|
||||
#else
|
||||
import System.PosixCompat.Files
|
||||
#endif
|
||||
|
||||
import Annex.Common
|
||||
import Annex.Perms
|
||||
import Annex.Link
|
||||
import Annex.ReplaceFile
|
||||
import Annex.InodeSentinal
|
||||
import Utility.InodeCache
|
||||
import Annex.Content.LowLevel
|
||||
import Utility.InodeCache
|
||||
import Utility.Touch
|
||||
|
||||
{- Populates a pointer file with the content of a key.
|
||||
-
|
||||
|
@ -48,10 +53,18 @@ populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
|
|||
- Does not check if the pointer file is modified. -}
|
||||
depopulatePointerFile :: Key -> FilePath -> Annex ()
|
||||
depopulatePointerFile key file = do
|
||||
mode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus file
|
||||
st <- liftIO $ catchMaybeIO $ getFileStatus file
|
||||
let mode = fmap fileMode st
|
||||
secureErase file
|
||||
liftIO $ nukeFile file
|
||||
ic <- replaceFile file $ \tmp -> do
|
||||
liftIO $ writePointerFile tmp key mode
|
||||
#if ! defined(mingw32_HOST_OS)
|
||||
-- Don't advance mtime; this avoids unnecessary re-smudging
|
||||
-- by git in some cases.
|
||||
liftIO $ maybe noop
|
||||
(\t -> touch tmp t False)
|
||||
(fmap modificationTimeHiRes st)
|
||||
#endif
|
||||
withTSDelta (liftIO . genInodeCache tmp)
|
||||
maybe noop (restagePointerFile (Restage True) file) ic
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
{- git-annex git hooks
|
||||
-
|
||||
- Note that it's important that the scripts installed by git-annex
|
||||
- not change, otherwise removing old hooks using an old version of
|
||||
- the script would fail.
|
||||
- Note that it's important that the content of scripts installed by
|
||||
- git-annex not change, otherwise removing old hooks using an old
|
||||
- version of the script would fail.
|
||||
-
|
||||
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -18,6 +18,16 @@ import Utility.Shell
|
|||
|
||||
import qualified Data.Map as M
|
||||
|
||||
-- Remove all hooks.
|
||||
unHook :: Annex ()
|
||||
unHook = do
|
||||
hookUnWrite preCommitHook
|
||||
hookUnWrite postReceiveHook
|
||||
hookUnWrite postCheckoutHook
|
||||
hookUnWrite postMergeHook
|
||||
hookUnWrite preCommitAnnexHook
|
||||
hookUnWrite postUpdateAnnexHook
|
||||
|
||||
preCommitHook :: Git.Hook
|
||||
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") []
|
||||
|
||||
|
|
|
@ -128,8 +128,8 @@ initialize' mversion = checkCanInitialize $ do
|
|||
|
||||
uninitialize :: Annex ()
|
||||
uninitialize = do
|
||||
hookUnWrite preCommitHook
|
||||
hookUnWrite postReceiveHook
|
||||
unHook
|
||||
deconfigureSmudgeFilter
|
||||
removeRepoUUID
|
||||
removeVersion
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ sleepingActivityThread urlrenderer activity lasttime = go lasttime =<< getnextti
|
|||
tolate nowt tz = case mmaxt of
|
||||
Just maxt -> nowt > maxt
|
||||
-- allow the job to start 10 minutes late
|
||||
Nothing ->diffUTCTime
|
||||
Nothing -> diffUTCTime
|
||||
(localTimeToUTC tz nowt)
|
||||
(localTimeToUTC tz t) > 600
|
||||
run nowt = do
|
||||
|
|
48
CHANGELOG
48
CHANGELOG
|
@ -1,24 +1,42 @@
|
|||
git-annex (7.20190913) UNRELEASED; urgency=medium
|
||||
git-annex (7.20191011) UNRELEASED; urgency=medium
|
||||
|
||||
* Added --mimetype and --mimeencoding file matching options.
|
||||
* Added --unlocked and --locked file matching options.
|
||||
* git-lfs: Added support for http basic auth.
|
||||
* git-lfs: Only do endpoint discovery once when concurrency is enabled.
|
||||
* Test: Use more robust directory removal when built with directory-1.2.7.
|
||||
* Close sqlite databases more robustly.
|
||||
* adjust --lock: This enters an adjusted branch where files are locked.
|
||||
* remotedaemon: Don't list --stop in help since it's not supported.
|
||||
* enable-tor: Run kdesu with -c option.
|
||||
* enable-tor: Use pkexec to run command as root when gksu and kdesu are not
|
||||
available.
|
||||
* Fix bug in handling of annex.largefiles that use largerthan/smallerthan.
|
||||
When adding a modified file, it incorrectly used the file size of the
|
||||
old version of the file, not the current size.
|
||||
* initremote: Added --sameas option, allows for two special remotes that
|
||||
access the same data store.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 19 Sep 2019 11:11:19 -0400
|
||||
|
||||
git-annex (7.20191009) upstream; urgency=medium
|
||||
|
||||
* Fix bug in handling of annex.largefiles that use largerthan/smallerthan.
|
||||
When adding a modified file, it incorrectly used the file size of the
|
||||
old version of the file, not the current size.
|
||||
* Added --mimetype and --mimeencoding file matching options.
|
||||
* Added --unlocked and --locked file matching options.
|
||||
* Added adjust --lock, to enter an adjusted branch where files are locked.
|
||||
* git-lfs: Added support for http basic auth.
|
||||
* git-lfs: Only do endpoint discovery once when concurrency is enabled.
|
||||
* fsck --incremental/--more: Fix bug that prevented the incremental fsck
|
||||
information from being updated every 5 minutes as it was supposed to be;
|
||||
it was only updated after 1000 files were checked, which may be more
|
||||
files than are possible to fsck in a given fsck time window.
|
||||
Thanks to Peter Simons for help with analysis of this bug.
|
||||
* Test: Use more robust directory removal when built with directory-1.2.7.
|
||||
* Close sqlite databases more robustly.
|
||||
* remotedaemon: Don't list --stop in help since it's not supported.
|
||||
* enable-tor: Run kdesu with -c option.
|
||||
* enable-tor: Use pkexec to run command as root when gksu and kdesu are not
|
||||
available.
|
||||
* When dropping an unlocked file, preserve its mtime, which avoids
|
||||
git status unnecessarily running the clean filter on the file.
|
||||
* uninit: Remove several git hooks that git-annex init sets up.
|
||||
* uninit: Remove the smudge and clean filters that git-annex init sets up.
|
||||
* Work around git cat-file --batch's odd stripping of carriage return
|
||||
from the end of the line (some windows infection), avoiding crashing
|
||||
when the repo contains a filename ending in a carriage return.
|
||||
* git-annex-standalone.rpm: Fix the git-annex-shell symlink.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Wed, 09 Oct 2019 12:31:31 -0400
|
||||
|
||||
git-annex (7.20190912) upstream; urgency=medium
|
||||
|
||||
* Default to v7 for new repositories.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Git smudge filter configuration
|
||||
-
|
||||
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -32,10 +32,25 @@ configureSmudgeFilter = unlessM (fromRepo Git.repoIsLocalBare) $ do
|
|||
gfs <- readattr gf
|
||||
liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do
|
||||
createDirectoryIfMissing True (takeDirectory lf)
|
||||
writeFile lf (lfs ++ "\n" ++ stdattr)
|
||||
writeFile lf (lfs ++ "\n" ++ unlines stdattr)
|
||||
where
|
||||
readattr = liftIO . catchDefaultIO "" . readFileStrict
|
||||
stdattr = unlines
|
||||
[ "* filter=annex"
|
||||
, ".* !filter"
|
||||
]
|
||||
|
||||
stdattr :: [String]
|
||||
stdattr =
|
||||
[ "* filter=annex"
|
||||
, ".* !filter"
|
||||
]
|
||||
|
||||
-- Note that this removes the local git attributes for filtering,
|
||||
-- which is what git-annex installed, but it does not change anything
|
||||
-- that may have been committed to a .gitattributes in the repository.
|
||||
-- git-annex does not commit that.
|
||||
deconfigureSmudgeFilter :: Annex ()
|
||||
deconfigureSmudgeFilter = do
|
||||
lf <- Annex.fromRepo Git.attributesLocal
|
||||
ls <- liftIO $ catchDefaultIO [] $ lines <$> readFileStrict lf
|
||||
liftIO $ writeFile lf $ unlines $
|
||||
filter (\l -> l `notElem` stdattr && not (null l)) ls
|
||||
unsetConfig (ConfigKey "filter.annex.smudge")
|
||||
unsetConfig (ConfigKey "filter.annex.clean")
|
||||
|
|
|
@ -83,7 +83,7 @@ addDb (FsckHandle h _) k = H.queueDb h checkcommit $
|
|||
| sz > 1000 = return True
|
||||
| otherwise = do
|
||||
now <- getCurrentTime
|
||||
return $ diffUTCTime lastcommittime now > 300
|
||||
return $ diffUTCTime now lastcommittime > 300
|
||||
|
||||
{- Doesn't know about keys that were just added with addDb. -}
|
||||
inDb :: FsckHandle -> Key -> IO Bool
|
||||
|
|
|
@ -57,7 +57,7 @@ queueDb a (WriteHandle h) = H.queueDb h checkcommit a
|
|||
| sz > 1000 = return True
|
||||
| otherwise = do
|
||||
now <- getCurrentTime
|
||||
return $ diffUTCTime lastcommittime now > 300
|
||||
return $ diffUTCTime now lastcommittime > 300
|
||||
|
||||
addAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
|
||||
addAssociatedFile ik f = queueDb $ do
|
||||
|
|
|
@ -132,6 +132,10 @@ query hdl object newlinefallback receive
|
|||
-- filename itself contains a newline, have to fall back to another
|
||||
-- method of getting the information.
|
||||
| '\n' `elem` s = newlinefallback
|
||||
-- git strips carriage return from the end of a line, out of some
|
||||
-- misplaced desire to support windows, so also use the newline
|
||||
-- fallback for those.
|
||||
| "\r" `isSuffixOf` s = newlinefallback
|
||||
| otherwise = CoProcess.query hdl send receive
|
||||
where
|
||||
send to = hPutStrLn to s
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
### Please describe the problem.
|
||||
|
||||
`/usr/bin/git-annex-shell` is linked to `/usr/lib/git-annex.linux/git-annex`, not `/usr/lib/git-annex.linux/git-annex-shell` after installing from git annex standalone rpm using yum.
|
||||
|
||||
As a result, commands passed through `git-annex-shell` don't work, but other functions I tried work properly (e.g. initializing repos, adding files). I found the bug after running `git annex get myawesomefile` and getting a confusing error message about `git-annex` usage.
|
||||
|
||||
Running `/usr/lib/git-annex.linux/git-annex-shell configlist ~/path/to/my/repo` would produce correct output, but `/usr/bin/git-annex-shell configlist ~/path/to/my/repo` would raise errors. Relinking `/usr/bin/git-annex-shell` seems to have fixed the problem.
|
||||
|
||||
|
||||
### What steps will reproduce the problem?
|
||||
|
||||
- Install git annex following these steps: https://git-annex.branchable.com/install/rpm_standalone/
|
||||
- Run any `git-annex-shell` command. In my case, `git-annex-shell configlist ~/path/to/my/repo`.
|
||||
|
||||
### What version of git-annex are you using? On what operating system?
|
||||
|
||||
git-annex version: 7.20190912-g05bc37910
|
||||
build flags: Assistant Webapp Pairing S3 WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite
|
||||
dependency versions: aws-0.20 bloomfilter-2.0.1.0 cryptonite-0.25 DAV-1.3.3 feed-1.0.0.0 ghc-8.4.4 http-client-0.5.13.1 persistent-sqlite-2.8.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0
|
||||
operating system: linux x86_64 (CentOS 7)
|
||||
|
||||
### Please provide any additional information below.
|
||||
|
||||
Here is output from one time I tried to run `git-annex-shell` over ssh. Note that the error message gives usage for `git-annex`, not `git-annex-shell`.
|
||||
|
||||
[[!format sh """
|
||||
$ ssh username@my.remote.server 'git-annex-shell configlist ~/path/to/my/repo'
|
||||
Invalid argument `configlist'
|
||||
|
||||
Usage: git-annex COMMAND
|
||||
git-annex - manage files with git, without checking their contents in
|
||||
|
||||
Commonly used commands:
|
||||
|
||||
add PATH ... add files to annex
|
||||
addurl URL ... add urls to annex
|
||||
"""]]
|
||||
|
||||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||
|
||||
Git annex does exactly what I want, without the complicated setup or fees of git lfs. Thanks for your work maintaining this project!
|
||||
|
||||
> [[fixed|done]] in git; the rpm itself will only get generated at the next
|
||||
> release though. --[[Joey]]
|
|
@ -0,0 +1,39 @@
|
|||
### Please describe the problem.
|
||||
The watcher in `git annex webapp` crashes (see error below).
|
||||
|
||||
It seems to be a result of having the file `Icon^M^M` in `.gitignore`
|
||||
|
||||
|
||||
### What steps will reproduce the problem?
|
||||
1. Add `Icon^M^M` into `.gitignore`
|
||||
2. Start the webapp
|
||||
3. See the error in the top right hand side of the screen
|
||||
|
||||
### What version of git-annex are you using? On what operating system?
|
||||
|
||||
[[!format sh """
|
||||
git-annex version: 7.20190912
|
||||
build flags: Assistant Webapp Pairing S3 WebDAV FsEvents TorrentParser MagicMime Feeds Testsuite
|
||||
dependency versions: aws-0.21.1 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.3 feed-1.2.0.0 ghc-8.6.5 http-client-0.6.4 persistent-sqlite-2.10.5 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0
|
||||
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL
|
||||
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external
|
||||
operating system: darwin x86_64
|
||||
supported repository versions: 7
|
||||
upgrade supported from repository versions: 0 1 2 3 4 5 6
|
||||
local repository version: 7
|
||||
"""]]
|
||||
|
||||
Running on MacOS 10.13.6
|
||||
### Please provide any additional information below.
|
||||
|
||||
[[!format sh """
|
||||
Watcher crashed: unknown response from git cat-file ("HEAD:./Library/Icon missing",Ref "HEAD:./Library/Icon\r")
|
||||
CallStack (from HasCallStack):
|
||||
error, called at ./Git/CatFile.hs:119:28 in main:Git.CatFile
|
||||
"""]]
|
||||
|
||||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||
|
||||
Yes! Been using it since the start. Donated. Will donate again if you run a funding run.
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
|
@ -0,0 +1,46 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-10-08T18:53:09Z"
|
||||
content="""
|
||||
The error message shows that the problem is that git cat-file
|
||||
output something ending with a carriage return.
|
||||
|
||||
I don't think that the carriage return in your .gitignore is directly
|
||||
related. git-annex uses `git check-ignore -z` which uses NUL for a
|
||||
delimiter and not newline characters.
|
||||
|
||||
It's interesting that carriage returns would cause a problem with git
|
||||
cat-file. Its interface is obviously problematic for filenames containing
|
||||
newlines, and git-annex has worked around that for a while.
|
||||
|
||||
Here's git cat-file --batch falling over on a carriage return, indeed:
|
||||
|
||||
joey@darkstar:/tmp/bad>git ls-tree HEAD
|
||||
100644 blob 79e1eee83674b65519a4a9d632bb38dda357512b .gitignore
|
||||
100644 blob d8a7f641c2ded93c164528b87fa17a12e7e6a5b1 foo
|
||||
100644 blob f8e47b9532ea17ac825c39bddc35dbd68f120a46 "foo\\r"
|
||||
100644 blob 4ed2fceb3af4c9dc27097d9a3f7d88973ffa2884 x
|
||||
100644 blob 4c2dbb3e16f26cdccc6da3aea3c5e69fe46098f5 y
|
||||
joey@darkstar:/tmp/bad>printf 'HEAD:foo\r' | git cat-file --batch | hexdump -C
|
||||
00000000 48 45 41 44 3a 66 6f 6f 0d 20 6d 69 73 73 69 6e |HEAD:foo. missin|
|
||||
00000010 67 0a |g.|
|
||||
00000012
|
||||
|
||||
Here's the code from git that seems responsible:
|
||||
|
||||
int strbuf_getline(struct strbuf *sb, FILE *fp)
|
||||
{
|
||||
if (strbuf_getwholeline(sb, fp, '\n'))
|
||||
return EOF;
|
||||
if (sb->buf[sb->len - 1] == '\n') {
|
||||
strbuf_setlen(sb, sb->len - 1);
|
||||
if (sb->len && sb->buf[sb->len - 1] == '\r')
|
||||
strbuf_setlen(sb, sb->len - 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
I've griped on the git mailing list, but also gonna fix git-annex to use
|
||||
the slow fallback for filenames with carriage returns.
|
||||
"""]]
|
117
doc/bugs/git-lfs_remote_URL_is_not_recorded__63__.mdwn
Normal file
117
doc/bugs/git-lfs_remote_URL_is_not_recorded__63__.mdwn
Normal file
|
@ -0,0 +1,117 @@
|
|||
### Please describe the problem.
|
||||
|
||||
I was trying to follow https://git-annex.branchable.com/special_remotes/git-lfs/ (only without any encryption), to store at least some data on github via LFS (e.g., for https://github.com/dandi-datasets/nwb_test_data).
|
||||
|
||||
Even though I do provide URL to the `annex initremote` call, it is not stored within `remote.log`:
|
||||
|
||||
|
||||
[[!format sh """
|
||||
$> sudo rm -rf /tmp/testds2 && ( mkdir /tmp/testds2 && cd /tmp/testds2 && git init && git annex init && git annex initremote gh-lfs autoenable=true type=git-lfs url=git@github.com:yarikoptic/testds2.git encryption=none && git show git-annex:remote.log; )
|
||||
Initialized empty Git repository in /tmp/testds2/.git/
|
||||
init (scanning for unlocked files...)
|
||||
ok
|
||||
(recording state in git...)
|
||||
initremote gh-lfs ok
|
||||
(recording state in git...)
|
||||
c9132e68-e9d8-40b5-ba34-5d60a8b9c844 autoenable=true encryption=none name=gh-lfs type=git-lfs timestamp=1570642576.06742667s
|
||||
|
||||
"""]]
|
||||
|
||||
git annex 7.20190912-1~ndall+1
|
||||
|
||||
|
||||
If I just proceed, populate and copy some data via lfs (example uses datalad's `create-sibling-github` to create a new repo):
|
||||
|
||||
[[!format sh """
|
||||
$> ( cd /tmp/testds2 && touch 123 && git annex add 123 && git commit -m 'add 123' && datalad create-sibling-github -s origin testds2 && git push -u origin master && git annex copy --to=gh-lfs 123; git push origin git-annex; )
|
||||
add 123
|
||||
ok
|
||||
(recording state in git...)
|
||||
[master (root-commit) d2b2f52] add 123
|
||||
1 file changed, 1 insertion(+)
|
||||
create mode 120000 123
|
||||
[WARNING] Authentication failed using a token.
|
||||
.: origin(-) [https://github.com/yarikoptic/testds2.git (git)]
|
||||
'https://github.com/yarikoptic/testds2.git' configured as sibling 'origin' for <Dataset path=/tmp/testds2>
|
||||
Enumerating objects: 3, done.
|
||||
Counting objects: 100% (3/3), done.
|
||||
Delta compression using up to 4 threads
|
||||
Compressing objects: 100% (2/2), done.
|
||||
Writing objects: 100% (3/3), 307 bytes | 307.00 KiB/s, done.
|
||||
Total 3 (delta 0), reused 0 (delta 0)
|
||||
To github.com:yarikoptic/testds2.git
|
||||
* [new branch] master -> master
|
||||
Branch 'master' set up to track remote branch 'master' from 'origin'.
|
||||
copy 123 (to gh-lfs...)
|
||||
ok
|
||||
(recording state in git...)
|
||||
Enumerating objects: 19, done.
|
||||
Counting objects: 100% (19/19), done.
|
||||
Delta compression using up to 4 threads
|
||||
Compressing objects: 100% (15/15), done.
|
||||
Writing objects: 100% (19/19), 1.66 KiB | 567.00 KiB/s, done.
|
||||
Total 19 (delta 4), reused 0 (delta 0)
|
||||
remote: Resolving deltas: 100% (4/4), done.
|
||||
remote:
|
||||
remote: Create a pull request for 'git-annex' on GitHub by visiting:
|
||||
remote: https://github.com/yarikoptic/testds2/pull/new/git-annex
|
||||
remote:
|
||||
To github.com:yarikoptic/testds2.git
|
||||
* [new branch] git-annex -> git-annex
|
||||
|
||||
"""]]
|
||||
|
||||
on a new clone I get a complaint that `url=` is missing, and no data is fetched
|
||||
|
||||
[[!format sh """
|
||||
$> sudo rm -rf testds2-clone && git clone git@github.com:yarikoptic/testds2.git testds2-clone && ( cd testds2-clone && git annex init && git annex get 123; )
|
||||
Cloning into 'testds2-clone'...
|
||||
remote: Enumerating objects: 22, done.
|
||||
remote: Counting objects: 100% (22/22), done.
|
||||
remote: Compressing objects: 100% (13/13), done.
|
||||
remote: Total 22 (delta 5), reused 21 (delta 4), pack-reused 0
|
||||
Receiving objects: 100% (22/22), done.
|
||||
Resolving deltas: 100% (5/5), done.
|
||||
123@
|
||||
init (merging origin/git-annex into git-annex...)
|
||||
(recording state in git...)
|
||||
(scanning for unlocked files...)
|
||||
Invalid command: 'git-annex-shell 'configlist' '/~/yarikoptic/testds2.git''
|
||||
You appear to be using ssh to clone a git:// URL.
|
||||
Make sure your core.gitProxy config option and the
|
||||
GIT_PROXY_COMMAND environment variable are NOT set.
|
||||
|
||||
Remote origin does not have git-annex installed; setting annex-ignore
|
||||
|
||||
This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git annex enableremote origin
|
||||
(Auto enabling special remote gh-lfs...)
|
||||
|
||||
Specify url=
|
||||
ok
|
||||
(recording state in git...)
|
||||
get 123 (not available)
|
||||
Try making some of these repositories available:
|
||||
92ce3cfc-8c58-42db-8aa3-ea4d4b3a6011 -- yoh@hopa:/tmp/testds2
|
||||
c9132e68-e9d8-40b5-ba34-5d60a8b9c844 -- gh-lfs
|
||||
|
||||
(Note that these git remotes have annex-ignore set: origin)
|
||||
failed
|
||||
git-annex: get: 1 failed
|
||||
"""]]
|
||||
|
||||
so I had to `enableremote` it while providing the URL, after which I was able to `get` the file:
|
||||
|
||||
[[!format sh """
|
||||
$> git annex enableremote gh-lfs autoenable=true type=git-lfs url=git@github.com:yarikoptic/testds2.git encryption=none && git annex get 123
|
||||
enableremote gh-lfs ok
|
||||
(recording state in git...)
|
||||
get 123 (from gh-lfs...)
|
||||
(checksum...) ok
|
||||
(recording state in git...)
|
||||
"""]]
|
||||
|
||||
|
||||
Shouldn't that URL be recorded in remote.log? (similarly to `type=git` remotes)
|
||||
|
||||
[[!meta author=yoh]]
|
||||
[[!tag projects/dandi]]
|
|
@ -0,0 +1,51 @@
|
|||
### Please describe the problem.
|
||||
|
||||
Git-annex-uninit leaves the underlying git-repo in an unusable state because it does not revert whatever modifications the current git-annex version does to git-add.
|
||||
|
||||
### What steps will reproduce the problem?
|
||||
|
||||
git init Test
|
||||
cd Test
|
||||
touch test1.txt; git add test1.txt; git commit -m "initial commit"
|
||||
git annex init
|
||||
git annex uninit
|
||||
touch test2.txt
|
||||
git add test2.txt
|
||||
|
||||
The last command fails with:
|
||||
|
||||
git-annex: First run: git-annex init
|
||||
error: external filter 'git-annex smudge --clean -- %f' failed 1
|
||||
error: external filter 'git-annex smudge --clean -- %f' failed
|
||||
|
||||
|
||||
### What version of git-annex are you using? On what operating system?
|
||||
|
||||
git-annex version: 7.20190912-gab739242a3
|
||||
build flags: Assistant Webapp Pairing S3 WebDAV Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite
|
||||
dependency versions: aws-0.21.1 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.3 feed-1.2.0.0 ghc-8.6.5 http-client-0.6.4 persistent-sqlite-2.10.5 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.0
|
||||
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL
|
||||
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs hook external
|
||||
operating system: linux x86_64
|
||||
supported repository versions: 7
|
||||
upgrade supported from repository versions: 0 1 2 3 4 5 6
|
||||
local repository version: 7
|
||||
|
||||
|
||||
### Please provide any additional information below.
|
||||
|
||||
I accidentally discovered this after a system upgrade that included the latest version of git-annex. I wasn't aware of git-add's new behaviour, and so my workflow suddenly failed silently: I was used to manually using git-annex for big files, but from day to day, I just used git normally and synced my workstations through a remote that didn't have git-annex. Now the content of my new files didn't propagate any more; I was mystified, since the files that didn't propagate looked normal (they weren't symlinks, as I was used to for annexed files, and at first I couldn't figure out how to know whether they were annexed or not). As I was in a rush and didn't have access to the internet to clear things up, I kind of panicked and tried getting rid of git-annex before it did more damage…
|
||||
|
||||
[[!format sh """
|
||||
# If you can, paste a complete transcript of the problem occurring here.
|
||||
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
|
||||
|
||||
|
||||
# End of transcript or log.
|
||||
"""]]
|
||||
|
||||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||
|
||||
I love git-annex (a brilliantly designed piece of software in my view) and have been using it a lot for years!
|
||||
|
||||
> Thanks for pointing out this oversight. [[fixed|done]] --[[Joey]]
|
1
doc/bugs/git_keeps_refreshing_index.mdwn
Normal file
1
doc/bugs/git_keeps_refreshing_index.mdwn
Normal file
|
@ -0,0 +1 @@
|
|||
Since upgrading to git-annex 7.20190912, when doing `git status` I keep getting the message `Refresh index:`, and there is a delay until it turns into something like `Refresh index: 100% (601422/601422), done.`. I don't recall this happening with earlier `git-annex` versions. Have others seen this? (Not a "bug" in terms of correctness, but posting here as it affects usability and speed.)
|
|
@ -0,0 +1,13 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-10-08T16:22:16Z"
|
||||
content="""
|
||||
When using v7 repositories, git will do this when there are unlocked files
|
||||
that have had their content changed.
|
||||
|
||||
git only displays that message when it thinks it's going to have to
|
||||
do a lot of work (re-smudging a lot of files), so you'd mostly see this
|
||||
happen in a larger repository that has had a lot of (unlocked) files change
|
||||
in some way.
|
||||
"""]]
|
|
@ -0,0 +1,13 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2019-10-08T16:37:49Z"
|
||||
content="""
|
||||
I noticed that `git annex drop` of an unlocked file causes the next `git
|
||||
status` to re-smudge the file. That's surprising because git-annex
|
||||
internally updates the index using git update-index, so git should not see
|
||||
any need to revisit it.
|
||||
|
||||
So my guess is you were getting or dropping a lot of unlocked files when
|
||||
you saw that.
|
||||
"""]]
|
|
@ -0,0 +1,26 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 3"""
|
||||
date="2019-10-08T16:43:49Z"
|
||||
content="""
|
||||
Dropping a single unlocked file and then running git
|
||||
status with `GIT_TRACE=1` replicates the additional smudge every time.
|
||||
|
||||
The index before and after git status smudging have identical content, so
|
||||
git-annex seems to have updated it correctly already. It seems that the
|
||||
mtime of the index file is causing git to do that one additional smudge.
|
||||
|
||||
Eg, the file git-annex dropped had a mtime of 13:02:43.716964483. The
|
||||
~/index file that git-annex generated has a mtime of 13:02:43.752964130
|
||||
which is newer, but only by a fraction of a second. So, git probably
|
||||
assumes the mtimes are sufficiently close that it can't trust that the
|
||||
index file really reflects the current content of the work tree file. And
|
||||
so it re-smudges the work tree file unnecessarily.
|
||||
|
||||
I have not been able to find a number of files to drop that replicates
|
||||
the bug report. When a lot of files are dropped, it starts taking
|
||||
sufficiently long to update the index file that it ends up with a newer
|
||||
timestamp, which avoids the unnecessary additional smudging. The worst
|
||||
case I have found here is dropping 9 files causes all 9 to get re-smudged,
|
||||
but that's not enough to get git to use the progress display.
|
||||
"""]]
|
|
@ -0,0 +1,19 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 4"""
|
||||
date="2019-10-08T17:32:52Z"
|
||||
content="""
|
||||
Anyway, if git-annex could preserve the mtime of an unlocked file when
|
||||
writing its pointer file or when populating it with content, that would
|
||||
avoid the unnecessary smudging. (Which seems better than adding a delay when
|
||||
updating the index file, or setting the index's mtime ahead of the current
|
||||
time..)
|
||||
|
||||
That's easier done for pointer files than populated files, because
|
||||
with annex.thin the content is a hard link and it would probably not be
|
||||
good to change its mtime.
|
||||
|
||||
For now, I didn't do it extensively, but only in depopulatePointerFile.
|
||||
That was enough to eliminate the unnecessary smudging after drop that I was
|
||||
seeing.
|
||||
"""]]
|
|
@ -0,0 +1,11 @@
|
|||
[[!comment format=mdwn
|
||||
username="branchable@bafd175a4b99afd6ed72501042e364ebd3e0c45e"
|
||||
nickname="branchable"
|
||||
avatar="http://cdn.libravatar.org/avatar/ae41dba34ee6000056f00793c695be75"
|
||||
subject="comment 2"
|
||||
date="2019-09-30T22:25:11Z"
|
||||
content="""
|
||||
Given that there is one remotedaemon process per repository, if a user has (say) 10 of them running and wants to stop a particular one, what is the expectation of how they would do this? Presumably it would involve something like searching the process table for a remotedaemon process whose cwd is the repository in question. I can't think of any trivial one-liner to do this, since the usual suspects like `pkill` / `ps` / `pidof` etc. do not support filtering by cwd.
|
||||
|
||||
So an advantage of implementing pidfiles and `--stop` would be that each user doesn't have to worry about such details.
|
||||
"""]]
|
9
doc/devblog/day_602__sameas.mdwn
Normal file
9
doc/devblog/day_602__sameas.mdwn
Normal file
|
@ -0,0 +1,9 @@
|
|||
Plenty of stuff going in that has not made the blog. Now I'm working on a
|
||||
sort of major feature.
|
||||
[[todo/support_multiple_special_remotes_with_same_uuid]] will fill in an odd
|
||||
little hole in git-annex's capabilities. I think it will turn out to be
|
||||
more useful than it appears. But it's major not so much in what it will
|
||||
allow, but in how many assumptions in the git-annex code base have to be
|
||||
worked around to implement it. After pondering lots of approaches, I have
|
||||
finally gotten stuck in to implementing it, and I've made some good
|
||||
progress today (on the `sameas` branch). I might finish it tomorrow.
|
|
@ -0,0 +1,9 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="git-annex-add on a file that is being written by some process"
|
||||
date="2019-10-08T17:03:33Z"
|
||||
content="""
|
||||
Does `git-annex-add` also check for actively-written files, and wait until they're no longer being written before adding them?
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,15 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 4"""
|
||||
date="2019-10-08T19:33:39Z"
|
||||
content="""
|
||||
It does not, mostly because it would need to run lsof once per file, which
|
||||
would be significantly expensive. The daemon is able to batch changes and
|
||||
so run lsof less often.
|
||||
|
||||
However, git-annex add does detect if a file is modified while it's being
|
||||
hashed, and will avoid adding it then. And it prevents against most ways
|
||||
the file could be modified except for if something has it open for write
|
||||
beforehand. In the worst case, the object in the annex gets written to,
|
||||
which later gets detected by fsck.
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 5"""
|
||||
date="2019-10-08T19:40:32Z"
|
||||
content="""
|
||||
The assistant can display a gui, but it can also run headless.
|
||||
|
||||
Anyway, it's fine to use `git annex sync` or other things that move content
|
||||
around while `git annex watch` is running.
|
||||
"""]]
|
|
@ -0,0 +1,9 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="preventing git-annex-add of files open for writing"
|
||||
date="2019-10-08T20:14:41Z"
|
||||
content="""
|
||||
\"except for if something has it open for write beforehand\" -- maybe, `git-annex-add` could do a pre-pass where it finds all files to be added, chmods them to read-only to stop new open-for-write attempts, then runs lsof once to find any existing ones? The files could be hardlinked into one temp dir then chmod and lsof run on that dir.
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,12 @@
|
|||
Following up on [DataLad's git-objects special remote idea PR](https://github.com/datalad/datalad/pull/3727#issuecomment-540116323), I have thought to try establishing git-lfs powered (special) remote on github, within assistant (installed a few days back) ran within Termux, on an android phone.
|
||||
|
||||
Within assistant web UI I did not find a good match for this scenario in "Add more repositories". What would it be (if any exists/defined already)?
|
||||
|
||||
Having found none, I've decided to try the closest match -- "remote server using ssh". But in that form, for some reason, after entering "github.com" for "Host name" results in "cannot resolve host name".
|
||||
|
||||
- I verified that github.com is reachable in chrome
|
||||
- I thought that may be Termux "session" doesn't have access to DNS, so I switched to termux (where I started `git annex webapp`), and ran `ping github.com`.
|
||||
- it did resolve the IP (to 192.30.253.113)
|
||||
- but no ping came back
|
||||
|
||||
[[!meta author=yoh]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="yarikoptic"
|
||||
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
|
||||
subject="comment 1"
|
||||
date="2019-10-09T18:10:05Z"
|
||||
content="""
|
||||
FWIW -- seems to be a generic (not github.com specific) DNS issue, since fails to resolve other names as well.
|
||||
|
||||
In the dashboard there is a Warning about `RemoteControl crashed: user error (nice [... remotedaemon... exited 1)` -- could be related?
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="separate annex.git-add.largefiles and annex.git-annex-add.largefiles settings"
|
||||
date="2019-10-10T18:33:50Z"
|
||||
content="""
|
||||
[[separate `annex.git-add.largefiles` and `annex.git-annex-add.largefiles` settings|todo/separate_annex.largefiles.git-add_and_annex.largefiles.git-annex-add_settings]] would let you configure `git add` to only add to git while still letting `git annex add` decide what gets annexed.
|
||||
"""]]
|
|
@ -0,0 +1,21 @@
|
|||
[[!comment format=mdwn
|
||||
username="Dwk"
|
||||
avatar="http://cdn.libravatar.org/avatar/65fade4f1582ef3f00e9ad6ae27dae56"
|
||||
subject="Perhaps a good behaviour but only if largefiles is set"
|
||||
date="2019-10-05T02:34:42Z"
|
||||
content="""
|
||||
This is indeed a sane default for people who want to annex every file. It is also a nice behaviour as soon as largefiles is set (it simplifies one's workflows and avoids errors).
|
||||
|
||||
However, it makes little sense as a default for people who use git-annex to manage some large files inside a normal git repo. They are basically forced to configure largefiles, since out-of-the-box git-annex now essentially breaks git: as Ilya points out, it breaks a very standard git workflow – you add a file, you push, you pull in another clone, and you then expect to have the contents of the file. (Worse, it does it in a silent way: since git-add adds the file unlocked, there is no straightforward way of noticing that the file has, in fact, been annexed and therefore that git won't be able to sync it.)
|
||||
|
||||
At bottom, the problem is to accommodate two very different groups of users.
|
||||
|
||||
It would require more thought, but I would favor a solution like the following:
|
||||
|
||||
1. modify git-add's behaviour *only if* largefiles is set;
|
||||
2. explain carefully in the doc that largefiles will alter git-add's behaviour (I believe git-annex should modify the underlying git behaviour as little as possible and not without due warning);
|
||||
3. warn in the doc that without a largefile setting, some unfortunate errors (those you mention in your comments) become likely, so as to make the advantages of a largefile settings clear;
|
||||
4. perhaps, add a question when doing git-annex-init: are you planning to use git-annex to manage all your files? If yes, set `largefiles=anything` and warn that git-add will now add things to the annex; if no, do not set largefiles and thus keep the current default until the user decides otherwise.
|
||||
|
||||
Disclaimer: my judgment may be clouded by the fact that I was unpleasantly taken by surprise by the change (and lost a few hours of work to this, until I got access to the internet and figured out the issue): upon upgrading, I felt like git-annex had done some kind of man-in-the-middle attack on my normal git…
|
||||
"""]]
|
|
@ -0,0 +1,36 @@
|
|||
[[!comment format=mdwn
|
||||
username="CandyAngel"
|
||||
avatar="http://cdn.libravatar.org/avatar/15c0aade8bec5bf004f939dd73cf9ed8"
|
||||
subject="comment 6"
|
||||
date="2019-10-07T08:30:52Z"
|
||||
content="""
|
||||
If you want to add the file to git, use `git add`.
|
||||
If you want to add the file to git-annex, use `git annex add`.
|
||||
Simples!
|
||||
|
||||
There isn't any other behaviour which is a more obvious default.
|
||||
|
||||
> Suppose you have an unlocked file in your repo, and you rename it (not using git move), and then git add it. Oops, now you've added to git a large file that you wanted to be annexed
|
||||
|
||||
If you wanted it to be annexed, you should have `git annex add`'d it! git-annex doesn't (and can't) know that the user wanted something different from the totally valid command they issued.
|
||||
|
||||
> you would surely hope that the annexed ones stay annexed and don't get committed directly to git
|
||||
|
||||
If the modified file changes its match state from largefiles (e.g. crossing a filesize threshold), it would still change state between annexed/non-annexed, wouldn't it?
|
||||
|
||||
> keeping track of which files are supposed to be in the annex and which in git is very failure prone
|
||||
> And it needs to default to adding files to the annex, otherwise the above two cases can cause problems.
|
||||
|
||||
Not only is it failure prone, the only thing that knows which is wanted is.. the user. The decision to usurp git and the user creates the first 2 problem cases. If you go with the expectation that the user will issue the correct commands for what they want to happen (fair, considering only the user knows), the first two cases are obviously not problems.
|
||||
|
||||
> If git add does something the user doesn't want
|
||||
|
||||
Why would it? It just adds files to git, right?
|
||||
|
||||
> Recovery [..] from [adding file to git] can be arbitrarily complicated, including needing to fix problems in clones on other people's computers.
|
||||
|
||||
And this can still totally happen if largefiles is not set correctly for what the user wants.
|
||||
|
||||
|
||||
Sure, you can set up git-annex to do magic to make your workflow easier or more seamless. Key words there being \"*set up*\". It shouldn't be doing such magic by default.
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="comment 7"
|
||||
date="2019-10-07T18:20:21Z"
|
||||
content="""
|
||||
Related: [[todo/addunlocked_config_setting]].
|
||||
|
||||
Re: this thread, I also think preserving `git add` default behavior (adding to git) is better. I'm not sure it should *always* add to git. The whole point of v7 was, as I understand it, to make it possible to use normal git workflow (`git add`; `git commit`; [make changes]; `git add`; `git commit`) with large files without thinking about it. Existing scripts that just call `git add` and are unaware of `git-annex-add` would still work. So it makes sense to let `git add` add to annex *when explicitly configured*. In my use case, I'd like to configure it so that any files it adds to the annex are added as locked by default.
|
||||
"""]]
|
|
@ -0,0 +1,15 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 8"""
|
||||
date="2019-10-08T18:03:05Z"
|
||||
content="""
|
||||
Candyangel, you missed this part of my comment:
|
||||
|
||||
> But `mv foo bar; git add bar` is normally identical to `git mv foo bar`.
|
||||
> Why should using git-annex break that identity?
|
||||
|
||||
With locked files, that identity still holds; you can mv a symlink and git
|
||||
add it, and you again have an annexed file. So every git and git-annex
|
||||
repository has always behaved that way. There are innumerable workflows and
|
||||
documentation that depend on that, in big or small ways.
|
||||
"""]]
|
|
@ -0,0 +1,12 @@
|
|||
[[!comment format=mdwn
|
||||
username="CandyAngel"
|
||||
avatar="http://cdn.libravatar.org/avatar/15c0aade8bec5bf004f939dd73cf9ed8"
|
||||
subject="comment 9"
|
||||
date="2019-10-09T07:53:43Z"
|
||||
content="""
|
||||
I'm not sure which part of my post you are responding to..
|
||||
|
||||
Using `git add` with symlinks makes sense because you are adding the symlink, not the file, to git.
|
||||
|
||||
But we are talking about actual files (because unlocked or non-annexed), right? Where `git add` would add it to git instead of the annex.. which makes sense because the command for adding a *file* to git-annex is `git annex add`, not `git add`.
|
||||
"""]]
|
20
doc/forum/slow_s3_transfer.mdwn
Normal file
20
doc/forum/slow_s3_transfer.mdwn
Normal file
|
@ -0,0 +1,20 @@
|
|||
Hi,
|
||||
|
||||
I am experiencing slow transfer (UL/DL) speed with an S3-compatible remote.
|
||||
Here is the configuration:
|
||||
|
||||
-Minio server for S3 storage in docker.
|
||||
|
||||
-Nginx for https in docker.
|
||||
|
||||
-git-annex (and datalad) with the S3 set as a remote, without public URL to control access.
|
||||
|
||||
I tried to get files directly from the server to remove all the network factors.
|
||||
|
||||
When downloading from the Minio server through nginx with wget, I have transfer rates of about 50-60MB/s.
|
||||
|
||||
When running `git annex get` on the same file, the transfer rate is about 6-7MB/s.
|
||||
|
||||
Any idea why git-annex would slow the transfer?
|
||||
|
||||
Thanks
|
|
@ -0,0 +1,11 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-10-08T18:48:28Z"
|
||||
content="""
|
||||
Nothing immediately comes to mind, except perhaps wget is using a larger
|
||||
buffer or significantly less CPU or something like that.
|
||||
|
||||
Is git-annex CPU bound when doing this? Ie, does top show it using 100% of
|
||||
a CPU?
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="bugmenot"
|
||||
avatar="http://cdn.libravatar.org/avatar/d3f7106e378e83d54c010c230ff074b9"
|
||||
subject="--link accepts itself"
|
||||
date="2019-10-10T05:28:53Z"
|
||||
content="""
|
||||
Is there a good reason why `git annex p2p --link` will accept addresses that point to itself?
|
||||
|
||||
I'm trying to script automatic addition of .onion peers, and it's a bit annoying to have to filter the .onion pointing to the current repo instead of just adding all of the .onion urls and having it ignore/warn/fail on the one that is currently in .git/annex/creds/p2paddrs
|
||||
"""]]
|
|
@ -1,22 +0,0 @@
|
|||
git-annex 7.20190626 released with [[!toggle text="these changes"]]
|
||||
[[!toggleable text="""
|
||||
* get, move, copy, sync: When -J or annex.jobs has enabled concurrency,
|
||||
checksum verification uses a separate job pool than is used for
|
||||
downloads, to keep bandwidth saturated.
|
||||
* Other commands also run their cleanup phase using a separate job pool
|
||||
than their perform phase, which may make some of them somewhat faster
|
||||
when running concurrently as well.
|
||||
* When downloading an url and the destination file exists but is empty,
|
||||
avoid using http range to resume, since a range "bytes=0-" is an unusual
|
||||
edge case that it's best to avoid relying on working. This is known to
|
||||
fix a case where importfeed downloaded a partial feed from such a server.
|
||||
* importfeed: When there's a problem parsing the feed, --debug will
|
||||
output the feed content that was downloaded.
|
||||
* init: Fix a reversion in the last release that prevented automatically
|
||||
generating and setting a description for the repository.
|
||||
* add: Display progress meter when hashing files.
|
||||
* add: Support --json-progress option.
|
||||
* The Linux standalone arm build now works again on CPU versions below
|
||||
arm6. Thanks to Emanuele Olivetti, Ilias Tsitsimpis, Bernhard
|
||||
Übelacker, and Adrian Bunk for fixing ghc in Debian (bug #928882).
|
||||
* OSX dmg: Put git-annex's version in the Info.plist file."""]]
|
|
@ -1,3 +0,0 @@
|
|||
git-annex 7.20190708 released with [[!toggle text="these changes"]]
|
||||
[[!toggleable text="""
|
||||
* Fix find --json to output json once more."""]]
|
29
doc/news/version_7.20191009.mdwn
Normal file
29
doc/news/version_7.20191009.mdwn
Normal file
|
@ -0,0 +1,29 @@
|
|||
git-annex 7.20191009 released with [[!toggle text="these changes"]]
|
||||
[[!toggleable text="""
|
||||
* Fix bug in handling of annex.largefiles that use largerthan/smallerthan.
|
||||
When adding a modified file, it incorrectly used the file size of the
|
||||
old version of the file, not the current size.
|
||||
* Added --mimetype and --mimeencoding file matching options.
|
||||
* Added --unlocked and --locked file matching options.
|
||||
* Added adjust --lock, to enter an adjusted branch where files are locked.
|
||||
* git-lfs: Added support for http basic auth.
|
||||
* git-lfs: Only do endpoint discovery once when concurrency is enabled.
|
||||
* fsck --incremental/--more: Fix bug that prevented the incremental fsck
|
||||
information from being updated every 5 minutes as it was supposed to be;
|
||||
it was only updated after 1000 files were checked, which may be more
|
||||
files than are possible to fsck in a given fsck time window.
|
||||
Thanks to Peter Simons for help with analysis of this bug.
|
||||
* Test: Use more robust directory removal when built with directory-1.2.7.
|
||||
* Close sqlite databases more robustly.
|
||||
* remotedaemon: Don't list --stop in help since it's not supported.
|
||||
* enable-tor: Run kdesu with -c option.
|
||||
* enable-tor: Use pkexec to run command as root when gksu and kdesu are not
|
||||
available.
|
||||
* When dropping an unlocked file, preserve its mtime, which avoids
|
||||
git status unnecessarily running the clean filter on the file.
|
||||
* uninit: Remove several git hooks that git-annex init sets up.
|
||||
* uninit: Remove the smudge and clean filters that git-annex init sets up.
|
||||
* Work around git cat-file --batch's odd stripping of carriage return
|
||||
from the end of the line (some windows infection), avoiding crashing
|
||||
when the repo contains a filename ending in a carriage return.
|
||||
* git-annex-standalone.rpm: Fix the git-annex-shell symlink."""]]
|
|
@ -24,6 +24,7 @@ the git history is not stored in them.
|
|||
* [[tor]]
|
||||
* [[web]]
|
||||
* [[webdav]]
|
||||
* [[git]]
|
||||
* [[xmpp]]
|
||||
|
||||
The above special remotes are built into git-annex, and can be used
|
||||
|
|
30
doc/special_remotes/git.mdwn
Normal file
30
doc/special_remotes/git.mdwn
Normal file
|
@ -0,0 +1,30 @@
|
|||
Normally a git repository is not treated as a special remote, but as a git
|
||||
remote of the normal kind. Two exceptions to that are [[git-lfs]] and
|
||||
[[gcrypt]] special remotes.
|
||||
|
||||
But it is possible to register a git repository as a special remote.
|
||||
git-annex and git will use the remote the same as any normal git remote,
|
||||
but its url will be recorded in the repository. One benefit of doing this
|
||||
is it allows [[git-annex init|git-annex-init]] to autoenable the remote.
|
||||
|
||||
First you need a regular git remote with the url that you want to use for
|
||||
the special remote.
|
||||
|
||||
git remote add tmpremote ssh://...
|
||||
|
||||
Then, to set up the special remote:
|
||||
|
||||
git annex initremote myremote type=git location=ssh://... autoenable=true
|
||||
|
||||
The location must be the same url as the existing git remote.
|
||||
|
||||
Now `git annex init` in each clone of the repository will autoenable myremote.
|
||||
|
||||
Note that the name of the git remote (`tmpremote` above) has to be
|
||||
different than the name you later use for the special remote,
|
||||
since [[git-annex initremote|git-annex-initremote]] will refuse to use the name of an existing
|
||||
remote. To work around that, you could finish by removing `tmpremote` and
|
||||
enable the special remote:
|
||||
|
||||
git remote remove tmpremote
|
||||
git annex enableremote myremote
|
|
@ -67,3 +67,5 @@ James Read,
|
|||
Luke Shumaker,
|
||||
Marius Konitzer,
|
||||
Ryan Rix,
|
||||
Svenne Krap,
|
||||
Jelmer Vernooij,
|
||||
|
|
|
@ -140,6 +140,7 @@ When you have a file that is currently stored in the annex, and you want to
|
|||
convert that to be stored in git, here's how to accomplish that:
|
||||
|
||||
git annex unlock file
|
||||
git rm --cached file
|
||||
git -c annex.largefiles=nothing add file
|
||||
git commit file
|
||||
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="Dwk"
|
||||
avatar="http://cdn.libravatar.org/avatar/65fade4f1582ef3f00e9ad6ae27dae56"
|
||||
subject="annexed -> normal git does not work if annexed file was unlocked"
|
||||
date="2019-10-05T01:45:14Z"
|
||||
content="""
|
||||
The sequence of commands given to shift an unlocked annexed file to normal git (for instance a file annexed by mistake due to the new git-add behaviour), namely
|
||||
|
||||
git annex unlock file
|
||||
git -c annex.largefiles=nothing add file
|
||||
git commit file
|
||||
|
||||
does not work if the file is unmodified, as git sees no change to commit. In this case, I believe one should replace the first command with
|
||||
|
||||
git annex unannex file
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""Re: annexed -> normal git does not work if annexed file was unlocked"""
|
||||
date="2019-10-08T18:14:45Z"
|
||||
content="""
|
||||
Touching the file first avoids the problem, or git rm --cached to force
|
||||
git to re-add it. I've updated the example.
|
||||
"""]]
|
16
doc/todo/Describe_a_file_in_function_of_another_file.mdwn
Normal file
16
doc/todo/Describe_a_file_in_function_of_another_file.mdwn
Normal file
|
@ -0,0 +1,16 @@
|
|||
Hello,
|
||||
|
||||
In order to save space/bandwidth/... I would like to create a way to describe a file compared to another. You could see this as a kind of very special "remote" (which is local :P), that says "To produce file XXX, take file YYY, and run command CCC with args YYY".
|
||||
|
||||
You may ask why it is useful? I have several usecases:
|
||||
|
||||
1) my first usecase is that I would like to be able to generate thumbnails for my pictures in order to speed up display. A thumbnail can be easily created from a picture (with for example the convert command), but when you don't need the thumbnail, you may prefer to remove them locally to save space.
|
||||
|
||||
2) similarly, I have some RAW photo files, and a script to turn them into .JPG file. Or even better, I could have several scripts to convert my initial RAW files into several .JPG files, with different parameters/look. Keeping both RAW and developed JPG can be heavy, so this kind of tool could allow me to remove the .JPG file(s) when I don't need them anymore, so I don't mind to drop the .JPG file as soon as the RAW does exist (but if the RAW does not exist anymore, I shouldn't be able to remove the .JPG of course).
|
||||
|
||||
3) I also have on my desktop some compressed files (.iso for example, or old projects). Most of the time, I don't really need to keep the uncompressed .iso, but from time to time, I may need them. For now I manually uncompress them, use them, and delete them... But it could be cool to let git-annex deal with them automatically.
|
||||
|
||||
Does git-annex provide such functionality? If not, do you think it could be implementable?
|
||||
|
||||
Thanks!
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="comment 1"
|
||||
date="2019-10-03T00:36:54Z"
|
||||
content="""
|
||||
See [related discussion](https://github.com/datalad/datalad/issues/2850)
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="tobiasBora"
|
||||
avatar="http://cdn.libravatar.org/avatar/80a7d8c2a7b475b6b71198cce0faa6b0"
|
||||
subject="comment 2"
|
||||
date="2019-10-03T00:48:23Z"
|
||||
content="""
|
||||
So if I get it, it's interesting, but not implemented, and maybe tricky to implement? (The discussion is dead now)
|
||||
"""]]
|
|
@ -0,0 +1,12 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="special remote that restores contents by running a command"
|
||||
date="2019-10-04T18:16:39Z"
|
||||
content="""
|
||||
I still think it's doable and worth doing, don't have the bandwidth right now to implement it, but can help brainstorm. If you're interested in working on it, post in the [github thread](https://github.com/datalad/datalad/issues/2850), and maybe we can refine the design.
|
||||
|
||||
The key issues are: (1) you can't just [[`git-annex-copy`|git-annex-copy]] a file to this remote, you'll need to use [[`git-annex-setpresentkey`|git-annex-setpresentkey]] and [[`git-annex-registerurl`|git-annex-registerurl]] to record that contents with a given key can be obtained by running a given command, and (2) the result of running a command depends not just on the command line and the input file(s), but also on the environment in which the command is run, so to get bit-for-bit reconstruction of the contents you'd need to use Docker, or at least something like [conda](https://docs.conda.io/en/latest/). But even then, sometimes the exact output file depends on the current time or the name of some intermediate tempfile. So unless the command is 100% deterministic, re-running the command might produce contents that does not match the [[git-annex key|backends]].
|
||||
|
||||
For local use, you could make a simple webserver that handles URLs like `http://localhost:3000/cgi-bin/make_thumbnail.sh?orig_file_key=MD5-xxxxxx` , and have the CGI script run [[`git-annex-get --key`|git-annex-get]] to get the file contents and then extract the thumbnail and return that. Then you can use [[`git-annex-addurl`|git-annex-addurl]] to store the file in git-annex.
|
||||
"""]]
|
5
doc/todo/addunlocked_config_setting.mdwn
Normal file
5
doc/todo/addunlocked_config_setting.mdwn
Normal file
|
@ -0,0 +1,5 @@
|
|||
Can the `annex.addunlocked` be extended to have the same syntax as `annex.largefiles`? Also, can there be separate settings affecting `git add` and `git annex add`, e.g. `annex.git-add.addunlocked` and `annex.git-annex-add.addunlocked`, with both defaulting to the value of `annex.addunlocked` if not set?
|
||||
|
||||
Basically, I want a reliable way to prevent inadvertently adding files as annexed unlocked files.
|
||||
|
||||
Related: [[forum/lets_discuss_git_add_behavior]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-10-08T18:35:06Z"
|
||||
content="""
|
||||
It is not possible for `git add` to add files in locked form. git's
|
||||
interface simply does not allow that.
|
||||
"""]]
|
|
@ -12,7 +12,10 @@ That might need changes to the Remote setup method, not sure.
|
|||
Problem: Many urls could be used to clone. http and ssh are the obvious
|
||||
two. url=http:// url=ssh:// won't work, only one value will be used.
|
||||
url1= url2= is annoying for the user, especially if they later want to add
|
||||
another url with enableremote and have to work out the number.
|
||||
another url with enableremote and have to work out the number. Could
|
||||
make enableremote with a new url= add that as urlN=.
|
||||
[[support_multiple_special_remotes_with_same_uuid]] would solve it, perhaps
|
||||
in a cleaner way.
|
||||
|
||||
> If each url is treated as a separate special remote (which makes a lot of sense
|
||||
> by analogy with how regular git remotes work), then
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="automatic retries if index is locked"
|
||||
date="2019-10-08T16:01:52Z"
|
||||
content="""
|
||||
As a concrete example, if the `index.lock` file exists and has relatively recent mtime and a git process is running, it would help if git-annex could be configured to retry, up to a given number of times with increasing delays between retries, the operation that failed because the index is locked.
|
||||
|
||||
Also, from the log
|
||||
[[!format sh \"\"\"
|
||||
add metadata_orig.json ok
|
||||
(recording state in git...)
|
||||
fatal: Unable to create '/ssd/crogrun_191008_043145__8684__/viral-ngs-benchmarks/.git/index.lock': File exists.
|
||||
\"\"\"]]
|
||||
|
||||
it looks like the index.lock conflict is due to writing the [[git-annex branch|internals/#The_git-annex_branch]]? I thought git-annex used a separate index for that?
|
||||
|
||||
"""]]
|
|
@ -0,0 +1 @@
|
|||
Sometimes you want to operate on files touched by commits in a range, e.g. to `git-annex-copy` files added in the last 10 commits to an S3 special remote. Could the option be added, to commands that take a path to operate on, to give a commit range, with the meaning "operate on files changed by these commits"?
|
|
@ -0,0 +1,9 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2019-10-08T16:09:35Z"
|
||||
content="""
|
||||
If you have a command that generates a list of files changed by a series of
|
||||
commits, you can simply pass that list into --batch which is supported by
|
||||
most commands.
|
||||
"""]]
|
|
@ -1,3 +1,3 @@
|
|||
One way I've lost data is to git-annex-add it in an untrusted temp clone of a repo, then commit and push the git branch, but forget to git-annex-copy the annexed contents reference by that branch to a GloballyAvailable, (semi-)trusted remote. Then, when the temp clone is gone, the branch pushed to the repo is referencing permanently dead files. Maybe, git-annex-init could install a pre-push hook to check for this, and abort the push if it happens? Basically, to ensure that whatever data is referenced by pushed branches will actually be at least potentially get-table.
|
||||
One way I've lost data is to git-annex-add it in an untrusted temp clone of a repo, then commit and push the git branch, but forget to git-annex-copy the annexed contents referenced by that branch to a GloballyAvailable, (semi-)trusted remote. Then, when the temp clone is gone, the branch pushed to the repo is referencing permanently dead files. Maybe, git-annex-init could install a pre-push hook to check for this, and abort the push if it happens? Basically, to ensure that whatever data is referenced by pushed branches will actually be at least potentially get-table.
|
||||
|
||||
Even if the current repo is not temp/untrusted, when sharing data with someone, you may want to ensure that any annexed files referenced by a pushed branch are actually potentially available.
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="automatically sync content on git push/pull"
|
||||
date="2019-10-08T06:43:33Z"
|
||||
content="""
|
||||
Even better would be an option to automatically copy the content referenced by any pushed commits, to a specified remote; and/or, to automatically `git-annex-get` content referenced by any pulled commits. Then one can use git-annex like git without needing to remember to `git-annex-sync`. [[todo/operate_on_files_affected_by_a_commit_range]] could be useful in implementing the hooks.
|
||||
"""]]
|
|
@ -0,0 +1,15 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2019-10-08T16:12:24Z"
|
||||
content="""
|
||||
Installing such a pre-push hook by default is certainly going to break
|
||||
existing workflows, so I don't see that happening. It would break my own
|
||||
workflows, unless perhaps the hook only does something when the repository
|
||||
is untrusted.
|
||||
|
||||
But I don't see anything preventing you from writing such a hook yourself.
|
||||
It should be easy enough to use `git annex whereis` or something like that
|
||||
to determine when you want to block the push. If you need some additional
|
||||
query facility in git-annex to write it, we can talk about that.
|
||||
"""]]
|
|
@ -0,0 +1,5 @@
|
|||
Could there be separate `annex.git-add.largefiles` and `annex.git-annex-add.largefiles` settings, applying to files added via `git add` and `git annex add`, respectively? If not given, their value defaults to the value of `annex.largefiles`.
|
||||
|
||||
Reason: to prevent `git add` from inadvertently adding annexed files in unlocked form, I set `* annex.largefiles=nothing` at repo root; but then, `git annex add` won't annex anything either, unless specifically asked. I want to use `git add` to add files to git only (since it can't add them to git-annex in locked form), and to use `git annex add` to add files to either git or annex based on `annex.git-annex-add.largefiles` setting.
|
||||
|
||||
Related: [[forum/lets_discuss_git_add_behavior]]
|
|
@ -0,0 +1,25 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 10"""
|
||||
date="2019-10-08T15:25:05Z"
|
||||
content="""
|
||||
It might be possible to isolate the sameas changes only to things
|
||||
involving the location log. Use different uuids for sameas
|
||||
remotes. When updating the location log, substitute the sameas uuid.
|
||||
|
||||
There would need to be a sameas-aware way to check if a uuid is in the
|
||||
location log. Currently, loggedLocations is used to both see what remotes
|
||||
to try to get a key from, and for numcopies checking and related stuff
|
||||
(like skipping dropping entirely when loggedLocations does not have enough
|
||||
items in it). So there would need to be two variants of it. That seems
|
||||
likely to be a source of mistakes.
|
||||
|
||||
Another small problem with this idea is that a special remote may record
|
||||
its uuid somehow in the data store and check that it has the right uuid
|
||||
later (S3 does this with an "annex-uuid" in the bucket), and if two remotes
|
||||
with different uuids did that, there would be a conflict between them.
|
||||
|
||||
Also, it couldn't only be the location log; sameas mapping would also need
|
||||
to be done when using the chunk log. And a bit of encryption config
|
||||
inheritance would still be needed.
|
||||
"""]]
|
|
@ -0,0 +1,24 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 11"""
|
||||
date="2019-10-10T15:49:20Z"
|
||||
content="""
|
||||
Comment 6 talked about how to prevent old git-annex from getting confused
|
||||
when used in a repo with sameas remotes.
|
||||
|
||||
If remote.name.annex-uuid contains the uuid that sameas pointed to, then
|
||||
old git-annex will load the RemoteConfig for that uuid. Which is kind of
|
||||
... ok? The other gitconfig settings for the remote may or may not work
|
||||
with that RemoteConfig. But if accessing that remote fails with old
|
||||
git-annex, no problem. The only concerning thing I think would be if
|
||||
checkpresent somehow reported all content as missing from the remote... But
|
||||
if a misconfiguration of the gitconfig can do that, the special remote
|
||||
implementation is arguably already buggy.
|
||||
|
||||
So, I think it's ok to set remote.name.annex-uuid to the sameas
|
||||
uuid. There will need to be a new config key that indicates the uuid to
|
||||
get the RemoteConfig from.
|
||||
|
||||
Old git-annex enableremote still needs to be prevented from initializing a
|
||||
sameas remote, as it would set annex-uuid to the wrong uuid.
|
||||
"""]]
|
|
@ -3,12 +3,37 @@
|
|||
subject="""comment 4"""
|
||||
date="2019-10-01T16:26:12Z"
|
||||
content="""
|
||||
Started a `sameas` branch for this.
|
||||
|
||||
Logs.Remote.configSet will need some changes because it currently works
|
||||
on the basis of UUID, and so can't know when it's supposed to change a
|
||||
sameas remote. It will need an added RemoteName parameter.
|
||||
|
||||
The RemoteConfig is generated each run from the remote.log, and so the
|
||||
handling of sameas remotes needs to be done in Logs.Remote.readRemoteLog
|
||||
not by enableremote.
|
||||
|
||||
Logs.Remote.configSet will need some changes because it currently works
|
||||
on the basis of UUID, and so can't know when it's supposed to change a
|
||||
sameas remote. It seems it should instead work on the basis of the "name"
|
||||
field of the RemoteConfig.
|
||||
readRemoteLog makes a `Map UUID RemoteConfig`, which will need to
|
||||
change to `Map (UUID, RemoteName) RemoteConfig`
|
||||
|
||||
Digging into changing readRemoteLog, there are several problems. Here are
|
||||
some of the less tractable ones:
|
||||
|
||||
Remote.List.remoteGen looks up RemoteConfig by UUID. While it does have a
|
||||
Git remote and could look up the name of the remote from that, if the user
|
||||
renames a remote in .git/config, that would confuse it. That is not an
|
||||
acceptable tradeoff. So, a sameas remote would need to have some additional
|
||||
git config be set, giving the namespace that's used for it in the
|
||||
remote.log. If that's missing, it's un-namespaced. initremote/enableremote
|
||||
need to set that git config.
|
||||
|
||||
Annex.SpecialRemote.autoEnable uses readRemoteLog. It would likewise need
|
||||
to look at the git config for namespace to tell which sameas remotes
|
||||
have been auto-enabled.
|
||||
|
||||
Preferred content looks at the preferreddir= value from RemoteConfig,
|
||||
and only a uuid is available. So it would have to look at the preferreddir
|
||||
values from all RemoteConfigs for remotes with that uuid and somehow pick
|
||||
one consistently. Or, preferreddir could be inherited like encryption
|
||||
settings are, and not allowed to be set in a sameas remote's config.
|
||||
"""]]
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 5"""
|
||||
date="2019-10-01T19:52:22Z"
|
||||
content="""
|
||||
Further problem with namespaces: If two people init new sameas remotes with
|
||||
the same uuid at the same time, on merge one of them will be lost.
|
||||
"""]]
|
|
@ -0,0 +1,30 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 6"""
|
||||
date="2019-10-01T19:53:06Z"
|
||||
content="""
|
||||
Revisiting the idea of using different uuids with a sameas= parameter:
|
||||
|
||||
If one remote is marked dead, it ought to be the one that sameas= points
|
||||
to, since that's the uuid in the location log. So that's ok.
|
||||
|
||||
As long as enableremote does not allow changing the sameas= parameter,
|
||||
sameas loops could only occur maliciously, not in normal operation.
|
||||
So it's fine to break such a loop in an arbitrary way.
|
||||
|
||||
There would need to be a way to prevent a remote with sameas= from being
|
||||
used by a version of git-annex that does not support it. One way would be
|
||||
to omit the name= parameter from remote.log, and use some other parameter
|
||||
for the name. Then old git-annex could not enableremote with the wrong uuid.
|
||||
|
||||
Using remote.name.annex-uuid-sameas=uuid instead of remote.name.annex-uuid
|
||||
would prevent old git-annex from using initialized sameas remotes.
|
||||
(Need a better name, since the uuid stored there should be the remote's own
|
||||
uuid (needed to get its RemoteConfig), not the one that sameas= points to.)
|
||||
|
||||
Seems that encryption parameter inheritance would happen the same way as
|
||||
has been discussed above. When constructing the RemoteConfig, copy over the
|
||||
encryption parameters from the parent remote.
|
||||
|
||||
All in all, using separate uuids instead of name= seems perhaps better.
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="different repos with same uuid"
|
||||
date="2019-10-02T15:13:55Z"
|
||||
content="""
|
||||
\"It is already possible of course for two git remotes to have the same uuid, and also for a special remote and git remotes to have the same uuid\" -- but, in general, that's a situation to be avoided, right? Other than two protocols accessing the same datastore, are there times when you'd want that?
|
||||
|
||||
(Related: [[`git-annex-reinit`|git-annex-reinit]], [[todo/reinit_current_repo_to_new_uuid]])
|
||||
"""]]
|
|
@ -0,0 +1,38 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 8"""
|
||||
date="2019-10-07T16:37:39Z"
|
||||
content="""
|
||||
Looked into the extent of changes needed for the sameas parameter approach.
|
||||
|
||||
The only thing that looks at the "name" parameter is Annex.SpecialRemote,
|
||||
so the new alternative name parameter for sameas remotes can be handled
|
||||
entirely there.
|
||||
|
||||
That's good, but its specialRemoteMap will need to be changed since
|
||||
it assumes each uuid has a single associated name, which stops being the case.
|
||||
|
||||
Either Annex.SpecialRemote or Logs.Remote.readRemoteLog will need to handle
|
||||
the sameas parameter. Both have their problems. Comment 4 discussed how
|
||||
changing readRemoteLog would cause difficulties for some callers. But if
|
||||
Annex.SpecialRemote handles the sameas parameter, there will be times when
|
||||
a RemoteConfig contains sameas inherited encryption etc, and times when it
|
||||
does not. Would be worth making two different data types for those.
|
||||
|
||||
Remote.List.remoteGen gets the cached UUID and looks it up in the
|
||||
readRemoteLog map, so if readRemoteLog does not handle the sameas
|
||||
parameter, that will need to change to use something that does.
|
||||
|
||||
(There could be other readRemoteLog users that will similarly be problems.)
|
||||
|
||||
Logs.Remote.configSet will need to be changed as discussed in comment 4.
|
||||
|
||||
To avoid using remote.name.annex-uuid for sameas remotes,
|
||||
Remote.Helper.Special.gitConfigSpecialRemote will need to somehow know that
|
||||
it's a sameas remote. (It could look at the RemoteConfig for a sameas
|
||||
parameter.)
|
||||
|
||||
There are a couple of other places that set remote.name.annex-uuid,
|
||||
like Remote.GCrypt, so will need to factor out all setting of that into
|
||||
something that is sameas-aware.
|
||||
"""]]
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 9"""
|
||||
date="2019-10-08T15:17:06Z"
|
||||
content="""
|
||||
Per-remote state is an added complication. A sameas remote should not
|
||||
use the same per-remote state, because what's stored in it is up to the
|
||||
remote backend and would conflict.
|
||||
|
||||
So Logs.RemoteState would need to use something other than a UUID,
|
||||
which contains the underlying uuid of the sameas remote. (Logs.MetaData too for
|
||||
per-remote metadata.) That would have to be passed in when constructing the
|
||||
remote.
|
||||
|
||||
And, `git-annex forget` would need to be made to remove the per-remote state of
|
||||
sameas remotes that point to a dead uuid.
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="Ilya_Shlyakhter"
|
||||
avatar="http://cdn.libravatar.org/avatar/1647044369aa7747829c38b9dcc84df0"
|
||||
subject="comment 6"
|
||||
date="2019-10-08T20:23:06Z"
|
||||
content="""
|
||||
One more approach might be to configure `core.fsmonitor` to a custom one that reports missing unlocked files as unchanged even though they've been changed to symlinks.
|
||||
"""]]
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="jason.dixon.email@aa0e536a2ec2877d6f666108dbbc6e39bbe87ac0"
|
||||
nickname="jason.dixon.email"
|
||||
avatar="http://cdn.libravatar.org/avatar/fbe9050fc83bbd536d307d87ea14d4bc"
|
||||
subject="wanted content commands"
|
||||
date="2019-10-10T08:16:59Z"
|
||||
content="""
|
||||
I've often thought it would be handy to have a preferred content expression and a requested files list that work together. Which is, I think, similar to what you're saying?
|
||||
|
||||
So you could do something like \"git annex wanted . --request <somefile>\" which would add it to a list of wanted files, that overrides the preferred content. Then you'd remove them also.
|
||||
|
||||
Use case for this would be requesting a file. Syncing the request to, say, a usb. Plugging that usb in somewhere else later, and having those files be automatically transferred. Then when the file reaches the destination it's dropped.
|
||||
|
||||
I guess this is possible already with the wanted expressions? What would that look like?
|
||||
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="branchable@bafd175a4b99afd6ed72501042e364ebd3e0c45e"
|
||||
nickname="branchable"
|
||||
avatar="http://cdn.libravatar.org/avatar/ae41dba34ee6000056f00793c695be75"
|
||||
subject="There are still benefits to commit throttling"
|
||||
date="2019-09-30T22:16:10Z"
|
||||
content="""
|
||||
> The assistant does not commit files that are open for write.
|
||||
|
||||
Interesting; I see it uses lsof for this.
|
||||
|
||||
> So unless ffmpeg partially writes the file, then closes the file, then reopens it and writes some more, the assistant will only make a single commit.
|
||||
|
||||
OK, so that probably wasn't a good example. But that still doesn't negate my TODO list editing example, and it is not hard to think of other scenarios where partial results are written by one process and then rewritten in-place by another.
|
||||
|
||||
It also doesn't negate the fact that throttling the commit speed would also help reduce I/O between remotes in some cases simply by reducing \"churn\" within any given repo, as noted in [my comment on design/assistant/rate_limiting](https://git-annex.branchable.com/design/assistant/rate_limiting/#comment-c88bc709792c79037a41292c1db70889).
|
||||
"""]]
|
|
@ -1,5 +1,5 @@
|
|||
Name: git-annex
|
||||
Version: 7.20190912
|
||||
Version: 7.20191009
|
||||
Cabal-Version: >= 1.8
|
||||
License: AGPL-3
|
||||
Maintainer: Joey Hess <id@joeyh.name>
|
||||
|
|
|
@ -35,7 +35,7 @@ mkdir -p %{buildroot}/usr/lib/
|
|||
cp -a %{buildroot}/../git-annex.linux %{buildroot}/usr/lib
|
||||
mkdir -p %{buildroot}/usr/bin/
|
||||
ln -sf /usr/lib/git-annex.linux/git-annex %{buildroot}/usr/bin/git-annex
|
||||
ln -sf /usr/lib/git-annex.linux/git-annex %{buildroot}/usr/bin/git-annex-shell
|
||||
ln -sf /usr/lib/git-annex.linux/git-annex-shell %{buildroot}/usr/bin/git-annex-shell
|
||||
|
||||
%files
|
||||
%attr(-, root, root)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue