Merge branch 'master' into database
Conflicts: debian/changelog
This commit is contained in:
commit
bd6e41f8e6
28 changed files with 428 additions and 21 deletions
|
@ -44,31 +44,40 @@ mklibs top = do
|
||||||
-- Various files used by runshell to set up env vars used by the
|
-- Various files used by runshell to set up env vars used by the
|
||||||
-- linker shims.
|
-- linker shims.
|
||||||
writeFile (top </> "libdirs") (unlines libdirs)
|
writeFile (top </> "libdirs") (unlines libdirs)
|
||||||
writeFile (top </> "linker")
|
|
||||||
(Prelude.head $ filter ("ld-linux" `isInfixOf`) libs')
|
|
||||||
writeFile (top </> "gconvdir")
|
writeFile (top </> "gconvdir")
|
||||||
(parentDir $ Prelude.head $ filter ("/gconv/" `isInfixOf`) glibclibs)
|
(parentDir $ Prelude.head $ filter ("/gconv/" `isInfixOf`) glibclibs)
|
||||||
|
|
||||||
mapM_ (installLinkerShim top) exes
|
let linker = Prelude.head $ filter ("ld-linux" `isInfixOf`) libs'
|
||||||
|
mapM_ (installLinkerShim top linker) exes
|
||||||
|
|
||||||
{- Installs a linker shim script around a binary.
|
{- Installs a linker shim script around a binary.
|
||||||
-
|
-
|
||||||
- Note that each binary is put into its own separate directory,
|
- Note that each binary is put into its own separate directory,
|
||||||
- to avoid eg git looking for binaries in its directory rather
|
- to avoid eg git looking for binaries in its directory rather
|
||||||
- than in PATH.-}
|
- than in PATH.
|
||||||
installLinkerShim :: FilePath -> FilePath -> IO ()
|
-
|
||||||
installLinkerShim top exe = do
|
- The linker is symlinked to a file with the same basename as the binary,
|
||||||
createDirectoryIfMissing True shimdir
|
- since that looks better in ps than "ld-linux.so".
|
||||||
|
-}
|
||||||
|
installLinkerShim :: FilePath -> FilePath -> FilePath -> IO ()
|
||||||
|
installLinkerShim top linker exe = do
|
||||||
|
createDirectoryIfMissing True (top </> shimdir)
|
||||||
|
createDirectoryIfMissing True (top </> exedir)
|
||||||
renameFile exe exedest
|
renameFile exe exedest
|
||||||
|
link <- relPathDirToFile (top </> exedir) (top ++ linker)
|
||||||
|
unlessM (doesFileExist (top </> exelink)) $
|
||||||
|
createSymbolicLink link (top </> exelink)
|
||||||
writeFile exe $ unlines
|
writeFile exe $ unlines
|
||||||
[ "#!/bin/sh"
|
[ "#!/bin/sh"
|
||||||
, "exec \"$GIT_ANNEX_LINKER\" --library-path \"$GIT_ANNEX_LD_LIBRARY_PATH\" \"$GIT_ANNEX_SHIMMED/" ++ base ++ "/" ++ base ++ "\" \"$@\""
|
, "exec \"$GIT_ANNEX_DIR/" ++ exelink ++ "\" --library-path \"$GIT_ANNEX_LD_LIBRARY_PATH\" \"$GIT_ANNEX_DIR/shimmed/" ++ base ++ "/" ++ base ++ "\" \"$@\""
|
||||||
]
|
]
|
||||||
modifyFileMode exe $ addModes executeModes
|
modifyFileMode exe $ addModes executeModes
|
||||||
where
|
where
|
||||||
base = takeFileName exe
|
base = takeFileName exe
|
||||||
shimdir = top </> "shimmed" </> base
|
shimdir = "shimmed" </> base
|
||||||
exedest = shimdir </> base
|
exedir = "exe"
|
||||||
|
exedest = top </> shimdir </> base
|
||||||
|
exelink = exedir </> base
|
||||||
|
|
||||||
{- Converting symlinks to hard links simplifies the binary shimming
|
{- Converting symlinks to hard links simplifies the binary shimming
|
||||||
- process. -}
|
- process. -}
|
||||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -31,6 +31,7 @@ git-annex (5.20150206) UNRELEASED; urgency=medium
|
||||||
* sync, assistant: Use the ssh-options git config when doing git pull
|
* sync, assistant: Use the ssh-options git config when doing git pull
|
||||||
and push.
|
and push.
|
||||||
* remotedaemon: Use the ssh-options git config.
|
* remotedaemon: Use the ssh-options git config.
|
||||||
|
* Linux standalone: Improved process names of linker shimmed programs.
|
||||||
* fsck: Incremental fsck uses sqlite to store its records, instead
|
* fsck: Incremental fsck uses sqlite to store its records, instead
|
||||||
of abusing the sticky bit. Existing sticky bits are ignored,
|
of abusing the sticky bit. Existing sticky bits are ignored,
|
||||||
incremental fscks started by old versions won't be resumed by
|
incremental fscks started by old versions won't be resumed by
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://openid.stackexchange.com/user/814e4910-8e9b-4fe5-83ef-ff863c1a7314"
|
||||||
|
nickname="BehemothTheCat"
|
||||||
|
subject="push fails"
|
||||||
|
date="2015-02-14T00:11:40Z"
|
||||||
|
content="""
|
||||||
|
These instructions don't work for me, unfortunately.
|
||||||
|
|
||||||
|
This step:
|
||||||
|
|
||||||
|
git push origin master git-annex
|
||||||
|
|
||||||
|
results in:
|
||||||
|
|
||||||
|
To ssh://my.server.com/home/itz/git/annex.git
|
||||||
|
! [rejected] git-annex -> git-annex (non-fast-forward)
|
||||||
|
error: failed to push some refs to 'ssh://my.server.com/home/itz/git/annex.git'
|
||||||
|
hint: Updates were rejected because a pushed branch tip is behind its remote
|
||||||
|
hint: counterpart. Check out this branch and integrate the remote changes
|
||||||
|
hint: (e.g. 'git pull ...') before pushing again.
|
||||||
|
hint: See the 'Note about fast-forwards' in 'git push --help' for details.
|
||||||
|
|
||||||
|
Versions: git 1:1.9.1-1~bpo70+2 , git-annex 5.20141024~bpo70+1 (both packaged by Debian, same on local and remote)
|
||||||
|
|
||||||
|
And yes, I did a pull on the master branch first. Afraid to do anything
|
||||||
|
with the git-annex branch without explicit instruction.
|
||||||
|
|
||||||
|
"""]]
|
|
@ -0,0 +1,8 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://www.google.com/accounts/o8/id?id=AItOawnwNDA50ZupMvOgpgDqzDRyu5B-mYlVwa4"
|
||||||
|
nickname="Andreas"
|
||||||
|
subject="comment 8"
|
||||||
|
date="2015-02-16T17:32:32Z"
|
||||||
|
content="""
|
||||||
|
Sorry, missed the link. The recent version from the tarball fixes the issue for me.
|
||||||
|
"""]]
|
41
doc/bugs/weird_entry_in_process_list.mdwn
Normal file
41
doc/bugs/weird_entry_in_process_list.mdwn
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
### Please describe the problem.
|
||||||
|
|
||||||
|
The standalone linux binaries do not show up as `git-annex` in the process list, but as `ld-linux-x86-64` - it's pretty confusing!
|
||||||
|
|
||||||
|
### What steps will reproduce the problem?
|
||||||
|
|
||||||
|
Install the standalone binaries from downloads.kitenet.net, run git-annex.
|
||||||
|
|
||||||
|
### What version of git-annex are you using? On what operating system?
|
||||||
|
|
||||||
|
Today's snapshot from downloads.k.n.
|
||||||
|
|
||||||
|
### Please provide any additional information below.
|
||||||
|
|
||||||
|
[[!format sh """
|
||||||
|
# If you can, paste a complete transcript of the problem occurring here.
|
||||||
|
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
|
||||||
|
root@koumbit-mp-test:/var/isuma/media/video# top -b -n 1 | head -10
|
||||||
|
top - 14:00:09 up 15 days, 23:25, 4 users, load average: 1.18, 1.26, 1.34
|
||||||
|
Tasks: 216 total, 1 running, 213 sleeping, 0 stopped, 2 zombie
|
||||||
|
Cpu(s): 0.4%us, 0.1%sy, 0.0%ni, 99.3%id, 0.2%wa, 0.0%hi, 0.0%si, 0.0%st
|
||||||
|
Mem: 6122044k total, 5469364k used, 652680k free, 321080k buffers
|
||||||
|
Swap: 2928632k total, 0k used, 2928632k free, 4009592k cached
|
||||||
|
|
||||||
|
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
|
||||||
|
28261 root 20 0 4528 652 528 D 79 0.0 0:01.28 ld-linux-x86-64
|
||||||
|
1381 root 20 0 126m 13m 4060 S 2 0.2 190:25.64 Xorg
|
||||||
|
1 root 20 0 8356 812 684 S 0 0.0 0:05.50 init
|
||||||
|
root@koumbit-mp-test:/var/isuma/media/video# ps axf | grep annex
|
||||||
|
9861 pts/2 S+ 0:00 | \_ git annex add hd high high~ ipod ipod~ large low mp4_sd raw small wc xlarge
|
||||||
|
9862 pts/2 Sl+ 3:50 | \_ /opt/git-annex.linux//lib64/ld-linux-x86-64.so.2 --library-path /opt/git-annex.linux//etc/ld.so.conf.d:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/audit:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/gconv:/opt/git-annex.linux//usr/lib:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu:/opt/git-annex.linux//lib64:/opt/git-annex.linux//lib/x86_64-linux-gnu: /opt/git-annex.linux/shimmed/git-annex/git-annex add hd high high~ ipod ipod~ large low mp4_sd raw small wc xlarge
|
||||||
|
9878 pts/2 S+ 0:00 | \_ /opt/git-annex.linux//lib64/ld-linux-x86-64.so.2 --library-path /opt/git-annex.linux//etc/ld.so.conf.d:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/audit:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/gconv:/opt/git-annex.linux//usr/lib:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu:/opt/git-annex.linux//lib64:/opt/git-annex.linux//lib/x86_64-linux-gnu: /opt/git-annex.linux/shimmed/git/git --git-dir=.git --work-tree=. check-attr -z --stdin annex.backend annex.numcopies --
|
||||||
|
9881 pts/2 S+ 0:01 | \_ /opt/git-annex.linux//lib64/ld-linux-x86-64.so.2 --library-path /opt/git-annex.linux//etc/ld.so.conf.d:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/audit:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/gconv:/opt/git-annex.linux//usr/lib:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu:/opt/git-annex.linux//lib64:/opt/git-annex.linux//lib/x86_64-linux-gnu: /opt/git-annex.linux/shimmed/git/git --git-dir=.git --work-tree=. cat-file --batch
|
||||||
|
9882 pts/2 S+ 0:00 | \_ /opt/git-annex.linux//lib64/ld-linux-x86-64.so.2 --library-path /opt/git-annex.linux//etc/ld.so.conf.d:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/audit:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/gconv:/opt/git-annex.linux//usr/lib:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu:/opt/git-annex.linux//lib64:/opt/git-annex.linux//lib/x86_64-linux-gnu: /opt/git-annex.linux/shimmed/git/git --git-dir=.git --work-tree=. cat-file --batch
|
||||||
|
28293 pts/2 R+ 0:00 | \_ /opt/git-annex.linux//lib64/ld-linux-x86-64.so.2 --library-path /opt/git-annex.linux//etc/ld.so.conf.d:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/audit:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu/gconv:/opt/git-annex.linux//usr/lib:/opt/git-annex.linux//usr/lib/x86_64-linux-gnu:/opt/git-annex.linux//lib64:/opt/git-annex.linux//lib/x86_64-linux-gnu: /opt/git-annex.linux/shimmed/sha256sum/sha256sum .git/annex/misctmp/videonew9862
|
||||||
|
# End of transcript or log.
|
||||||
|
"""]]
|
||||||
|
|
||||||
|
couldn't it alter its process name to make this a little more intuitive? This is especially problematic because i am trying to hook git-annex into Puppet and Facter, which require me to guess where the various git-annex repos are on the server. The way i was doing that so far was with `lsof -c 'git-annex' -F0tn`, which is obviously failing under those circumstances.... Unless there's a better way to find those repos across the system? I assume there's a git-annex assistant running here... --[[anarcat]]
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
|
@ -0,0 +1,13 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2015-02-16T23:35:08Z"
|
||||||
|
content="""
|
||||||
|
Haskell programs actually cannot alter their process name. I've had a bug
|
||||||
|
open on ghc for a year about that.
|
||||||
|
|
||||||
|
However, I can make a nicer symlink name than ld-linux.so, and use that,
|
||||||
|
and it will then be clear what program is being run, although the
|
||||||
|
parameters to it will still be unusual due to the shimming used in the
|
||||||
|
standalone build.
|
||||||
|
"""]]
|
|
@ -6,7 +6,7 @@ locally paired systems, and remote servers with rsync.
|
||||||
Help me prioritize my work: What special remote would you most like
|
Help me prioritize my work: What special remote would you most like
|
||||||
to use with the git-annex assistant?
|
to use with the git-annex assistant?
|
||||||
|
|
||||||
[[!poll open=yes 18 "Amazon S3 (done)" 12 "Amazon Glacier (done)" 10 "Box.com (done)" 74 "My phone (or MP3 player)" 25 "Tahoe-LAFS" 13 "OpenStack SWIFT" 35 "Google Drive"]]
|
[[!poll open=yes 18 "Amazon S3 (done)" 12 "Amazon Glacier (done)" 10 "Box.com (done)" 74 "My phone (or MP3 player)" 25 "Tahoe-LAFS" 13 "OpenStack SWIFT" 36 "Google Drive"]]
|
||||||
|
|
||||||
This poll is ordered with the options I consider easiest to build
|
This poll is ordered with the options I consider easiest to build
|
||||||
listed first. Mostly because git-annex already supports them and they
|
listed first. Mostly because git-annex already supports them and they
|
||||||
|
|
|
@ -25,11 +25,11 @@ Store in the database the Ref of the branch that was used to construct it.
|
||||||
|
|
||||||
## implementation plan
|
## implementation plan
|
||||||
|
|
||||||
1. Implement for metadata, on a branch, with sqlite.
|
1. Store incremental fsck info in db, on a branch, with sqlite.
|
||||||
2. Make sure that builds on all platforms.
|
2. Make sure that builds on all platforms.
|
||||||
3. Add associated file mappings support. This is needed to fully
|
3. Implement for metadata, on a branch, with sqlite.
|
||||||
|
4. Add associated file mappings support. This is needed to fully
|
||||||
use the caching database to construct views.
|
use the caching database to construct views.
|
||||||
4. Store incremental fsck info in db.
|
|
||||||
5. Replace .map files with 3. for direct mode.
|
5. Replace .map files with 3. for direct mode.
|
||||||
|
|
||||||
## sqlite or not?
|
## sqlite or not?
|
||||||
|
@ -39,12 +39,21 @@ SQL. And even if that's hidden by a layer like persistent, it's still going
|
||||||
to involve some technical debt (eg, database migrations).
|
to involve some technical debt (eg, database migrations).
|
||||||
|
|
||||||
It would be great if there were some haskell thing like acid-state
|
It would be great if there were some haskell thing like acid-state
|
||||||
that I could use instead. But, acid-sate needs to load the whole
|
that I could use instead. But, acid-state needs to load the whole
|
||||||
DB into memory. In the comments of
|
DB into memory. In the comments of
|
||||||
[[bugs/incremental_fsck_should_not_use_sticky_bit]] I examined several
|
[[bugs/incremental_fsck_should_not_use_sticky_bit]] I examined several
|
||||||
other haskell database-like things, and found them all wanting, except for
|
other haskell database-like things, and found them all wanting, except for
|
||||||
possibly TCache. (And TCache is backed by persistent/sqlite anyway.)
|
possibly TCache. (And TCache is backed by persistent/sqlite anyway.)
|
||||||
|
|
||||||
|
## one db or multiple?
|
||||||
|
|
||||||
|
Using a single database will use less space. Eg, each Key will only need to
|
||||||
|
appear in it once, with proper normalization.
|
||||||
|
|
||||||
|
OTOH, it's more complicated, and harder to recover from problems.
|
||||||
|
|
||||||
|
Currently leaning toward one database per purpose.
|
||||||
|
|
||||||
## case study: persistent with sqllite
|
## case study: persistent with sqllite
|
||||||
|
|
||||||
Here's a non-normalized database schema in persistent's syntax.
|
Here's a non-normalized database schema in persistent's syntax.
|
||||||
|
@ -123,6 +132,11 @@ eg, esquelito.
|
||||||
Update2: Using esquelito to do a join got this down to 0.109s.
|
Update2: Using esquelito to do a join got this down to 0.109s.
|
||||||
See `database` branch for code.
|
See `database` branch for code.
|
||||||
|
|
||||||
|
Update3: Converting to a single un-normalized table for AssociatedFiles
|
||||||
|
avoids the join, and increased lookup speed to 0.087s. Of course, when
|
||||||
|
a key has multiple associated files, this will use more disk space, due
|
||||||
|
to not normalizing the key.
|
||||||
|
|
||||||
Compare the above with 1000 calls to `associatedFiles`, which is approximately
|
Compare the above with 1000 calls to `associatedFiles`, which is approximately
|
||||||
as fast as just opening and reading 1000 files, so will take well under
|
as fast as just opening and reading 1000 files, so will take well under
|
||||||
0.05s with a **cold** cache.
|
0.05s with a **cold** cache.
|
||||||
|
|
56
doc/devblog/day_253__sqlite_for_incremental_fsck.mdwn
Normal file
56
doc/devblog/day_253__sqlite_for_incremental_fsck.mdwn
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
Yesterday I did a little more investigation of key/value stores.
|
||||||
|
I'd love a pure haskell key/value store that didn't buffer everything in
|
||||||
|
memory, and that allowed concurrent readers, and was ACID, and production
|
||||||
|
quality. But so far, I have not found anything that meets all those
|
||||||
|
criteria. It seems that sqlite is the best choice for now.
|
||||||
|
|
||||||
|
Started working on the `database` branch today. The plan is to use
|
||||||
|
sqlite for incremental fsck first, and if that works well, do the rest
|
||||||
|
of what's planned in [[design/caching_database]].
|
||||||
|
|
||||||
|
At least for now, I'm going to use a dedicated database file for each
|
||||||
|
different thing. (This may not be as space-efficient due to lacking
|
||||||
|
normalization, but it keeps things simple.)
|
||||||
|
|
||||||
|
So, .git/annex/fsck.db will be used by incremental fsck, and it has
|
||||||
|
a super simple Persistent database schema:
|
||||||
|
|
||||||
|
[[!format haskell """
|
||||||
|
Fscked
|
||||||
|
key SKey
|
||||||
|
UniqueKey key
|
||||||
|
"""]]
|
||||||
|
|
||||||
|
It was pretty easy to implement this and make incremental fsck use it. The
|
||||||
|
hard part is making it both fast and robust.
|
||||||
|
|
||||||
|
At first, I was doing everything inside a single `runSqlite` action.
|
||||||
|
Including creating the table. But, it turns out that runs as a single
|
||||||
|
transaction, and if it was interrupted, this left the database in a
|
||||||
|
state where it exists, but has no tables. Hard to recover from.
|
||||||
|
|
||||||
|
So, I separated out creating the database, made that be done in a separate
|
||||||
|
transaction and fully atomically. Now `fsck --incremental` could be ctrl-c'd
|
||||||
|
and resumed with `fsck --more`, but it would lose the transaction and so
|
||||||
|
not remember anything had been checked.
|
||||||
|
|
||||||
|
To fix that, I tried making a separate transaction per file fscked. That
|
||||||
|
worked, and it resumes nicely where it left off, but all those transactions
|
||||||
|
made it much slower.
|
||||||
|
|
||||||
|
To fix the speed, I made it commit just one transaction per minute. This
|
||||||
|
seems like an ok balance. Having fsck re-do one minute's work when restarting
|
||||||
|
an interrupted incremental fsck is perfectly reasonable, and now the speed,
|
||||||
|
using the sqlite database, is nearly as fast as the old sticky bit hack was.
|
||||||
|
(Specifically, 6m7s old vs 6m27s new, fscking 37000 files from cold cache
|
||||||
|
in --fast mode.)
|
||||||
|
|
||||||
|
There is still a problem with multiple concurrent `fsck --more`
|
||||||
|
failing. Probably a concurrent writer problem? And, some porting will be
|
||||||
|
required to get sqlite and persistent working on Windows and Android.
|
||||||
|
So the branch isn't ready to merge yet, but it seems promising.
|
||||||
|
|
||||||
|
In retrospect, while incremental fsck has the simplest database schema, it
|
||||||
|
might be one of the harder things listed in [[design/caching_database]],
|
||||||
|
just because it involves so many writes to the database. The other use
|
||||||
|
cases are more read heavy.
|
|
@ -0,0 +1,7 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="comment 1"
|
||||||
|
date="2015-02-16T21:26:22Z"
|
||||||
|
content="""
|
||||||
|
i am curious: why separate database files while you can have multiple tables in the same database file? --[[anarcat]]
|
||||||
|
"""]]
|
|
@ -104,6 +104,14 @@ command run on that work tree, and then updating the real work
|
||||||
tree to reflect any changes staged or committed by the git command,
|
tree to reflect any changes staged or committed by the git command,
|
||||||
with appropriate handling of the direct mode files.
|
with appropriate handling of the direct mode files.
|
||||||
|
|
||||||
|
## undoing changes in direct mode
|
||||||
|
|
||||||
|
There is also the `undo` command to do the equivalent of the above revert in a simpler way. Say you made a change in direct mode, the assistant dutifully committed it and you realise your mistake, you can try:
|
||||||
|
|
||||||
|
git annex undo file
|
||||||
|
|
||||||
|
to revert the last change to `file`. Note that you can use the `--depth` flag to revert earlier versions of the file.
|
||||||
|
|
||||||
## forcing git to use the work tree in direct mode
|
## forcing git to use the work tree in direct mode
|
||||||
|
|
||||||
This is for experts only. You can lose data doing this, or check enormous
|
This is for experts only. You can lose data doing this, or check enormous
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
My copy of *git-annex* refuses to sync all, namely when I try it I get the following error
|
||||||
|
|
||||||
|
$ git annex sync --content --all
|
||||||
|
git-annex: unrecognized option `--all'
|
||||||
|
|
||||||
|
Usage: git-annex sync [REMOTE ...] [option ...]
|
||||||
|
--content also transfer file contents
|
||||||
|
|
||||||
|
To see additional options common to all commands, run: git annex help options
|
||||||
|
|
||||||
|
This contradicts the advice on [preferred content](http://git-annex.branchable.com/preferred_content/) set out under **difference: unused**,
|
||||||
|
and I cannot see any other options in my man page that would address the lack of this option.
|
||||||
|
|
||||||
|
The problem I am trying to solve is that I wish to preserve all history on the backup drives. Namely, if I do the following
|
||||||
|
|
||||||
|
touch test-of-annex-backup.txt
|
||||||
|
git annex add test-of-annex-backup.txt
|
||||||
|
git commit --message='test: Create empty test-of-annex-backup.txt file'
|
||||||
|
git annex edit test-of-annex-backup.txt
|
||||||
|
echo "This line creates version 2 of this file" > test-of-annex-backup.txt
|
||||||
|
git annex add test-of-annex-backup.txt
|
||||||
|
git commit --message='test: Create version 2 of test-of-annex-backup.txt'
|
||||||
|
git annex sync --content --all
|
||||||
|
|
||||||
|
I expect to see 2 copies of `test-of-annex-backup.txt` be copied to each accessible annex repository in the `backup` group
|
||||||
|
|
||||||
|
I tried googling for `"git annex sync --content --all"`, but I only find pages telling me that this is what I should use, and none saying the option has been deprecated.
|
||||||
|
|
||||||
|
I am very confused, as this seems to me an almost stereotypical use of *git-annex*, and yet I cannot see how to do it
|
||||||
|
|
||||||
|
thanks
|
||||||
|
|
||||||
|
Andrew
|
|
@ -0,0 +1,77 @@
|
||||||
|
Step by step:
|
||||||
|
|
||||||
|
git annex add ./hugePictureFolder<br>
|
||||||
|
// no it's too big and taking too long, let's not do this<br>
|
||||||
|
CTRL+D<br>
|
||||||
|
git annex --force drop ./hugePictureFolder<br>
|
||||||
|
git status<br>
|
||||||
|
fatal: bad default revision 'HEAD'<br>
|
||||||
|
git reset --hard git-annex<br>
|
||||||
|
git status // ok<br>
|
||||||
|
ls <br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 000<br>
|
||||||
|
drwxr-xr-x 9 rolas rolas 4096 Vas 16 11:40 001<br>
|
||||||
|
drwxr-xr-x 11 rolas rolas 4096 Vas 16 11:40 002<br>
|
||||||
|
drwxr-xr-x 12 rolas rolas 4096 Vas 16 11:40 003<br>
|
||||||
|
drwxr-xr-x 6 rolas rolas 4096 Vas 16 11:40 004<br>
|
||||||
|
drwxr-xr-x 12 rolas rolas 4096 Vas 16 11:40 005<br>
|
||||||
|
drwxr-xr-x 11 rolas rolas 4096 Vas 16 11:40 006<br>
|
||||||
|
drwxr-xr-x 13 rolas rolas 4096 Vas 16 11:40 007<br>
|
||||||
|
drwxr-xr-x 6 rolas rolas 4096 Vas 16 11:40 008<br>
|
||||||
|
drwxr-xr-x 13 rolas rolas 4096 Vas 16 11:40 009<br>
|
||||||
|
drwxr-xr-x 14 rolas rolas 4096 Vas 16 11:40 00a<br>
|
||||||
|
drwxr-xr-x 16 rolas rolas 4096 Vas 16 11:40 00b<br>
|
||||||
|
drwxr-xr-x 11 rolas rolas 4096 Vas 16 11:40 00c<br>
|
||||||
|
drwxr-xr-x 9 rolas rolas 4096 Vas 16 11:40 00d<br>
|
||||||
|
drwxr-xr-x 20 rolas rolas 4096 Vas 16 11:40 00e<br>
|
||||||
|
drwxr-xr-x 18 rolas rolas 4096 Vas 16 11:40 00f<br>
|
||||||
|
drwxr-xr-x 14 rolas rolas 4096 Vas 16 11:40 010<br>
|
||||||
|
drwxr-xr-x 11 rolas rolas 4096 Vas 16 11:40 011<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 012<br>
|
||||||
|
drwxr-xr-x 12 rolas rolas 4096 Vas 16 11:40 013<br>
|
||||||
|
drwxr-xr-x 7 rolas rolas 4096 Vas 16 11:40 014<br>
|
||||||
|
drwxr-xr-x 16 rolas rolas 4096 Vas 16 11:40 015<br>
|
||||||
|
drwxr-xr-x 7 rolas rolas 4096 Vas 16 11:40 016<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 017<br>
|
||||||
|
drwxr-xr-x 9 rolas rolas 4096 Vas 16 11:40 018<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 019<br>
|
||||||
|
drwxr-xr-x 8 rolas rolas 4096 Vas 16 11:40 01a<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 01b<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 01c<br>
|
||||||
|
drwxr-xr-x 8 rolas rolas 4096 Vas 16 11:40 01d<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 01e<br>
|
||||||
|
drwxr-xr-x 11 rolas rolas 4096 Vas 16 11:40 01f<br>
|
||||||
|
drwxr-xr-x 15 rolas rolas 4096 Vas 16 11:40 020<br>
|
||||||
|
drwxr-xr-x 13 rolas rolas 4096 Vas 16 11:40 021<br>
|
||||||
|
drwxr-xr-x 5 rolas rolas 4096 Vas 16 11:40 022<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 023<br>
|
||||||
|
drwxr-xr-x 9 rolas rolas 4096 Vas 16 11:40 024<br>
|
||||||
|
drwxr-xr-x 12 rolas rolas 4096 Vas 16 11:40 025<br>
|
||||||
|
drwxr-xr-x 8 rolas rolas 4096 Vas 16 11:40 026<br>
|
||||||
|
drwxr-xr-x 12 rolas rolas 4096 Vas 16 11:40 027<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 028<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 029<br>
|
||||||
|
drwxr-xr-x 9 rolas rolas 4096 Vas 16 11:40 02a<br>
|
||||||
|
drwxr-xr-x 9 rolas rolas 4096 Vas 16 11:40 02b<br>
|
||||||
|
drwxr-xr-x 6 rolas rolas 4096 Vas 16 11:40 02c<br>
|
||||||
|
drwxr-xr-x 7 rolas rolas 4096 Vas 16 11:40 02d<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 02e<br>
|
||||||
|
drwxr-xr-x 5 rolas rolas 4096 Vas 16 11:40 02f<br>
|
||||||
|
drwxr-xr-x 13 rolas rolas 4096 Vas 16 11:40 030<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 031<br>
|
||||||
|
drwxr-xr-x 10 rolas rolas 4096 Vas 16 11:40 032<br>
|
||||||
|
...<br>
|
||||||
|
...<br>
|
||||||
|
<br>
|
||||||
|
What did I Do? Can I do CTRL+D? If yes, what should I do to recover?<br>
|
||||||
|
<br>
|
||||||
|
Thanks<br>
|
||||||
|
Rolandas<br>
|
||||||
|
<br>
|
||||||
|
$ git --version<br>
|
||||||
|
git version 2.3.0<br>
|
||||||
|
<br>
|
||||||
|
$ git annex version<br>
|
||||||
|
git-annex version: 5.20140412ubuntu1<br>
|
||||||
|
<br>
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="watch out for direct mode"
|
||||||
|
date="2015-02-16T23:22:19Z"
|
||||||
|
content="""
|
||||||
|
so while `git rebase` can do magic, it will not work out of the box on direct mode repositories, unless you use `-c core.bare=false`, in which case you will totally shoot yourself in the foot because git will happily remove all those real files sitting in the checkout. you will need to `git annex indirect` before you do any of that magic. working on a clone of the git repo is also a good idea, if only for testing.
|
||||||
|
|
||||||
|
i personally destroyed my whole music collection doing such a cleanup of the history. fortunately, i had a recent archived clone of the repo, so things weren't so bad.
|
||||||
|
|
||||||
|
but watch out for direct mode, as always.
|
||||||
|
"""]]
|
|
@ -0,0 +1,8 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://www.google.com/accounts/o8/id?id=AItOawkMTPqZZWoz396ABpx6nh3osxKQCFaSW6M"
|
||||||
|
nickname="Mark"
|
||||||
|
subject="comment 8"
|
||||||
|
date="2015-02-13T13:55:21Z"
|
||||||
|
content="""
|
||||||
|
Ah, ok. So it doesn't cause any issues that the host/* remote branches will also keep getting swapped from one repository to another? The operation of `git annex sync` is sufficiently (and happily) opaque to me, so I was concerned that this might break some of its basic assumptions.
|
||||||
|
"""]]
|
13
doc/forum/optimising_lookupkey.mdwn
Normal file
13
doc/forum/optimising_lookupkey.mdwn
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
to work around [[forum/original_filename_on_s3/]], i need to get the key from a file, and i'm not within the git-annex process. i know there's `git annex lookupkey $FILE`, but that incurs significant overhead because the whole git annex runtime needs to fire up. in my tests, this takes around 25ms on average.
|
||||||
|
|
||||||
|
could i optimise this by simply doing a `readlink` call on the git checkout? it sure looks like `readlink | basename` is all I really need, and that can probably be done below 10ms (4ms in my tests). how reliable are those links anyways, and is that what lookupkey does?
|
||||||
|
|
||||||
|
similarly, i wonder if it's safe to bypass git-annex and talk straight with git to extract location tracking? i can jump from 90ms to below 10ms for such requests if I turn `git annex find <file>` into the convoluted:
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
git annex lookupkey $file
|
||||||
|
printf $key | md5sum
|
||||||
|
git cat-file -p refs/heads/git-annex:$hash/${key}.log
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
thanks. --[[anarcat]]
|
3
doc/forum/root_assistant__63__.mdwn
Normal file
3
doc/forum/root_assistant__63__.mdwn
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
How safe (or not) is it to run the assistant as root?
|
||||||
|
|
||||||
|
If not safe, what would be a good way to sync directories like /usr/local ?
|
|
@ -0,0 +1,11 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="why md5sum?"
|
||||||
|
date="2015-02-13T15:59:46Z"
|
||||||
|
content="""
|
||||||
|
why the extra processing to generate the hashing directories?
|
||||||
|
|
||||||
|
we already have a hash here, for example, `SHA256E-s8242375--5f82490990812ad3feabb02355750710a9d94283ab256d1c691c3bf8d7d9fbe3.ogg` has a long `5f82490990812ad3feabb02355750710a9d94283ab256d1c691c3bf8d7d9fbe3` hash. Why not use the first characters of that? This will not change for a given file, and has a higher chance of generating collisions (which is a good thing here, because we can reuse directories).
|
||||||
|
|
||||||
|
In other words, why aren't the hashes of `SHA256E-s8242375--5f82490990812ad3feabb02355750710a9d94283ab256d1c691c3bf8d7d9fbe3.ogg` simply `5f8/249`? --[[anarcat]]
|
||||||
|
"""]]
|
|
@ -68,4 +68,4 @@ the S3 remote.
|
||||||
then use the same bucket.
|
then use the same bucket.
|
||||||
|
|
||||||
* `x-amz-meta-*` are passed through as http headers when storing keys
|
* `x-amz-meta-*` are passed through as http headers when storing keys
|
||||||
in S3.
|
in S3. See [the Internet Archive S3 interface documentation](https://archive.org/help/abouts3.txt) for example headers.
|
||||||
|
|
|
@ -6,7 +6,7 @@ you want to use to sneakernet files between systems (possibly with
|
||||||
the drive's mountpoint as a directory remote.
|
the drive's mountpoint as a directory remote.
|
||||||
|
|
||||||
Note that directory remotes have a special directory structure
|
Note that directory remotes have a special directory structure
|
||||||
(by design, the same as the \[[rsync|rsync]] remote).
|
(by design, the same as the [[rsync|rsync]] remote).
|
||||||
If you just want two copies of your repository with the files "visible"
|
If you just want two copies of your repository with the files "visible"
|
||||||
in the tree in both, the directory special remote is not what you want.
|
in the tree in both, the directory special remote is not what you want.
|
||||||
Instead, you should use a regular `git clone` of your git-annex repository.
|
Instead, you should use a regular `git clone` of your git-annex repository.
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="spurious changes in metadata.db"
|
||||||
|
date="2015-02-15T05:03:20Z"
|
||||||
|
content="""
|
||||||
|
note that metadata.db seems to change even though no change was performed on the library. i filed a [bug report upstream](https://bugs.launchpad.net/calibre/+bug/1422058) to try and figure out what is going on here. -- [[anarcat]]
|
||||||
|
"""]]
|
|
@ -0,0 +1,7 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="undo to the rescue"
|
||||||
|
date="2015-02-15T05:52:58Z"
|
||||||
|
content="""
|
||||||
|
note: to avoid having too many such changes, i end up using [[todo/direct_mode_undo]] quite often.
|
||||||
|
"""]]
|
|
@ -0,0 +1,7 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="BojanNikolic"
|
||||||
|
subject="Publishing using rsync/directory layout"
|
||||||
|
date="2015-02-16T10:04:41Z"
|
||||||
|
content="""
|
||||||
|
Is it possible to easily do the same with rsync/directory layout of the special remote? These have prefixes which are not shown when doing git annex lookupkey
|
||||||
|
"""]]
|
|
@ -0,0 +1,14 @@
|
||||||
|
I'm using git-annex 5.20150205-gbf9058a and just used the WebApp to create a new remote SSH repo, and thought I'd try the encrypted option.
|
||||||
|
|
||||||
|
It gave me three GPG keys to choose from (all valid keys) but only displayed the email addresses, which were all identical, so I couldn't tell which was which.
|
||||||
|
|
||||||
|
I then clicked the first key selection button, hoping it would display more info but it seemed to start doing things immediately. It requested the GPG passphrase which I cancelled but it was still doing things, and worse it wasn't clear what state the repo was in (encrypted or not), so I deleted it and started again (it's fine now).
|
||||||
|
|
||||||
|
The passphrase dialog box does display the key fingerprint, but it's then too late to alter the key selection.
|
||||||
|
|
||||||
|
Request: Could the WebApp always display the fingerprint after the email address?
|
||||||
|
|
||||||
|
Some clarity on what happens when you cancel would be nice too.
|
||||||
|
|
||||||
|
Thanks
|
||||||
|
Giovanni
|
|
@ -84,3 +84,5 @@ that touched files in that directory, and undo the changes to those files.
|
||||||
|
|
||||||
Also, --depth could make undo look for an older commit than the most
|
Also, --depth could make undo look for an older commit than the most
|
||||||
recent one to affect the specified file.
|
recent one to affect the specified file.
|
||||||
|
|
||||||
|
See [[direct_mode]] for documentation about this feature.
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="comment 1"
|
||||||
|
date="2015-02-15T05:46:01Z"
|
||||||
|
content="""
|
||||||
|
> This way, if a file has a staged change, it gets committed, and then that commit is reverted, resulting in another commit. Which a later run of undo can in turn revert. If it didn't commit, the history about the staged change that was reverted would be lost.
|
||||||
|
|
||||||
|
so far, my experience with this is that unstaged changes get dropped and the change that gets undone is the last committed change. In other words, if i have:
|
||||||
|
|
||||||
|
$ git annex status
|
||||||
|
M file
|
||||||
|
|
||||||
|
`git annex undo` is going to drop that modification and `git revert HEAD`. but maybe i got confused, in which case some of the documentation i just did in [[direct mode]] needs to be corrected. --[[anarcat]]
|
||||||
|
"""]]
|
|
@ -0,0 +1,17 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="https://id.koumbit.net/anarcat"
|
||||||
|
subject="sigh... nevermind"
|
||||||
|
date="2015-02-15T05:52:02Z"
|
||||||
|
content="""
|
||||||
|
seems like i was wrong. i could have sworn i saw a committed file get unstaged. what i saw was:
|
||||||
|
|
||||||
|
$ git annex status
|
||||||
|
M file
|
||||||
|
$ git annex undo file
|
||||||
|
$ git annex status
|
||||||
|
? file
|
||||||
|
|
||||||
|
the thing is: the file was *removed* in a previous version, so i thought this was what it reverted to. i'm unsure as to why the file was marked as missing there - i ended up reverting from a backup (from another remote, by hand). after trying to reproduce this, i failed, so there may have been some PEBKAC in action again.
|
||||||
|
|
||||||
|
this feature is so useful though, thanks for this. --[[anarcat]]
|
||||||
|
"""]]
|
|
@ -66,10 +66,8 @@ for lib in $(cat $base/libdirs); do
|
||||||
GIT_ANNEX_LD_LIBRARY_PATH="$base/$lib:$GIT_ANNEX_LD_LIBRARY_PATH"
|
GIT_ANNEX_LD_LIBRARY_PATH="$base/$lib:$GIT_ANNEX_LD_LIBRARY_PATH"
|
||||||
done
|
done
|
||||||
export GIT_ANNEX_LD_LIBRARY_PATH
|
export GIT_ANNEX_LD_LIBRARY_PATH
|
||||||
GIT_ANNEX_LINKER="$base/$(cat $base/linker)"
|
GIT_ANNEX_DIR="$base"
|
||||||
export GIT_ANNEX_LINKER
|
export GIT_ANNEX_DIR
|
||||||
GIT_ANNEX_SHIMMED="$base/shimmed"
|
|
||||||
export GIT_ANNEX_SHIMMED
|
|
||||||
|
|
||||||
ORIG_GCONV_PATH="$GCONV_PATH"
|
ORIG_GCONV_PATH="$GCONV_PATH"
|
||||||
export ORIG_GCONV_PATH
|
export ORIG_GCONV_PATH
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue