Merge branch 'master' into assistant

This commit is contained in:
Joey Hess 2012-07-16 15:06:08 -04:00
commit fa3aef96e2
50 changed files with 582 additions and 46 deletions

View file

@ -72,7 +72,7 @@ getVersionString = do
getGitVersion :: Test
getGitVersion = do
(_, s) <- pipeFrom "git" ["--version"]
let version = last $ words $ head $ lines s
let version = unwords $ drop 2 $ words $ head $ lines s
return $ Config "gitversion" (StringConfig version)
getSshConnectionCaching :: Test

View file

@ -21,6 +21,7 @@ module Command (
isBareRepo,
numCopies,
autoCopies,
autoCopiesWith,
module ReExported
) where
@ -112,8 +113,18 @@ numCopies file = readish <$> checkAttr "annex.numcopies" file
- In auto mode, first checks that the number of known
- copies of the key is > or < than the numcopies setting, before running
- the action. -}
autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart
autoCopies file key vs a = do
autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> CommandStart -> CommandStart
autoCopies file key vs a = Annex.getState Annex.auto >>= go
where
go False = a
go True = do
numcopiesattr <- numCopies file
needed <- getNumCopies numcopiesattr
(_, have) <- trustPartition UnTrusted =<< Remote.keyLocations key
if length have `vs` needed then a else stop
autoCopiesWith :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart
autoCopiesWith file key vs a = do
numcopiesattr <- numCopies file
Annex.getState Annex.auto >>= auto numcopiesattr
where

View file

@ -24,5 +24,5 @@ seek = [withField Command.Move.toOption Remote.byName $ \to ->
-- A copy is just a move that does not delete the source file.
-- However, --auto mode avoids unnecessary copies.
start :: Maybe Remote -> Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start to from file (key, backend) = autoCopies file key (<) $ \_numcopies ->
start to from file (key, backend) = autoCopies file key (<) $
Command.Move.start to from False file (key, backend)

View file

@ -30,7 +30,7 @@ seek = [withField fromOption Remote.byName $ \from ->
withFilesInGit $ whenAnnexed $ start from]
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = autoCopies file key (>) $ \numcopies ->
start from file (key, _) = autoCopiesWith file key (>) $ \numcopies ->
case from of
Nothing -> startLocal file numcopies key
Just remote -> do

View file

@ -24,7 +24,7 @@ seek = [withField Command.Move.fromOption Remote.byName $ \from ->
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
start from file (key, _) = stopUnless (not <$> inAnnex key) $
autoCopies file key (<) $ \_numcopies ->
autoCopies file key (<) $
case from of
Nothing -> go $ perform key file
Just src ->

View file

@ -169,7 +169,7 @@ checkPresent r o k = do
- ensure that files are only moved into place once complete
-}
partialParams :: CommandParam
partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
partialParams = Params "--partial --partial-dir=.rsync-partial"
{- Runs an action in an empty scratch directory that can be used to build
- up trees for rsync. -}

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="http://joeyh.name/"
subject="comment 4"
date="2012-07-08T18:13:58Z"
content="""
On kFreeBSD, I get this:
$ sysctl kern.maxfilesperproc
kern.maxfilesperproc: 11095
But ulimit still has 1024 limit, so you'd need to adjust both, as root. Messy..
"""]]

View file

@ -0,0 +1,19 @@
"git annex watch" will add the file generated by "git annex map", which is probably not intended. Shouldnt this file be created in /tmp or .git/annex/ or somewhere else?
/tmp $ cd test/
/tmp/test $ git init
Initialized empty Git repository in /tmp/test/.git/
/tmp/test $ git annex init
init ok
(Recording state in git...)
/tmp/test $ git annex watch
/tmp/test $ git annex map
map /tmp/test ok
running: dot -Tx11 map.dot
ok
/tmp/test $ ls -l
insgesamt 4
lrwxrwxrwx 1 jojo jojo 180 Jul 15 23:36 map.dot -> .git/annex/objects/P3/76/SHA256-s208--44199582b5948512ff12cf03de0b86fa1bebf09785dba2827fe52afee0afbe3d/SHA256-s208--44199582b5948512ff12cf03de0b86fa1bebf09785dba2827fe52afee0afbe3d

View file

@ -0,0 +1,60 @@
As you can see, I'm running a pretty recent build of git-annex (ac799c3f363e0008b23e9c174e6fedc35e6fa92a),
$ git annex version
git-annex version: 3.20120630
local repository version: 3
default repository version: 3
supported repository versions: 3
upgrade supported from repository versions: 0 1 2
We have a file here which isn't currently available
(the link is shown in red),
$ ls -l plot.py
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
$
Yet git-annex should be able to tell us where it is,
$ git-annex whereis plot.py
$
Hmm, well that's strange. What's happening here,
$ git-annex whereis plot.py -d
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","git-annex"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","--hash","refs/heads/git-annex"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","log","refs/heads/git-annex..d5582e05f41011b571a17003934fe9e40859e4be","--oneline","-n1"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","cat-file","--batch"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","plot.py"]
$
Alright, well maybe `git-annex get` will work,
$ git annex get plot.py -d
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","plot.py"]
$ ls -l plot.py
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
Nope, the link is still shown in red.
Alright, what about `git-annex copy`?
$ git annex copy plot.py --from=goldnerlab --to=here -d
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","git-annex"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","--hash","refs/heads/git-annex"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","log","refs/heads/git-annex..d5582e05f41011b571a17003934fe9e40859e4be","--oneline","-n1"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","cat-file","--batch"]
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","plot.py"]
$ ls -l plot.py
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
Still red.
Alright, what if I just try to get a non-existent file?
$ git annex get adsflkah -d
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","adsflkah"]
$
Alright, it didn't fail with an error, that's very strange. What is going on here?

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
nickname="Ben"
subject="comment 10"
date="2012-07-10T14:17:42Z"
content="""
Hmm, the commands above seem to have worked on both machines (both running 3.20120630). I guess I should probably just try rebuilding my data/ repository from scratch, eh?
"""]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
nickname="Justin"
subject="comment 11"
date="2012-07-10T14:26:06Z"
content="""
I suppose.. joey can probably help you investigate exactly what went wrong. You might want to save an empty clone of the git repository for later..
The easiest way to fix the data is probably to run a `git annex uninit` in the old repository which will put the files back how they were before and then `git-annex import` them into a new repository.
"""]]

View file

@ -0,0 +1,18 @@
[[!comment format=mdwn
username="http://joeyh.name/"
ip="165.98.113.100"
subject="comment 12"
date="2012-07-11T23:23:04Z"
content="""
Looking at this a leetle more closely, you had:
<pre>
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
</pre>
Well, that is not how a git-annex symlink currently looks, so it ignores it.
Apparently this repository was created with an old version of git-annex, possibly version 1, and you've dropped in the current version, but the normal upgrade machinery failed. This could happen if you made a new clone of a version 1 bare repository.
I suggest you first find out what version of git-annex was originally used to create this repository (ie, version 0, 1, or 2 ... probably 1). Then make a clone, and \"git config annex.version $N\" (where N=the version used). Then \"git annex upgrade\" and you should be good to go. Remember to push or sync the upgrade back to the bare repo so you don't need to do this again.
"""]]

View file

@ -0,0 +1,7 @@
[[!comment format=mdwn
username="http://joeyh.name/"
subject="comment 1"
date="2012-07-09T23:16:32Z"
content="""
`git ls-files` is not listing your file. Perhaps your file is not checked into git?
"""]]

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
nickname="Ben"
subject="comment 2"
date="2012-07-09T23:31:08Z"
content="""
Not really sure what to say about that other than,
$ git --git-dir=/home/ben/lori/analysis/data/.git --work-tree=/home/ben/lori/analysis/data ls-files --cached -- plot.py
plot.py
$
"""]]

View file

@ -0,0 +1,14 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
nickname="Justin"
subject="Remotes? "
date="2012-07-10T00:23:11Z"
content="""
What does
git-annex status
Show?
Do you have any remotes configured? It looks like you don't somehow.
"""]]

View file

@ -0,0 +1,46 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
nickname="Ben"
subject="comment 4"
date="2012-07-10T01:46:23Z"
content="""
$ git annex status
supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL
supported remote types: git S3 bup directory rsync web hook
trusted repositories: 0
semitrusted repositories: 3
00000000-0000-0000-0000-000000000001 -- web
02e4ea72-a77c-11e1-bbd7-0749b04e4b59 -- goldnerlab (Data for Goldner)
3c1fd026-c794-11e1-8ebb-dbe8684e8a73 -- here
untrusted repositories: 0
dead repositories: 0
transfers in progress: none
available local disk space: 16 gigabytes (+1 megabyte reserved)
local annex keys: 0
local annex size: 0 bytes
known annex keys: 0
known annex size: 0 bytes
bloom filter size: 16 mebibytes (0% full)
backend usage:
$ git remote
goldnerlab
$ git remote show goldnerlab
* remote goldnerlab
Fetch URL: goldnerlab:data
Push URL: goldnerlab:data
HEAD branch (remote HEAD is ambiguous, may be one of the following):
master
synced/master
Remote branches:
git-annex tracked
master tracked
synced/master tracked
Local branch configured for 'git pull':
master merges with remote master
Local refs configured for 'git push':
git-annex pushes to git-annex (up to date)
master pushes to master (up to date)
synced/master pushes to synced/master (up to date)
"""]]

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
nickname="Justin"
subject="comment 5"
date="2012-07-10T03:03:27Z"
content="""
Well that's odd. You have remotes but no annexed files..
Can you post the commands you used to arrive at this situation? I'm not sure how you would have done that.. Maybe you just need a
git-annex sync
to get things going?
I think somehow you cloned the git repo but not the annex stuff.
"""]]

View file

@ -0,0 +1,44 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
nickname="Ben"
subject="comment 6"
date="2012-07-10T03:26:35Z"
content="""
I can easily reproduce the issue as follows,
$ git clone goldnerlab:data
Cloning into 'data'...
remote: Counting objects: 61902, done.
remote: Compressing objects: 100% (61354/61354), done.
remote: Total 61902 (delta 356), reused 61902 (delta 356)
Receiving objects: 100% (61902/61902), 5.50 MiB | 894 KiB/s, done.
Resolving deltas: 100% (356/356), done.
$ cd data
$ git annex sync
(merging origin/git-annex into git-annex...)
commit
(Recording state in git...)
# On branch master
nothing to commit (working directory clean)
ok
pull origin
ok
push origin
Counting objects: 8, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (5/5), done.
Writing objects: 100% (6/6), 726 bytes, done.
Total 6 (delta 1), reused 1 (delta 0)
Auto packing the repository for optimum performance.
warning: There are too many unreachable loose objects; run 'git prune' to remove them.
To goldnerlab:data
d5582e0..aaddf3c git-annex -> git-annex
ok
Everything looks good so far. I verify that alex/plot.py doesn't exist. Now let's try getting it,
$ git annex get alex/plot.py -d
git [\"--git-dir=/home/ben/data/.git\",\"--work-tree=/home/ben/data\",\"ls-files\",\"--cached\",\"-z\",\"--\",\"alex/plot.py\"]
Uh oh. ls confirms that get was unsuccessful.
"""]]

View file

@ -0,0 +1,67 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
nickname="Justin"
subject="comment 7"
date="2012-07-10T12:37:43Z"
content="""
But how was the goldnerlab:data repository created? That looks to be where the problem is..
I have a slightly older version, but in general it should work the same..
you can see right away, when I do git annex status it shows \"known annex keys: 1\".
if you do git annex status on goldnerlab, does it say you have any annex keys?
$ git-annex version
git-annex version: 3.20120614~bpo60+1
$ mkdir a
$ cd a
$ git init
Initialized empty Git repository in /tmp/a/.git/
$ git annex init a
init a ok
(Recording state in git...)
$ echo hi > file
$ git annex add file
add file (checksum...) ok
(Recording state in git...)
$ git commit -m added
fatal: No HEAD commit to compare with (yet)
fatal: No HEAD commit to compare with (yet)
[master (root-commit) cfa9049] added
1 files changed, 1 insertions(+), 0 deletions(-)
create mode 120000 file
$ cd ..
$ git clone a a_clone
Cloning into a_clone...
done.
$ cd a_clone
$ git annex status
(merging origin/git-annex into git-annex...)
supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL
supported remote types: git bup directory rsync web hook
trusted repositories: 0
semitrusted repositories: 3
00000000-0000-0000-0000-000000000001 -- web
445d616e-ca8b-11e1-b170-ff8b03c54243 -- origin (a)
5d3db51c-ca8b-11e1-bbc3-039dd06ab47b -- here
untrusted repositories: 0
dead repositories: 0
available local disk space: 63 megabytes (+1 megabyte reserved)
local annex keys: 0
local annex size: 0 bytes
known annex keys: 1
known annex size: 3 bytes
backend usage:
SHA256: 1
(Recording state in git...)
$ ls
file
$ cat file
cat: file: No such file or directory
$ git annex get file
get file (from origin...) ok
(Recording state in git...)
$ cat file
hi
"""]]

View file

@ -0,0 +1,30 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
nickname="Ben"
subject="comment 8"
date="2012-07-10T13:02:37Z"
content="""
On goldnerlab,
$ git annex status
supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL
supported remote types: git S3 bup directory rsync web hook
trusted repositories: 0
semitrusted repositories: 4
00000000-0000-0000-0000-000000000001 -- web
02e4ea72-a77c-11e1-bbd7-0749b04e4b59 -- here (Data for Goldner)
351f3ddc-ca3e-11e1-a3fc-6338ef4724a7
3c1fd026-c794-11e1-8ebb-dbe8684e8a73
untrusted repositories: 0
dead repositories: 0
transfers in progress: none
available local disk space: 2 terabytes (+1 megabyte reserved)
local annex keys: 19101
local annex size: 41 gigabytes
known annex keys: 19122
known annex size: 41 gigabytes
bloom filter size: 16 mebibytes (3.8% full)
backend usage:
WORM: 38223
"""]]

View file

@ -0,0 +1,17 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
nickname="Justin"
subject="comment 9"
date="2012-07-10T14:08:10Z"
content="""
Can you run the series of commands I had above on your two machines? I figure there are two possibilities:
1. There is something wrong with the git-annex versions you are using.
2. There is something wrong with your repository. (\"warning: There are too many unreachable loose objects\"?)
so if you can make a temp repository on goldnerlab, then clone it on the other machine and see where it fails, that would be helpful.
after cloning git-annex status should hopefully say that you have 1 known key, not 0.
Obviously this won't fix the problem, but it will at least narrow it down.
"""]]

View file

@ -1,14 +0,0 @@
We need a way to calculate space taken by certain files.
Use cases: I want to drop some files from my small disk. I need to figure out things that take most space, and drop them.
Usage examples:
git annex du -hs *.mp3
git annex du -sBm --in=here *.ogg
Would be nice if it was compatible with standard unix `df`.
> `du -L` works.
>
> See also: [[forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files]]

View file

@ -12,12 +12,13 @@ Feel free to chip in with comments! --[[Joey]]
* Month 3 "easy setup": [[!traillink configurators]] [[!traillink pairing]]
* Month 4 "polishing": [[!traillink cloud]] [[!traillink leftovers]]
* Months 5-6 "9k bonus round": [[!traillink Android]] [[!traillink partial_content]]
* Months 7-11: user-driven features and polishing
* Month 12: "Windows purgatory" [[Windows]]
## not yet on the map:
* [[desymlink]]
* [[deltas]]
* In my overfunded nighmares: [[Windows]]
## blog

View file

@ -23,7 +23,7 @@ In other words, I was lost in the weeds for a lot of those hours...
At one point, something glorious happened, and it was always making exactly
one commit for batch mode modifications of a lot of files (like untarring
them). Unfortunatly, I had to lose that gloriousness due to another
them). Unfortunately, I had to lose that gloriousness due to another
potential race, which, while unlikely, would have made the program deadlock
if it happened.
@ -40,7 +40,7 @@ are still open for write.
This works great! Starting up `git annex watch` when processes have files
open is no longer a problem, and even if you're evil enough to try having
muliple processes open the same file, it will complain and not annex it
multiple processes open the same file, it will complain and not annex it
until all the writers close it.
(Well, someone really evil could turn the write bit back on after git annex

View file

@ -3,13 +3,13 @@ to `kqueue`, and Haskell code to use that library. By now I think I
understand kqueue fairly well -- there are some very tricky parts to the
interface.
But... it still did't work. After building all this, my code was
But... it still didn't work. After building all this, my code was
failing the same way that the
[haskell kqueue library failed](https://github.com/hesselink/kqueue/issues/1)
yesterday. I filed a [bug report with a testcase]().
Then I thought to ask on #haskell. Got sorted out in quick order! The
problem turns out to be that haskell's runtime has a peridic SIGALARM,
problem turns out to be that haskell's runtime has a periodic SIGALARM,
that is interrupting my kevent call. It can be worked around with `+RTS -V0`,
but I put in a fix to retry to kevent when it's interrupted.

View file

@ -10,13 +10,13 @@ But it's not all easy. Syncing should happen as fast as possible, so
changes show up without delay. Eventually it'll need to support syncing
between nodes that cannot directly contact one-another. Syncing needs to
deal with nodes coming and going; one example of that is a USB drive being
plugged in, which should immediatly be synced, but network can also come
plugged in, which should immediately be synced, but network can also come
and go, so it should periodically retry nodes it failed to sync with. To
start with, I'll be focusing on fast syncing between directly connected
nodes, but I have to keep this wider problem space in mind.
One problem with `git annex sync` is that it has to be run in both clones
in order for changes to fully propigate. This is because git doesn't allow
in order for changes to fully propagate. This is because git doesn't allow
pushing changes into a non-bare repository; so instead it drops off a new
branch in `.git/refs/remotes/$foo/synced/master`. Then when it's run locally
it merges that new branch into `master`.

View file

@ -12,7 +12,7 @@ not sufficient. There are two problems with it:
So, instead, git-annex will use a regular `git merge`, and if it fails, it
will fix up the conflicts.
That presented its own difficully, of finding which files in the tree
That presented its own difficulty, of finding which files in the tree
conflict. `git ls-files --unmerged` is the way to do that, but its output
is a quite raw form:
@ -21,9 +21,9 @@ is a quite raw form:
100644 1eabec834c255a127e2e835dadc2d7733742ed9a 2 bar
100644 36902d4d842a114e8b8912c02d239b2d7059c02b 3 bar
I had to stare at the rather inpenetrable documentation for hours and
I had to stare at the rather impenetrable documentation for hours and
write a lot of parsing and processing code to get from that to these mostly
self expanatory data types:
self explanatory data types:
data Conflicting v = Conflicting
{ valUs :: Maybe v

View file

@ -35,7 +35,7 @@ more threads:
1. Uploads new data to every configured remote. Triggered by the watcher
thread when it adds content. Easy; just use a `TSet` of Keys to send.
2. Downloads new data from the cheapest remote that has it. COuld be
2. Downloads new data from the cheapest remote that has it. Could be
triggered by the
merger thread, after it merges in a git sync. Rather hard; how does it
work out what new keys are in the tree without scanning it all? Scan

View file

@ -1,6 +1,6 @@
Well, sometimes you just have to go for the hack. Trying to find a way
to add additional options to git-annex-shell without breaking backwards
compatability, I noticed that it ignores all options after `--`, because
compatibility, I noticed that it ignores all options after `--`, because
those tend to be random rsync options due to the way rsync runs it.
So, I've added a new class of options, that come in between, like

View file

@ -21,5 +21,5 @@ nontrivial features can be added easily.
--
Next up: Enough nonsense with tracking tranfers... Time to start actually
Next up: Enough nonsense with tracking transfers... Time to start actually
transferring content around!

View file

@ -6,7 +6,7 @@ Details follow..
Made the committer thread queue Upload Transfers when new files
are added to the annex. Currently it tries to transfer the new content
to *every* remote; this innefficiency needs to be addressed later.
to *every* remote; this inefficiency needs to be addressed later.
Made the watcher thread queue Download Transfers when new symlinks
appear that point to content we don't have. Typically, that will happen
@ -30,12 +30,12 @@ all the assistant's other threads from entering that monad while a transfer
is running. This is also necessary to allow multiple concurrent transfers
to run in the future.
This is a very tricky peice of code, because that thread will modify the
This is a very tricky piece of code, because that thread will modify the
git-annex branch, and its parent thread has to invalidate its cache in
order to see any changes the child thread made. Hopefully that's the extent
of the complication of doing this. The only reason this was possible at all
is that git-annex already supports multiple concurrent processes running
and all making independant changes to the git-annex branch, etc.
and all making independent changes to the git-annex branch, etc.
After all my groundwork this week, file content transferring is now
fully working!

View file

@ -0,0 +1,31 @@
Spent most of the day making file content transfers robust. There were lots
of bugs, hopefully I've fixed most of them. It seems to work well now,
even when I throw a lot of files at it.
One of the changes also sped up transfers; it no longer roundtrips to the
remote to verify it has a file. The idea here is that when the assistant is
running, repos should typically be fairly tightly synced to their remotes
by it, so some of the extra checks that the `move` command does are
unnecessary.
Also spent some time trying to use ghc's threaded runtime, but continue to
be baffled by the random hangs when using it. This needs fixing eventually;
all the assistant's threads can potentially be blocked when it's waiting on
an external command it has run.
Also changed how transfer info files are locked. The lock file is now
separate from the info file, which allows the TransferWatcher thread to
notice when an info file is created, and thus actually track transfers
initiated by remotes.
---
I'm fairly close now to merging the `assistant` branch into `master`.
The data syncing code is very brute-force, but it will work well enough
for a first cut.
Next I can either add some repository network mapping, and use graph
analysis to reduce the number of data transfers, or I can move on to the
[[webapp]]. Not sure yet which I'll do. It's likely that since DebConf
begins tomorrow I'll put off either of those big things until after the
conference.

View file

@ -0,0 +1,17 @@
I didn't plan to work on git-annex much while at DebConf, because the conference
always prevents the kind of concentration I need. But I unexpectedly also had to deal
with [three dead drives](http://joeyh.name/blog/entry/I_am_become_Joey_destroyer_of_drives/)
and illness this week.
That said, I have been trying to debug a problem with git-annex and Haskell's threaded
runtime all week. It just hangs, randomly. No luck so far isolating why, although I now
have a branch that hangs fairly reliably, and in which I am trying to whittle the entire
git-annex code base (all 18 thousand lines!) into a nice test case.
This threaded runtime problem doesn't affect the assistant yet, but if I want to use
Yesod in developing the webapp, I'll need the threaded runtime, and using the threaded
runtime in the assistant generally would make it more responsive and less hacky.
Since this is a task I can work on without much concentration, I'll probably keep beating
on it until I return home. Then I need to spend some quality thinking time on where
to go next in the assistant.

View file

@ -1,6 +1,6 @@
Last night I got `git annex watch` to also handle deletion of files.
This was not as tricky as feared; the key is using `git rm --ignore-unmatch`,
which avoids most problimatic situations (such as a just deleted file
which avoids most problematic situations (such as a just deleted file
being added back before git is run).
Also fixed some races when `git annex watch` is doing its startup scan of

View file

@ -0,0 +1,9 @@
Managed to find a minimal, 20 line test case for at least one of the ways
git-annex was hanging with GHC's threaded runtime. Sent it off to
haskell-cafe for analysis.
[thread](http://news.gmane.org/gmane.comp.lang.haskell.cafe)
Further managed to narrow the bug down to MissingH's use of logging code,
that git-annex doesn't use. [bug report](http://bugs.debian.org/681621).
So, I can at least get around this problem with a modified version of
MissingH. Hopefully that was the only thing causing the hangs I was seeing!

View file

@ -16,7 +16,7 @@ thread that wakes up periodically, flushes the queue, and autocommits.
(This will, in fact, be the start of the [[syncing]] phase of my roadmap!)
There's lots of room here for smart behavior. Like, if a lot of changes are
being made close together, wait for them to die down before committing. Or,
if it's been idle and a single file appears, commit it immediatly, since
if it's been idle and a single file appears, commit it immediately, since
this is probably something the user wants synced out right away. I'll start
with something stupid and then add the smarts.

View file

@ -11,7 +11,7 @@ things slow and ugly. This was not unexpected.
So next, I added some smarts to it. First, I wanted to stop it waking up
every second when there was nothing to do, and instead blocking wait on a
change occuring. Secondly, I wanted it to know when past changes happened,
change occurring. Secondly, I wanted it to know when past changes happened,
so it could detect batch mode scenarios, and avoid committing too
frequently.
@ -52,6 +52,6 @@ shouldCommit now changetimes
thisSecond t = now `diffUTCTime` t <= 1
"""]]
Still some polishing to do to eliminate minor innefficiencies and deal
Still some polishing to do to eliminate minor inefficiencies and deal
with more races, but this part of the git-annex assistant is now very usable,
and will be going out to my beta testers soon!

View file

@ -24,7 +24,7 @@ symlinks might have just been deleted and re-added, or changed, and
the index still have the old value.
Instead, I got creative. :) We can't trust what the index says about the
symlink, but if the index happens to contian a symlink that looks right,
symlink, but if the index happens to contain a symlink that looks right,
we can trust that the SHA1 of its blob is the right SHA1, and reuse it
when re-staging the symlink. Wham! Massive speedup!

View file

@ -10,7 +10,7 @@ own git index parser (or use one from Hackage), this check requires running
tree of files is being moved or unpacked into the watched directory.
Instead, I made it only do the check during `git annex watch`'s initial
scan of the tree. This should be ok, because once it's running, you
scan of the tree. This should be OK, because once it's running, you
won't be adding new files to git anyway, since it'll automatically annex
new files. This is good enough for now, but there are at least two problems
with it:

View file

@ -16,7 +16,7 @@ quickly is really only important so people don't think it's a resource hog.
First impressions are important. :)
But what does "made recently" mean exactly? Well, my answer is possibly
overengineered, but most of it is really groundwork for things I'll need
over engineered, but most of it is really groundwork for things I'll need
later anyway. I added a new data structure for tracking the status of the
daemon, which is periodically written to disk by another thread (thread #6!)
to `.git/annex/daemon.status` Currently it looks like this; I anticipate

View file

@ -3,11 +3,11 @@ all the other git clones, at both the git level and the key/value level.
## immediate action items
* Check that download transfer triggering code works (when a symlink appears
and the remote does *not* upload to us).
* At startup, and possibly periodically, look for files we have that
location tracking indicates remotes do not, and enqueue Uploads for
them. Also, enqueue Downloads for any files we're missing.
* After git sync, identify content that we don't have that is now available
on remotes, and transfer.
## longer-term TODO
@ -29,6 +29,9 @@ all the other git clones, at both the git level and the key/value level.
only uploading new files but not downloading, and only downloading
files in some directories and not others. See for use cases:
[[forum/Wishlist:_options_for_syncing_meta-data_and_data]]
* speed up git syncing by using the cached ssh connection for it too
(will need to use `GIT_SSH`, which needs to point to a command to run,
not a shell command line)
## misc todo

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
nickname="Ben"
subject="ARM support"
date="2012-07-13T16:51:15Z"
content="""
The closure of [this](http://hackage.haskell.org/trac/ghc/ticket/5839) ticket hopefully marks the end of TH issues on ARM. As of 7.4.2, GHC's linker has enough ARM support to allow a selection of common packages to compile on my PandaBoard. That being said, it hasn't had a whole lot of testing so it's possible I still need to implement a few relocation types.
"""]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawncBlzaDI248OZGjKQMXrLVQIx4XrZrzFo"
nickname="Perttu"
subject="comment 2"
date="2012-07-07T17:45:43Z"
content="""
Ah, I didn't read the man page carefully enough. My apologies.
Setting the ignore status based on an exit status would be
even better, since this avoids re-writing a new config file for
each repository each time I enter or exit my LAN.
"""]]

View file

@ -0,0 +1,12 @@
Is there a way to pull from an encrypted remote?
Use case:
1. Have annex in an encrypted public rsync remote
2. Have USB stick with PGP keys (but not the annex repository)
3. Get to a new computer
4. Set up a new annex using the PGP keys I have.
1-3 work fine :) However, the 4th is the issue:
How would I do `git pull <remote>` for an encrypted remote? Is it possible?

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
nickname="Justin"
subject="comment 1"
date="2012-07-10T18:14:31Z"
content="""
You just need to `git clone` the existing repository and make sure the `git remote`s are set up.
The 'rsync remote' is not actually the annex; it's just a collection of encrypted files with obfuscated names. You need a copy of the actual repository to restore the files.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="http://bergey.dreamwidth.org/"
ip="66.80.90.109"
subject="git-media"
date="2012-07-14T15:42:05Z"
content="""
I haven't used git-media, but from the README it looks as though they now support several backends. Might want to update the (very helpful!) comparison.
"""]]

View file

@ -0,0 +1,8 @@
When the [[design/assistant]] is running on a pair of remotes, I've seen
them get out of sync, such that every pull and merge results in a conflict,
that then has to be auto-resolved.
This seems similar to the laddering problem described in this old bug:
[[bugs/making_annex-merge_try_a_fast-forward]]
--[[Joey]]

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmB-gCGEs--zfmvYU-__Hj2FbliUXgxMDs"
nickname="Jakub"
subject="Path problems"
date="2012-07-13T19:15:15Z"
content="""
Hi,
I have the same 'git-annex-shell command not found' problem as above. I've installed git annex via cabal into my ~/.haskell_bin directory. Then I've added this dir both to ~/.bashrc and ~/.zshrc. I can run git annex or 'git annex-shell' and everything is fine. My guess is that haskell is trying to spawn git-annex-shell with some current $PATH unaware shell like dash maybe?
I've fixed this behavior by using a really ugly hack - I've symlinked ~/.haskell_bin/git-annex-shell to /usr/bin/git-annex-shell on all my machines and the problem is gone. Somehow haskell (or whatever is trying to call git-annex-shell) is unaware of path modifications from .bashrc/.zshrc
Here is the path modification I've used:
export PATH=~/.haskell_bin:$PATH
"""]]

View file

@ -0,0 +1,23 @@
[[!comment format=mdwn
username="https://www.google.com/accounts/o8/id?id=AItOawmB-gCGEs--zfmvYU-__Hj2FbliUXgxMDs"
nickname="Jakub"
subject="Fixed"
date="2012-07-13T19:27:46Z"
content="""
Found the problem:
One should never use ~ in such a path:
WRONG export PATH=~/somedir:$PATH
Instead one should use $HOME:
GOOD export PATH=$HOME/somedir:$PATH
Can I suppress the message that shell failed with status 255 when a repo is unavailable? I've got two repos pointing to one machine - either via vpn or local LAN and I keep getting errors if one is unavailable:
ssh: connect to host 10.9.0.1 port 39882: No route to host
Command ssh [\"-S\",\"/home/pielgrzym/annex/.git/annex/ssh/nas\",\"-o\",\"ControlMaster=auto\",\"-o\",\"ControlPersist=yes\",\"nas\",\"git-annex-shell 'configlist' '/~/annex'\"] failed; exit code 255
"""]]