Merge branch 'master' into assistant
This commit is contained in:
commit
fa3aef96e2
50 changed files with 582 additions and 46 deletions
|
@ -72,7 +72,7 @@ getVersionString = do
|
|||
getGitVersion :: Test
|
||||
getGitVersion = do
|
||||
(_, s) <- pipeFrom "git" ["--version"]
|
||||
let version = last $ words $ head $ lines s
|
||||
let version = unwords $ drop 2 $ words $ head $ lines s
|
||||
return $ Config "gitversion" (StringConfig version)
|
||||
|
||||
getSshConnectionCaching :: Test
|
||||
|
|
15
Command.hs
15
Command.hs
|
@ -21,6 +21,7 @@ module Command (
|
|||
isBareRepo,
|
||||
numCopies,
|
||||
autoCopies,
|
||||
autoCopiesWith,
|
||||
module ReExported
|
||||
) where
|
||||
|
||||
|
@ -112,8 +113,18 @@ numCopies file = readish <$> checkAttr "annex.numcopies" file
|
|||
- In auto mode, first checks that the number of known
|
||||
- copies of the key is > or < than the numcopies setting, before running
|
||||
- the action. -}
|
||||
autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart
|
||||
autoCopies file key vs a = do
|
||||
autoCopies :: FilePath -> Key -> (Int -> Int -> Bool) -> CommandStart -> CommandStart
|
||||
autoCopies file key vs a = Annex.getState Annex.auto >>= go
|
||||
where
|
||||
go False = a
|
||||
go True = do
|
||||
numcopiesattr <- numCopies file
|
||||
needed <- getNumCopies numcopiesattr
|
||||
(_, have) <- trustPartition UnTrusted =<< Remote.keyLocations key
|
||||
if length have `vs` needed then a else stop
|
||||
|
||||
autoCopiesWith :: FilePath -> Key -> (Int -> Int -> Bool) -> (Maybe Int -> CommandStart) -> CommandStart
|
||||
autoCopiesWith file key vs a = do
|
||||
numcopiesattr <- numCopies file
|
||||
Annex.getState Annex.auto >>= auto numcopiesattr
|
||||
where
|
||||
|
|
|
@ -24,5 +24,5 @@ seek = [withField Command.Move.toOption Remote.byName $ \to ->
|
|||
-- A copy is just a move that does not delete the source file.
|
||||
-- However, --auto mode avoids unnecessary copies.
|
||||
start :: Maybe Remote -> Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
|
||||
start to from file (key, backend) = autoCopies file key (<) $ \_numcopies ->
|
||||
start to from file (key, backend) = autoCopies file key (<) $
|
||||
Command.Move.start to from False file (key, backend)
|
||||
|
|
|
@ -30,7 +30,7 @@ seek = [withField fromOption Remote.byName $ \from ->
|
|||
withFilesInGit $ whenAnnexed $ start from]
|
||||
|
||||
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
|
||||
start from file (key, _) = autoCopies file key (>) $ \numcopies ->
|
||||
start from file (key, _) = autoCopiesWith file key (>) $ \numcopies ->
|
||||
case from of
|
||||
Nothing -> startLocal file numcopies key
|
||||
Just remote -> do
|
||||
|
|
|
@ -24,7 +24,7 @@ seek = [withField Command.Move.fromOption Remote.byName $ \from ->
|
|||
|
||||
start :: Maybe Remote -> FilePath -> (Key, Backend) -> CommandStart
|
||||
start from file (key, _) = stopUnless (not <$> inAnnex key) $
|
||||
autoCopies file key (<) $ \_numcopies ->
|
||||
autoCopies file key (<) $
|
||||
case from of
|
||||
Nothing -> go $ perform key file
|
||||
Just src ->
|
||||
|
|
|
@ -169,7 +169,7 @@ checkPresent r o k = do
|
|||
- ensure that files are only moved into place once complete
|
||||
-}
|
||||
partialParams :: CommandParam
|
||||
partialParams = Params "--no-inplace --partial --partial-dir=.rsync-partial"
|
||||
partialParams = Params "--partial --partial-dir=.rsync-partial"
|
||||
|
||||
{- Runs an action in an empty scratch directory that can be used to build
|
||||
- up trees for rsync. -}
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
subject="comment 4"
|
||||
date="2012-07-08T18:13:58Z"
|
||||
content="""
|
||||
On kFreeBSD, I get this:
|
||||
|
||||
$ sysctl kern.maxfilesperproc
|
||||
kern.maxfilesperproc: 11095
|
||||
|
||||
But ulimit still has 1024 limit, so you'd need to adjust both, as root. Messy..
|
||||
"""]]
|
19
doc/bugs/__34__git_annex_watch__34___adds_map.dot.mdwn
Normal file
19
doc/bugs/__34__git_annex_watch__34___adds_map.dot.mdwn
Normal file
|
@ -0,0 +1,19 @@
|
|||
"git annex watch" will add the file generated by "git annex map", which is probably not intended. Shouldn’t this file be created in /tmp or .git/annex/ or somewhere else?
|
||||
|
||||
/tmp $ cd test/
|
||||
/tmp/test $ git init
|
||||
Initialized empty Git repository in /tmp/test/.git/
|
||||
/tmp/test $ git annex init
|
||||
init ok
|
||||
(Recording state in git...)
|
||||
/tmp/test $ git annex watch
|
||||
/tmp/test $ git annex map
|
||||
map /tmp/test ok
|
||||
|
||||
running: dot -Tx11 map.dot
|
||||
|
||||
ok
|
||||
/tmp/test $ ls -l
|
||||
insgesamt 4
|
||||
lrwxrwxrwx 1 jojo jojo 180 Jul 15 23:36 map.dot -> .git/annex/objects/P3/76/SHA256-s208--44199582b5948512ff12cf03de0b86fa1bebf09785dba2827fe52afee0afbe3d/SHA256-s208--44199582b5948512ff12cf03de0b86fa1bebf09785dba2827fe52afee0afbe3d
|
||||
|
60
doc/bugs/git_annex_does_nothing_useful.mdwn
Normal file
60
doc/bugs/git_annex_does_nothing_useful.mdwn
Normal file
|
@ -0,0 +1,60 @@
|
|||
As you can see, I'm running a pretty recent build of git-annex (ac799c3f363e0008b23e9c174e6fedc35e6fa92a),
|
||||
|
||||
$ git annex version
|
||||
git-annex version: 3.20120630
|
||||
local repository version: 3
|
||||
default repository version: 3
|
||||
supported repository versions: 3
|
||||
upgrade supported from repository versions: 0 1 2
|
||||
|
||||
We have a file here which isn't
|
||||
currently available (the link is shown in red),
|
||||
|
||||
$ ls -l plot.py
|
||||
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
|
||||
$
|
||||
|
||||
Yet git-annex should be able to tell us where it is,
|
||||
|
||||
$ git-annex whereis plot.py
|
||||
$
|
||||
|
||||
Hmm, well that's strange. What's happening here,
|
||||
|
||||
$ git-annex whereis plot.py -d
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","git-annex"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","--hash","refs/heads/git-annex"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","log","refs/heads/git-annex..d5582e05f41011b571a17003934fe9e40859e4be","--oneline","-n1"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","cat-file","--batch"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","plot.py"]
|
||||
$
|
||||
|
||||
Alright, well maybe `git-annex get` will work,
|
||||
|
||||
$ git annex get plot.py -d
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","plot.py"]
|
||||
$ ls -l plot.py
|
||||
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
|
||||
|
||||
Nope, the link is still shown in red.
|
||||
|
||||
Alright, what about `git-annex copy`?
|
||||
|
||||
$ git annex copy plot.py --from=goldnerlab --to=here -d
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","git-annex"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","show-ref","--hash","refs/heads/git-annex"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","log","refs/heads/git-annex..d5582e05f41011b571a17003934fe9e40859e4be","--oneline","-n1"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","cat-file","--batch"]
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","plot.py"]
|
||||
$ ls -l plot.py
|
||||
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
|
||||
|
||||
Still red.
|
||||
|
||||
Alright, what if I just try to get a non-existent file?
|
||||
|
||||
$ git annex get adsflkah -d
|
||||
git ["--git-dir=/home/ben/lori/analysis/data/.git","--work-tree=/home/ben/lori/analysis/data","ls-files","--cached","-z","--","adsflkah"]
|
||||
$
|
||||
|
||||
Alright, it didn't fail with an error, that's very strange. What is going on here?
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
|
||||
nickname="Ben"
|
||||
subject="comment 10"
|
||||
date="2012-07-10T14:17:42Z"
|
||||
content="""
|
||||
Hmm, the commands above seem to have worked on both machines (both running 3.20120630). I guess I should probably just try rebuilding my data/ repository from scratch, eh?
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
|
||||
nickname="Justin"
|
||||
subject="comment 11"
|
||||
date="2012-07-10T14:26:06Z"
|
||||
content="""
|
||||
I suppose.. joey can probably help you investigate exactly what went wrong. You might want to save an empty clone of the git repository for later..
|
||||
|
||||
The easiest way to fix the data is probably to run a `git annex uninit` in the old repository which will put the files back how they were before and then `git-annex import` them into a new repository.
|
||||
"""]]
|
|
@ -0,0 +1,18 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
ip="165.98.113.100"
|
||||
subject="comment 12"
|
||||
date="2012-07-11T23:23:04Z"
|
||||
content="""
|
||||
Looking at this a leetle more closely, you had:
|
||||
|
||||
<pre>
|
||||
lrwxrwxrwx 1 ben ben 77 Jul 6 14:01 plot.py -> ../.git/annex/objects/WORM:1301941019:720:plot.py/WORM:1301941019:720:plot.py
|
||||
</pre>
|
||||
|
||||
Well, that is not how a git-annex symlink currently looks, so it ignores it.
|
||||
|
||||
Apparently this repository was created with an old version of git-annex, possibly version 1, and you've dropped in the current version, but the normal upgrade machinery failed. This could happen if you made a new clone of a version 1 bare repository.
|
||||
|
||||
I suggest you first find out what version of git-annex was originally used to create this repository (i.e., version 0, 1, or 2 ... probably 1). Then make a clone, and run \"git config annex.version $N\" (where N is the version used). Then run \"git annex upgrade\" and you should be good to go. Remember to push or sync the upgrade back to the bare repo so you don't need to do this again.
|
||||
"""]]
|
|
@ -0,0 +1,7 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
subject="comment 1"
|
||||
date="2012-07-09T23:16:32Z"
|
||||
content="""
|
||||
`git ls-files` is not listing your file. Perhaps your file is not checked into git?
|
||||
"""]]
|
|
@ -0,0 +1,13 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
|
||||
nickname="Ben"
|
||||
subject="comment 2"
|
||||
date="2012-07-09T23:31:08Z"
|
||||
content="""
|
||||
Not really sure what to say about that other than,
|
||||
|
||||
$ git --git-dir=/home/ben/lori/analysis/data/.git --work-tree=/home/ben/lori/analysis/data ls-files --cached -- plot.py
|
||||
plot.py
|
||||
$
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,14 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
|
||||
nickname="Justin"
|
||||
subject="Remotes? "
|
||||
date="2012-07-10T00:23:11Z"
|
||||
content="""
|
||||
What does
|
||||
|
||||
git-annex status
|
||||
|
||||
Show?
|
||||
|
||||
Do you have any remotes configured? It looks like you don't somehow.
|
||||
"""]]
|
|
@ -0,0 +1,46 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
|
||||
nickname="Ben"
|
||||
subject="comment 4"
|
||||
date="2012-07-10T01:46:23Z"
|
||||
content="""
|
||||
$ git annex status
|
||||
supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL
|
||||
supported remote types: git S3 bup directory rsync web hook
|
||||
trusted repositories: 0
|
||||
semitrusted repositories: 3
|
||||
00000000-0000-0000-0000-000000000001 -- web
|
||||
02e4ea72-a77c-11e1-bbd7-0749b04e4b59 -- goldnerlab (Data for Goldner)
|
||||
3c1fd026-c794-11e1-8ebb-dbe8684e8a73 -- here
|
||||
untrusted repositories: 0
|
||||
dead repositories: 0
|
||||
transfers in progress: none
|
||||
available local disk space: 16 gigabytes (+1 megabyte reserved)
|
||||
local annex keys: 0
|
||||
local annex size: 0 bytes
|
||||
known annex keys: 0
|
||||
known annex size: 0 bytes
|
||||
bloom filter size: 16 mebibytes (0% full)
|
||||
backend usage:
|
||||
$ git remote
|
||||
goldnerlab
|
||||
$ git remote show goldnerlab
|
||||
* remote goldnerlab
|
||||
Fetch URL: goldnerlab:data
|
||||
Push URL: goldnerlab:data
|
||||
HEAD branch (remote HEAD is ambiguous, may be one of the following):
|
||||
master
|
||||
synced/master
|
||||
Remote branches:
|
||||
git-annex tracked
|
||||
master tracked
|
||||
synced/master tracked
|
||||
Local branch configured for 'git pull':
|
||||
master merges with remote master
|
||||
Local refs configured for 'git push':
|
||||
git-annex pushes to git-annex (up to date)
|
||||
master pushes to master (up to date)
|
||||
synced/master pushes to synced/master (up to date)
|
||||
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,16 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
|
||||
nickname="Justin"
|
||||
subject="comment 5"
|
||||
date="2012-07-10T03:03:27Z"
|
||||
content="""
|
||||
Well that's odd. You have remotes but no annexed files..
|
||||
|
||||
Can you post the commands you used to arrive at this situation? I'm not sure how you would have done that.. Maybe you just need a
|
||||
|
||||
git-annex sync
|
||||
|
||||
to get things going?
|
||||
|
||||
I think somehow you cloned the git repo but not the annex stuff.
|
||||
"""]]
|
|
@ -0,0 +1,44 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
|
||||
nickname="Ben"
|
||||
subject="comment 6"
|
||||
date="2012-07-10T03:26:35Z"
|
||||
content="""
|
||||
I can easily reproduce the issue as follows,
|
||||
|
||||
$ git clone goldnerlab:data
|
||||
Cloning into 'data'...
|
||||
remote: Counting objects: 61902, done.
|
||||
remote: Compressing objects: 100% (61354/61354), done.
|
||||
remote: Total 61902 (delta 356), reused 61902 (delta 356)
|
||||
Receiving objects: 100% (61902/61902), 5.50 MiB | 894 KiB/s, done.
|
||||
Resolving deltas: 100% (356/356), done.
|
||||
$ cd data
|
||||
$ git annex sync
|
||||
(merging origin/git-annex into git-annex...)
|
||||
commit
|
||||
(Recording state in git...)
|
||||
# On branch master
|
||||
nothing to commit (working directory clean)
|
||||
ok
|
||||
pull origin
|
||||
ok
|
||||
push origin
|
||||
Counting objects: 8, done.
|
||||
Delta compression using up to 2 threads.
|
||||
Compressing objects: 100% (5/5), done.
|
||||
Writing objects: 100% (6/6), 726 bytes, done.
|
||||
Total 6 (delta 1), reused 1 (delta 0)
|
||||
Auto packing the repository for optimum performance.
|
||||
warning: There are too many unreachable loose objects; run 'git prune' to remove them.
|
||||
To goldnerlab:data
|
||||
d5582e0..aaddf3c git-annex -> git-annex
|
||||
ok
|
||||
|
||||
Everything looks good so far. I verify that alex/plot.py doesn't exist. Now let's try getting it,
|
||||
|
||||
$ git annex get alex/plot.py -d
|
||||
git [\"--git-dir=/home/ben/data/.git\",\"--work-tree=/home/ben/data\",\"ls-files\",\"--cached\",\"-z\",\"--\",\"alex/plot.py\"]
|
||||
|
||||
Uh oh. ls confirms that get was unsuccessful.
|
||||
"""]]
|
|
@ -0,0 +1,67 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
|
||||
nickname="Justin"
|
||||
subject="comment 7"
|
||||
date="2012-07-10T12:37:43Z"
|
||||
content="""
|
||||
But how was the goldnerlab:data repository created? That looks to be where the problem is..
|
||||
|
||||
I have a slightly older version, but in general it should work the same..
|
||||
you can see right away, when I do git annex status it shows \"known annex keys: 1\".
|
||||
if you do git annex status on goldnerlab, does it say you have any annex keys?
|
||||
|
||||
|
||||
$ git-annex version
|
||||
git-annex version: 3.20120614~bpo60+1
|
||||
$ mkdir a
|
||||
$ cd a
|
||||
$ git init
|
||||
Initialized empty Git repository in /tmp/a/.git/
|
||||
$ git annex init a
|
||||
init a ok
|
||||
(Recording state in git...)
|
||||
$ echo hi > file
|
||||
$ git annex add file
|
||||
add file (checksum...) ok
|
||||
(Recording state in git...)
|
||||
$ git commit -m added
|
||||
fatal: No HEAD commit to compare with (yet)
|
||||
fatal: No HEAD commit to compare with (yet)
|
||||
[master (root-commit) cfa9049] added
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
create mode 120000 file
|
||||
$ cd ..
|
||||
$ git clone a a_clone
|
||||
Cloning into a_clone...
|
||||
done.
|
||||
$ cd a_clone
|
||||
$ git annex status
|
||||
(merging origin/git-annex into git-annex...)
|
||||
supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL
|
||||
supported remote types: git bup directory rsync web hook
|
||||
trusted repositories: 0
|
||||
semitrusted repositories: 3
|
||||
00000000-0000-0000-0000-000000000001 -- web
|
||||
445d616e-ca8b-11e1-b170-ff8b03c54243 -- origin (a)
|
||||
5d3db51c-ca8b-11e1-bbc3-039dd06ab47b -- here
|
||||
untrusted repositories: 0
|
||||
dead repositories: 0
|
||||
available local disk space: 63 megabytes (+1 megabyte reserved)
|
||||
local annex keys: 0
|
||||
local annex size: 0 bytes
|
||||
known annex keys: 1
|
||||
known annex size: 3 bytes
|
||||
backend usage:
|
||||
SHA256: 1
|
||||
(Recording state in git...)
|
||||
$ ls
|
||||
file
|
||||
$ cat file
|
||||
cat: file: No such file or directory
|
||||
$ git annex get file
|
||||
get file (from origin...) ok
|
||||
(Recording state in git...)
|
||||
$ cat file
|
||||
hi
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,30 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
|
||||
nickname="Ben"
|
||||
subject="comment 8"
|
||||
date="2012-07-10T13:02:37Z"
|
||||
content="""
|
||||
On goldnerlab,
|
||||
|
||||
$ git annex status
|
||||
supported backends: SHA256 SHA1 SHA512 SHA224 SHA384 SHA256E SHA1E SHA512E SHA224E SHA384E WORM URL
|
||||
supported remote types: git S3 bup directory rsync web hook
|
||||
trusted repositories: 0
|
||||
semitrusted repositories: 4
|
||||
00000000-0000-0000-0000-000000000001 -- web
|
||||
02e4ea72-a77c-11e1-bbd7-0749b04e4b59 -- here (Data for Goldner)
|
||||
351f3ddc-ca3e-11e1-a3fc-6338ef4724a7
|
||||
3c1fd026-c794-11e1-8ebb-dbe8684e8a73
|
||||
untrusted repositories: 0
|
||||
dead repositories: 0
|
||||
transfers in progress: none
|
||||
available local disk space: 2 terabytes (+1 megabyte reserved)
|
||||
local annex keys: 19101
|
||||
local annex size: 41 gigabytes
|
||||
known annex keys: 19122
|
||||
known annex size: 41 gigabytes
|
||||
bloom filter size: 16 mebibytes (3.8% full)
|
||||
backend usage:
|
||||
WORM: 38223
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
|
||||
nickname="Justin"
|
||||
subject="comment 9"
|
||||
date="2012-07-10T14:08:10Z"
|
||||
content="""
|
||||
Can you run the series of commands I had above on your two machines? I figure there are two possibilities:
|
||||
|
||||
1. There is something wrong with the git-annex versions you are using.
|
||||
2. There is something wrong with your repository. (\"warning: There are too many unreachable loose objects\"?)
|
||||
|
||||
so if you can make a temp repository on goldnerlab, then clone it on the other machine and see where it fails, that would be helpful.
|
||||
|
||||
after cloning git-annex status should hopefully say that you have 1 known key, not 0.
|
||||
|
||||
Obviously this won't fix the problem, but it will at least narrow it down.
|
||||
"""]]
|
|
@ -1,14 +0,0 @@
|
|||
We need a way to calculate space taken by certain files.
|
||||
|
||||
Use cases: I want to drop some files from my small disk. I need to figure out things that take most space, and drop them.
|
||||
|
||||
Usage examples:
|
||||
|
||||
git annex du -hs *.mp3
|
||||
git annex du -sBm --in=here *.ogg
|
||||
|
||||
Would be nice if it was compatible with standard unix `df`.
|
||||
|
||||
> `du -L` works.
|
||||
>
|
||||
> See also: [[forum/Wishlist:_getting_the_disk_used_by_a_subtree_of_files]]
|
|
@ -12,12 +12,13 @@ Feel free to chip in with comments! --[[Joey]]
|
|||
* Month 3 "easy setup": [[!traillink configurators]] [[!traillink pairing]]
|
||||
* Month 4 "polishing": [[!traillink cloud]] [[!traillink leftovers]]
|
||||
* Months 5-6 "9k bonus round": [[!traillink Android]] [[!traillink partial_content]]
|
||||
* Months 7-11: user-driven features and polishing
|
||||
* Month 12: "Windows purgatory" [[Windows]]
|
||||
|
||||
## not yet on the map:
|
||||
|
||||
* [[desymlink]]
|
||||
* [[deltas]]
|
||||
* In my overfunded nighmares: [[Windows]]
|
||||
|
||||
## blog
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ In other words, I was lost in the weeds for a lot of those hours...
|
|||
|
||||
At one point, something glorious happened, and it was always making exactly
|
||||
one commit for batch mode modifications of a lot of files (like untarring
|
||||
them). Unfortunatly, I had to lose that gloriousness due to another
|
||||
them). Unfortunately, I had to lose that gloriousness due to another
|
||||
potential race, which, while unlikely, would have made the program deadlock
|
||||
if it happened.
|
||||
|
||||
|
@ -40,7 +40,7 @@ are still open for write.
|
|||
|
||||
This works great! Starting up `git annex watch` when processes have files
|
||||
open is no longer a problem, and even if you're evil enough to try having
|
||||
muliple processes open the same file, it will complain and not annex it
|
||||
multiple processes open the same file, it will complain and not annex it
|
||||
until all the writers close it.
|
||||
|
||||
(Well, someone really evil could turn the write bit back on after git annex
|
||||
|
|
|
@ -3,13 +3,13 @@ to `kqueue`, and Haskell code to use that library. By now I think I
|
|||
understand kqueue fairly well -- there are some very tricky parts to the
|
||||
interface.
|
||||
|
||||
But... it still did't work. After building all this, my code was
|
||||
But... it still didn't work. After building all this, my code was
|
||||
failing the same way that the
|
||||
[haskell kqueue library failed](https://github.com/hesselink/kqueue/issues/1)
|
||||
yesterday. I filed a [bug report with a testcase]().
|
||||
|
||||
Then I thought to ask on #haskell. Got sorted out in quick order! The
|
||||
problem turns out to be that haskell's runtime has a peridic SIGALARM,
|
||||
problem turns out to be that haskell's runtime has a periodic SIGALARM,
|
||||
that is interrupting my kevent call. It can be worked around with `+RTS -V0`,
|
||||
but I put in a fix to retry to kevent when it's interrupted.
|
||||
|
||||
|
|
|
@ -10,13 +10,13 @@ But it's not all easy. Syncing should happen as fast as possible, so
|
|||
changes show up without delay. Eventually it'll need to support syncing
|
||||
between nodes that cannot directly contact one-another. Syncing needs to
|
||||
deal with nodes coming and going; one example of that is a USB drive being
|
||||
plugged in, which should immediatly be synced, but network can also come
|
||||
plugged in, which should immediately be synced, but network can also come
|
||||
and go, so it should periodically retry nodes it failed to sync with. To
|
||||
start with, I'll be focusing on fast syncing between directly connected
|
||||
nodes, but I have to keep this wider problem space in mind.
|
||||
|
||||
One problem with `git annex sync` is that it has to be run in both clones
|
||||
in order for changes to fully propigate. This is because git doesn't allow
|
||||
in order for changes to fully propagate. This is because git doesn't allow
|
||||
pushing changes into a non-bare repository; so instead it drops off a new
|
||||
branch in `.git/refs/remotes/$foo/synced/master`. Then when it's run locally
|
||||
it merges that new branch into `master`.
|
||||
|
|
|
@ -12,7 +12,7 @@ not sufficient. There are two problems with it:
|
|||
So, instead, git-annex will use a regular `git merge`, and if it fails, it
|
||||
will fix up the conflicts.
|
||||
|
||||
That presented its own difficully, of finding which files in the tree
|
||||
That presented its own difficulty, of finding which files in the tree
|
||||
conflict. `git ls-files --unmerged` is the way to do that, but its output
|
||||
is a quite raw form:
|
||||
|
||||
|
@ -21,9 +21,9 @@ is a quite raw form:
|
|||
100644 1eabec834c255a127e2e835dadc2d7733742ed9a 2 bar
|
||||
100644 36902d4d842a114e8b8912c02d239b2d7059c02b 3 bar
|
||||
|
||||
I had to stare at the rather inpenetrable documentation for hours and
|
||||
I had to stare at the rather impenetrable documentation for hours and
|
||||
write a lot of parsing and processing code to get from that to these mostly
|
||||
self expanatory data types:
|
||||
self explanatory data types:
|
||||
|
||||
data Conflicting v = Conflicting
|
||||
{ valUs :: Maybe v
|
||||
|
|
|
@ -35,7 +35,7 @@ more threads:
|
|||
1. Uploads new data to every configured remote. Triggered by the watcher
|
||||
thread when it adds content. Easy; just use a `TSet` of Keys to send.
|
||||
|
||||
2. Downloads new data from the cheapest remote that has it. COuld be
|
||||
2. Downloads new data from the cheapest remote that has it. Could be
|
||||
triggered by the
|
||||
merger thread, after it merges in a git sync. Rather hard; how does it
|
||||
work out what new keys are in the tree without scanning it all? Scan
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
Well, sometimes you just have to go for the hack. Trying to find a way
|
||||
to add additional options to git-annex-shell without breaking backwards
|
||||
compatability, I noticed that it ignores all options after `--`, because
|
||||
compatibility, I noticed that it ignores all options after `--`, because
|
||||
those tend to be random rsync options due to the way rsync runs it.
|
||||
|
||||
So, I've added a new class of options, that come in between, like
|
||||
|
|
|
@ -21,5 +21,5 @@ nontrivial features can be added easily.
|
|||
|
||||
--
|
||||
|
||||
Next up: Enough nonsense with tracking tranfers... Time to start actually
|
||||
Next up: Enough nonsense with tracking transfers... Time to start actually
|
||||
transferring content around!
|
||||
|
|
|
@ -6,7 +6,7 @@ Details follow..
|
|||
|
||||
Made the committer thread queue Upload Transfers when new files
|
||||
are added to the annex. Currently it tries to transfer the new content
|
||||
to *every* remote; this innefficiency needs to be addressed later.
|
||||
to *every* remote; this inefficiency needs to be addressed later.
|
||||
|
||||
Made the watcher thread queue Download Transfers when new symlinks
|
||||
appear that point to content we don't have. Typically, that will happen
|
||||
|
@ -30,12 +30,12 @@ all the assistant's other threads from entering that monad while a transfer
|
|||
is running. This is also necessary to allow multiple concurrent transfers
|
||||
to run in the future.
|
||||
|
||||
This is a very tricky peice of code, because that thread will modify the
|
||||
This is a very tricky piece of code, because that thread will modify the
|
||||
git-annex branch, and its parent thread has to invalidate its cache in
|
||||
order to see any changes the child thread made. Hopefully that's the extent
|
||||
of the complication of doing this. The only reason this was possible at all
|
||||
is that git-annex already supports multiple concurrent processes running
|
||||
and all making independant changes to the git-annex branch, etc.
|
||||
and all making independent changes to the git-annex branch, etc.
|
||||
|
||||
After all my groundwork this week, file content transferring is now
|
||||
fully working!
|
||||
|
|
31
doc/design/assistant/blog/day_27__robust_transfers.mdwn
Normal file
31
doc/design/assistant/blog/day_27__robust_transfers.mdwn
Normal file
|
@ -0,0 +1,31 @@
|
|||
Spent most of the day making file content transfers robust. There were lots
|
||||
of bugs, hopefully I've fixed most of them. It seems to work well now,
|
||||
even when I throw a lot of files at it.
|
||||
|
||||
One of the changes also sped up transfers; it no longer roundtrips to the
|
||||
remote to verify it has a file. The idea here is that when the assistant is
|
||||
running, repos should typically be fairly tightly synced to their remotes
|
||||
by it, so some of the extra checks that the `move` command does are
|
||||
unnecessary.
|
||||
|
||||
Also spent some time trying to use ghc's threaded runtime, but continue to
|
||||
be baffled by the random hangs when using it. This needs fixing eventually;
|
||||
all the assistant's threads can potentially be blocked when it's waiting on
|
||||
an external command it has run.
|
||||
|
||||
Also changed how transfer info files are locked. The lock file is now
|
||||
separate from the info file, which allows the TransferWatcher thread to
|
||||
notice when an info file is created, and thus actually track transfers
|
||||
initiated by remotes.
|
||||
|
||||
---
|
||||
|
||||
I'm fairly close now to merging the `assistant` branch into `master`.
|
||||
The data syncing code is very brute-force, but it will work well enough
|
||||
for a first cut.
|
||||
|
||||
Next I can either add some repository network mapping, and use graph
|
||||
analysis to reduce the number of data transfers, or I can move on to the
|
||||
[[webapp]]. Not sure yet which I'll do. It's likely that since DebConf
|
||||
begins tomorrow I'll put off either of those big things until after the
|
||||
conference.
|
|
@ -0,0 +1,17 @@
|
|||
I didn't plan to work on git-annex much while at DebConf, because the conference
|
||||
always prevents the kind of concentration I need. But I unexpectedly also had to deal
|
||||
with [three dead drives](http://joeyh.name/blog/entry/I_am_become_Joey_destroyer_of_drives/)
|
||||
and illness this week.
|
||||
|
||||
That said, I have been trying to debug a problem with git-annex and Haskell's threaded
|
||||
runtime all week. It just hangs, randomly. No luck so far isolating why, although I now
|
||||
have a branch that hangs fairly reliably, and in which I am trying to whittle the entire
|
||||
git-annex code base (all 18 thousand lines!) into a nice test case.
|
||||
|
||||
This threaded runtime problem doesn't affect the assistant yet, but if I want to use
|
||||
Yesod in developing the webapp, I'll need the threaded runtime, and using the threaded
|
||||
runtime in the assistant generally would make it more responsive and less hacky.
|
||||
|
||||
Since this is a task I can work on without much concentration, I'll probably keep beating
|
||||
on it until I return home. Then I need to spend some quality thinking time on where
|
||||
to go next in the assistant.
|
|
@ -1,6 +1,6 @@
|
|||
Last night I got `git annex watch` to also handle deletion of files.
|
||||
This was not as tricky as feared; the key is using `git rm --ignore-unmatch`,
|
||||
which avoids most problimatic situations (such as a just deleted file
|
||||
which avoids most problematic situations (such as a just deleted file
|
||||
being added back before git is run).
|
||||
|
||||
Also fixed some races when `git annex watch` is doing its startup scan of
|
||||
|
|
9
doc/design/assistant/blog/day_36__minimal_test_case.mdwn
Normal file
9
doc/design/assistant/blog/day_36__minimal_test_case.mdwn
Normal file
|
@ -0,0 +1,9 @@
|
|||
Managed to find a minimal, 20 line test case for at least one of the ways
|
||||
git-annex was hanging with GHC's threaded runtime. Sent it off to
|
||||
haskell-cafe for analysis.
|
||||
[thread](http://news.gmane.org/gmane.comp.lang.haskell.cafe)
|
||||
|
||||
Further managed to narrow the bug down to MissingH's use of logging code,
|
||||
that git-annex doesn't use. [bug report](http://bugs.debian.org/681621).
|
||||
So, I can at least get around this problem with a modified version of
|
||||
MissingH. Hopefully that was the only thing causing the hangs I was seeing!
|
|
@ -16,7 +16,7 @@ thread that wakes up periodically, flushes the queue, and autocommits.
|
|||
(This will, in fact, be the start of the [[syncing]] phase of my roadmap!)
|
||||
There's lots of room here for smart behavior. Like, if a lot of changes are
|
||||
being made close together, wait for them to die down before committing. Or,
|
||||
if it's been idle and a single file appears, commit it immediatly, since
|
||||
if it's been idle and a single file appears, commit it immediately, since
|
||||
this is probably something the user wants synced out right away. I'll start
|
||||
with something stupid and then add the smarts.
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ things slow and ugly. This was not unexpected.
|
|||
|
||||
So next, I added some smarts to it. First, I wanted to stop it waking up
|
||||
every second when there was nothing to do, and instead blocking wait on a
|
||||
change occuring. Secondly, I wanted it to know when past changes happened,
|
||||
change occurring. Secondly, I wanted it to know when past changes happened,
|
||||
so it could detect batch mode scenarios, and avoid committing too
|
||||
frequently.
|
||||
|
||||
|
@ -52,6 +52,6 @@ shouldCommit now changetimes
|
|||
thisSecond t = now `diffUTCTime` t <= 1
|
||||
"""]]
|
||||
|
||||
Still some polishing to do to eliminate minor innefficiencies and deal
|
||||
Still some polishing to do to eliminate minor inefficiencies and deal
|
||||
with more races, but this part of the git-annex assistant is now very usable,
|
||||
and will be going out to my beta testers soon!
|
||||
|
|
|
@ -24,7 +24,7 @@ symlinks might have just been deleted and re-added, or changed, and
|
|||
the index still have the old value.
|
||||
|
||||
Instead, I got creative. :) We can't trust what the index says about the
|
||||
symlink, but if the index happens to contian a symlink that looks right,
|
||||
symlink, but if the index happens to contain a symlink that looks right,
|
||||
we can trust that the SHA1 of its blob is the right SHA1, and reuse it
|
||||
when re-staging the symlink. Wham! Massive speedup!
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ own git index parser (or use one from Hackage), this check requires running
|
|||
tree of files is being moved or unpacked into the watched directory.
|
||||
|
||||
Instead, I made it only do the check during `git annex watch`'s initial
|
||||
scan of the tree. This should be ok, because once it's running, you
|
||||
scan of the tree. This should be OK, because once it's running, you
|
||||
won't be adding new files to git anyway, since it'll automatically annex
|
||||
new files. This is good enough for now, but there are at least two problems
|
||||
with it:
|
||||
|
|
|
@ -16,7 +16,7 @@ quickly is really only important so people don't think it's a resource hog.
|
|||
First impressions are important. :)
|
||||
|
||||
But what does "made recently" mean exactly? Well, my answer is possibly
|
||||
overengineered, but most of it is really groundwork for things I'll need
|
||||
over-engineered, but most of it is really groundwork for things I'll need
|
||||
later anyway. I added a new data structure for tracking the status of the
|
||||
daemon, which is periodically written to disk by another thread (thread #6!)
|
||||
to `.git/annex/daemon.status`. Currently it looks like this; I anticipate
|
||||
|
|
|
@ -3,11 +3,11 @@ all the other git clones, at both the git level and the key/value level.
|
|||
|
||||
## immediate action items
|
||||
|
||||
* Check that download transfer triggering code works (when a symlink appears
|
||||
and the remote does *not* upload to us).
|
||||
* At startup, and possibly periodically, look for files we have that
|
||||
location tracking indicates remotes do not, and enqueue Uploads for
|
||||
them. Also, enqueue Downloads for any files we're missing.
|
||||
* After git sync, identify content that we don't have that is now available
|
||||
on remotes, and transfer.
|
||||
|
||||
## longer-term TODO
|
||||
|
||||
|
@ -29,6 +29,9 @@ all the other git clones, at both the git level and the key/value level.
|
|||
only uploading new files but not downloading, and only downloading
|
||||
files in some directories and not others. See for use cases:
|
||||
[[forum/Wishlist:_options_for_syncing_meta-data_and_data]]
|
||||
* speed up git syncing by using the cached ssh connection for it too
|
||||
(will need to use `GIT_SSH`, which needs to point to a command to run,
|
||||
not a shell command line)
|
||||
|
||||
## misc todo
|
||||
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawlup4hyZo4eCjF8T85vfRXMKBxGj9bMdl0"
|
||||
nickname="Ben"
|
||||
subject="ARM support"
|
||||
date="2012-07-13T16:51:15Z"
|
||||
content="""
|
||||
The closure of [this](http://hackage.haskell.org/trac/ghc/ticket/5839) ticket hopefully marks the end of TH issues on ARM. As of 7.4.2, GHC's linker has enough ARM support to allow a selection of common packages to compile on my PandaBoard. That being said, it hasn't had a whole lot of testing so it's possible I still need to implement a few relocation types.
|
||||
"""]]
|
|
@ -0,0 +1,12 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawncBlzaDI248OZGjKQMXrLVQIx4XrZrzFo"
|
||||
nickname="Perttu"
|
||||
subject="comment 2"
|
||||
date="2012-07-07T17:45:43Z"
|
||||
content="""
|
||||
Ah, I didn't read the man page carefully enough. My apologies.
|
||||
|
||||
Setting the ignore status based on an exit status would be
|
||||
even better, since this avoids re-writing a new config file for
|
||||
each repository each time I enter or exit my LAN.
|
||||
"""]]
|
12
doc/forum/pulling_from_encrypted_remote.mdwn
Normal file
12
doc/forum/pulling_from_encrypted_remote.mdwn
Normal file
|
@ -0,0 +1,12 @@
|
|||
Is there a way to pull from an encrypted remote?
|
||||
|
||||
Use case:
|
||||
|
||||
1. Have annex in an encrypted public rsync remote
|
||||
2. Have USB stick with PGP keys (but not the annex repository)
|
||||
3. Get to a new computer
|
||||
4. Set up a new annex using the PGP keys I have.
|
||||
|
||||
1-3 work fine :) However, the 4th is the issue:
|
||||
|
||||
How would I do `git pull <remote>` for an encrypted remote? Is it possible?
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmBUR4O9mofxVbpb8JV9mEbVfIYv670uJo"
|
||||
nickname="Justin"
|
||||
subject="comment 1"
|
||||
date="2012-07-10T18:14:31Z"
|
||||
content="""
|
||||
You just need to `git clone` the existing repository and make sure the `git remote`s are set up.
|
||||
|
||||
the 'rsync remote' is not actually the annex, it's just a collection of encrypted files with obfuscated names. You need a copy of the actual repository to restore the files.
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://bergey.dreamwidth.org/"
|
||||
ip="66.80.90.109"
|
||||
subject="git-media"
|
||||
date="2012-07-14T15:42:05Z"
|
||||
content="""
|
||||
I haven't used git-media, but from the README it looks as though they now support several backends. Might want to update the (very helpful!) comparison.
|
||||
"""]]
|
8
doc/todo/assistant_git_sync_laddering.mdwn
Normal file
8
doc/todo/assistant_git_sync_laddering.mdwn
Normal file
|
@ -0,0 +1,8 @@
|
|||
When the [[design/assistant]] is running on a pair of remotes, I've seen
|
||||
them get out of sync, such that every pull and merge results in a conflict,
|
||||
that then has to be auto-resolved.
|
||||
|
||||
This seems similar to the laddering problem described in this old bug:
|
||||
[[bugs/making_annex-merge_try_a_fast-forward]]
|
||||
|
||||
--[[Joey]]
|
|
@ -0,0 +1,16 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmB-gCGEs--zfmvYU-__Hj2FbliUXgxMDs"
|
||||
nickname="Jakub"
|
||||
subject="Path problems"
|
||||
date="2012-07-13T19:15:15Z"
|
||||
content="""
|
||||
Hi,
|
||||
|
||||
I have the same 'git-annex-shell command not found' problem as above. I've installed git annex via cabal into my ~/.haskell_bin directory. Then I've added this dir both to ~/.bashrc and ~/.zshrc. I can run git annex or 'git annex-shell' and everything is fine. My guess is that haskell is trying to spawn git-annex-shell with some current $PATH unaware shell like dash maybe?
|
||||
|
||||
I've fixed this behavior by using a really ugly hack - I've symlinked ~/.haskell_bin/git-annex-shell to /usr/bin/git-annex-shell on all my machines and the problem is gone. Somehow haskell (or whatever is trying to call git-annex-shell) is unaware of path modifications from .bashrc/.zshrc
|
||||
|
||||
Here is the path modification I've used:
|
||||
|
||||
export PATH=~/.haskell_bin:$PATH
|
||||
"""]]
|
|
@ -0,0 +1,23 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawmB-gCGEs--zfmvYU-__Hj2FbliUXgxMDs"
|
||||
nickname="Jakub"
|
||||
subject="Fixed"
|
||||
date="2012-07-13T19:27:46Z"
|
||||
content="""
|
||||
Found the problem:
|
||||
|
||||
One should never use ~ in such a path:
|
||||
|
||||
WRONG export PATH=~/somedir:$PATH
|
||||
|
||||
Instead one should use $HOME:
|
||||
|
||||
GOOD export PATH=$HOME/somedir:$PATH
|
||||
|
||||
Can I suppress the message that shell failed with status 255 when a repo is unavailable? I've got two repos pointing to one machine - either via vpn or local lan and I keep getting errors if one is unavailable:
|
||||
|
||||
ssh: connect to host 10.9.0.1 port 39882: No route to host
|
||||
Command ssh [\"-S\",\"/home/pielgrzym/annex/.git/annex/ssh/nas\",\"-o\",\"ControlMaster=auto\",\"-o\",\"ControlPersist=yes\",\"nas\",\"git-annex-shell 'configlist' '/~/annex'\"] failed; exit code 255
|
||||
|
||||
|
||||
"""]]
|
Loading…
Add table
Reference in a new issue