Merge branch 'master' into watch
This commit is contained in:
commit
483b1b08c6
24 changed files with 225 additions and 48 deletions
|
@ -12,7 +12,7 @@ module Annex.CatFile (
|
|||
catFileHandle
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
import Common.Annex
|
||||
import qualified Git
|
||||
|
|
|
@ -10,8 +10,7 @@
|
|||
module Command.Unused where
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Text.Lazy as L
|
||||
import qualified Data.Text.Lazy.Encoding as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.BloomFilter
|
||||
import Data.BloomFilter.Easy
|
||||
import Data.BloomFilter.Hash
|
||||
|
@ -265,8 +264,9 @@ withKeysReferencedInGitRef a ref = do
|
|||
go [] = noop
|
||||
go (l:ls)
|
||||
| isSymLink (LsTree.mode l) = do
|
||||
content <- L.decodeUtf8 <$> catFile ref (LsTree.file l)
|
||||
case fileKey (takeFileName $ L.unpack content) of
|
||||
content <- encodeW8 . L.unpack
|
||||
<$> catFile ref (LsTree.file l)
|
||||
case fileKey (takeFileName content) of
|
||||
Nothing -> go ls
|
||||
Just k -> do
|
||||
a k
|
||||
|
|
|
@ -26,7 +26,7 @@ module Crypto (
|
|||
prop_hmacWithCipher_sane
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.ByteString.Lazy.UTF8 (fromString)
|
||||
import Data.Digest.Pure.SHA
|
||||
import Control.Applicative
|
||||
|
|
|
@ -15,8 +15,8 @@ module Git.CatFile (
|
|||
) where
|
||||
|
||||
import System.IO
|
||||
import qualified Data.ByteString.Char8 as S
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
import Common
|
||||
import Git
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
module Remote.Bup (remote) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Map as M
|
||||
import System.Process
|
||||
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
|
||||
module Remote.Directory (remote) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Char8 as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.Map as M
|
||||
import qualified Control.Exception as E
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
module Remote.Hook (remote) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Map as M
|
||||
import System.Exit
|
||||
import System.Environment
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
module Remote.Rsync (remote) where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Map as M
|
||||
|
||||
import Common.Annex
|
||||
|
|
|
@ -13,6 +13,8 @@ import Foreign.C
|
|||
import System.IO
|
||||
import System.IO.Unsafe
|
||||
import qualified Data.Hash.MD5 as MD5
|
||||
import Data.Word
|
||||
import Data.Bits.Utils
|
||||
|
||||
{- Sets a Handle to use the filesystem encoding. This causes data
|
||||
- written or read from it to be encoded/decoded the same
|
||||
|
@ -29,7 +31,7 @@ withFilePath :: FilePath -> (CString -> IO a) -> IO a
|
|||
withFilePath fp f = Encoding.getFileSystemEncoding
|
||||
>>= \enc -> GHC.withCString enc fp f
|
||||
|
||||
{- Encodes a FilePath into a Str, applying the filesystem encoding.
|
||||
{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding.
|
||||
-
|
||||
- This use of unsafePerformIO is believed to be safe; GHC's interface
|
||||
- only allows doing this conversion with CStrings, and the CString buffer
|
||||
|
@ -41,3 +43,15 @@ encodeFilePath :: FilePath -> MD5.Str
|
|||
encodeFilePath fp = MD5.Str $ unsafePerformIO $ do
|
||||
enc <- Encoding.getFileSystemEncoding
|
||||
GHC.withCString enc fp $ GHC.peekCString Encoding.char8
|
||||
|
||||
{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding.
|
||||
-
|
||||
- w82s produces a String, which may contain Chars that are invalid
|
||||
- unicode. From there, this is really a simple matter of applying the
|
||||
- file system encoding, only complicated by GHC's interface to doing so.
|
||||
-}
|
||||
{-# NOINLINE encodeW8 #-}
|
||||
encodeW8 :: [Word8] -> FilePath
|
||||
encodeW8 w8 = unsafePerformIO $ do
|
||||
enc <- Encoding.getFileSystemEncoding
|
||||
GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
module Utility.Gpg where
|
||||
|
||||
import qualified Data.ByteString.Lazy.Char8 as L
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import System.Posix.Types
|
||||
import Control.Applicative
|
||||
import Control.Concurrent
|
||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -5,6 +5,7 @@ git-annex (3.20120616) UNRELEASED; urgency=low
|
|||
need to manually run git commands when manipulating files.
|
||||
Available on Linux, BSDs, and OSX!
|
||||
* Enable diskfree on kfreebsd, using statvfs.
|
||||
* unused: Fix crash when key names contain invalid utf8.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Tue, 12 Jun 2012 11:35:59 -0400
|
||||
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
What steps will reproduce the problem?
|
||||
I don't know exactly when it started
|
||||
|
||||
What is the expected output? What do you see instead?
|
||||
When I run git annex unused I get
|
||||
|
||||
unused . (checking for unused data...) (checking master...) git-annex: Cannot decode byte '\xb4': Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream
|
||||
|
||||
Most likely I have added some file with a strange encoding that git-annex can't decode. The problem is that the unused process aborts because of this.
|
||||
|
||||
What version of git-annex are you using? On what operating system?
|
||||
3.20120522, Debian testing
|
||||
|
||||
> I've just fixed this bug in git, will be in the next release. --[[Joey]]
|
||||
> [[done]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
ip="4.154.2.6"
|
||||
subject="comment 1"
|
||||
date="2012-06-20T14:30:27Z"
|
||||
content="""
|
||||
Try running `git annex unused --debug`; this will tell us the git command that's outputting the data it cannot process. Then you can try running that git command and see what the problem filename is.
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
ip="4.154.2.6"
|
||||
subject="comment 2"
|
||||
date="2012-06-20T14:34:23Z"
|
||||
content="""
|
||||
Your `locale` setting may also be relevant. FWIW, I've tried to create a file with `\xb4` in its name and have not gotten git-annex unused to crash on it.
|
||||
"""]]
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawnXgp-iIaBK5pnk22xqMVERQb97VyXaejs"
|
||||
nickname="Kristian"
|
||||
subject="comment 3"
|
||||
date="2012-06-20T14:37:09Z"
|
||||
content="""
|
||||
This is what happens when I add the debug parameter
|
||||
|
||||
git annex unused --debug
|
||||
|
||||
unused . (checking for unused data...) git [\"--git-dir=/home/kristian/AnnexMedia/.git\",\"--work-tree=/home/kristian/AnnexMedia\",\"ls-files\",\"--cached\",\"-z\",\"--\",\"/home/kristian/AnnexMedia\"]
|
||||
git [\"--git-dir=/home/kristian/AnnexMedia/.git\",\"--work-tree=/home/kristian/AnnexMedia\",\"show-ref\"]
|
||||
(checking master...) git [\"--git-dir=/home/kristian/AnnexMedia/.git\",\"--work-tree=/home/kristian/AnnexMedia\",\"ls-tree\",\"--full-tree\",\"-z\",\"-r\",\"--\",\"refs/heads/master\"]
|
||||
git [\"--git-dir=/home/kristian/AnnexMedia/.git\",\"--work-tree=/home/kristian/AnnexMedia\",\"cat-file\",\"--batch\"]
|
||||
git-annex: Cannot decode byte '\xb4': Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
ip="4.154.2.6"
|
||||
subject="comment 4"
|
||||
date="2012-06-20T14:49:09Z"
|
||||
content="""
|
||||
Ah, reproduced it; need to use the WORM backend and have the file present in another branch..
|
||||
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,19 @@
|
|||
[[!comment format=mdwn
|
||||
username="https://www.google.com/accounts/o8/id?id=AItOawnXgp-iIaBK5pnk22xqMVERQb97VyXaejs"
|
||||
nickname="Kristian"
|
||||
subject="comment 5"
|
||||
date="2012-06-20T14:55:33Z"
|
||||
content="""
|
||||
I checked out the git annex branch and using
|
||||
|
||||
find * | grep -P \"[\xb4]\"
|
||||
|
||||
I found a file
|
||||
|
||||
43e/b16/WORM-s4118528-m1245167306--Jerry Lee Lewis - Whole Lotta Shakin\302\264 Going\302\264 On.mp3.log
|
||||
|
||||
The corresponding file also existed in the master branch (as a link).
|
||||
|
||||
I moved both these files to a folder outside my repository and synched my git-annex branch with my master server. I still get the same error. Is there any other place where information about this file is stored?
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://joeyh.name/"
|
||||
ip="4.154.2.6"
|
||||
subject="comment 6"
|
||||
date="2012-06-20T16:59:53Z"
|
||||
content="""
|
||||
git-annex was not crashing due to content in the git-annex branch, but due to a symlink in one of your regular git branches, probably master and origin/master.
|
||||
|
||||
This bug is fixed in git master, if you need the fix before the next release.
|
||||
"""]]
|
37
doc/bugs/watch_command_on_OSX_10.7.mdwn
Normal file
37
doc/bugs/watch_command_on_OSX_10.7.mdwn
Normal file
|
@ -0,0 +1,37 @@
|
|||
Running the tip of the watch branch on OSX in an annex'ed directory.
|
||||
|
||||
The watch command detects the changes, does _something_, see the output below.
|
||||
|
||||
Output from watch command
|
||||
|
||||
<pre>
|
||||
(Recording state in git...)
|
||||
Added "./KeePass2.18.dmg"
|
||||
Added "./KeePassX-0.4.3.dmg"
|
||||
add ./KeePass2.18.dmg (checksum...) ok
|
||||
add ./KeePassX-0.4.3.dmg (checksum...) ok
|
||||
</pre>
|
||||
|
||||
State of the annex
|
||||
|
||||
<pre>
|
||||
laplace:annex jtang$ git status
|
||||
# On branch master
|
||||
# Untracked files:
|
||||
# (use "git add <file>..." to include in what will be committed)
|
||||
#
|
||||
# KeePass2.18.dmg
|
||||
# KeePassX-0.4.3.dmg
|
||||
nothing added to commit but untracked files present (use "git add" to track)
|
||||
</pre>
|
||||
|
||||
It seems to not do a git add and commit after the creation of the symlinks, manually doing this makes it all happy again till more files are added.
|
||||
|
||||
Note: I had posted a comment in the blog post, but posting the issue here is probably more appropriate.
|
||||
|
||||
> Yeah, this is the issue I was struggling with last night.
|
||||
> I think it's fixed in 57cf65eb6d811ba7fd19eb62a54e3b83a0c2dfa7,
|
||||
> but the kqueue watch still needs a lot of work. --[[Joey]]
|
||||
|
||||
>> Confirmed this is fixed, but do note the known kqueue bugs in
|
||||
>> [[design/assistant/inotify]]! [[done]] --[[Joey]]
|
34
doc/design/assistant/blog/day_13__kqueue_continued.mdwn
Normal file
34
doc/design/assistant/blog/day_13__kqueue_continued.mdwn
Normal file
|
@ -0,0 +1,34 @@
|
|||
Good news! My beta testers report that the new kqueue code works on OSX.
|
||||
At least "works" as well as it does on Debian kFreeBSD. My crazy
|
||||
development strategy of developing on Debian kFreeBSD while targeting Mac
|
||||
OSX is vindicated. ;-)
|
||||
|
||||
So, I've been beating the kqueue code into shape for the last 12 hours,
|
||||
minus a few hours sleep.
|
||||
|
||||
First, I noticed it was seeming to starve the other threads. I'm using
|
||||
Haskell's non-threaded runtime, which does cooperative multitasking between
|
||||
threads, and my C code was never returning to let the other threads run.
|
||||
Changed that around, so the C code runs until SIGALARMed, and then that
|
||||
thread calls `yield` before looping back into the C code. Wow, cooperative
|
||||
multitasking.. I last dealt with that when programming for Windows 3.1!
|
||||
(Should try to use Haskell's -threaded runtime sometime, but git-annex
|
||||
doesn't work under it, and I have not tried to figure out why not.)
|
||||
|
||||
Then I made a [single commit](http://source.git-annex.branchable.com/?p=source.git;a=commitdiff;h=2bfcc0b09c5dd37c5e0ab65cb089232bfcc31934),
|
||||
with no testing, in which I made the kqueue code maintain a cache of what
|
||||
it expects in the directory tree, and use that to determine what files
|
||||
changed how when a change is detected. Serious code. It worked on the
|
||||
first go. If you were wondering why I'm writing in Haskell ... yeah,
|
||||
that's why.
|
||||
|
||||
And I've continued to hammer on the kqueue code, making lots of little
|
||||
fixes, and at this point it seems *almost* able to handle the changes I
|
||||
throw at it. It does have one big remaining problem; kqueue doesn't tell me
|
||||
when a writer closes a file, so it will sometimes miss adding files. To fix
|
||||
this, I'm going to need to make it maintain a queue of new files, and
|
||||
periodically check them, with `lsof`, to see when they're done being
|
||||
written to, and add them to the annex. So while a file is being written
|
||||
to, `git annex watch` will have to wake up every second or so, and run
|
||||
`lsof` ... and it'll take it at least 1 second to notice a file's complete.
|
||||
Not ideal, but the best that can be managed with kqueue.
|
|
@ -13,6 +13,14 @@ There is a `watch` branch in git that adds the command.
|
|||
|
||||
* When you `git annex unlock` a file, it will immediately be re-locked.
|
||||
|
||||
* With kqueue, if a file is created and still has a writer, it'll
|
||||
give up adding it, and it will never get added. This is because kqueue
|
||||
cannot track file closes. Need to go back and check these files every
|
||||
second or something.
|
||||
|
||||
* Kqueue has to open every directory it watches, so too many directories
|
||||
will run it out of the max number of open files (typically 1024), and fail.
|
||||
|
||||
## beyond Linux
|
||||
|
||||
I'd also like to support OSX and if possible the BSDs.
|
||||
|
@ -58,40 +66,8 @@ I'd also like to support OSX and if possible the BSDs.
|
|||
|
||||
* Windows has a Win32 ReadDirectoryChangesW, and perhaps other things.
|
||||
|
||||
## beyond Linux
|
||||
|
||||
I'd also like to support OSX and if possible the BSDs.
|
||||
|
||||
* kqueue ([haskell bindings](http://hackage.haskell.org/package/kqueue))
|
||||
is supported by FreeBSD, OSX, and other BSDs.
|
||||
|
||||
From what I can find, kqueue does not provide full directory watching
|
||||
capabilities. To watch a file, you have to have an open file descriptor
|
||||
to the file. This wouldn't scale.
|
||||
|
||||
Gamin does the best it can with just kqueue, supplemented by polling.
|
||||
The source file `server/gam_kqueue.c` makes for interesting reading.
|
||||
Using gamin to do the heavy lifting is one option.
|
||||
([haskell bindings](http://hackage.haskell.org/package/hlibfam) for FAM;
|
||||
gamin shares the API)
|
||||
|
||||
* hfsevents ([haskell bindings](http://hackage.haskell.org/package/hfsevents))
|
||||
is OSX specific.
|
||||
|
||||
Originally it was only directory level, and you were only told a
|
||||
directory had changed and not which file. Based on the haskell
|
||||
binding's code, from OSX 10.7.0, file level events were added.
|
||||
|
||||
This will be harder for me to develop for, since I don't have access to
|
||||
OSX machines..
|
||||
|
||||
* Windows has a Win32 ReadDirectoryChangesW, and perhaps other things.
|
||||
|
||||
## todo
|
||||
|
||||
- Support OSes other than Linux; it only uses inotify currently.
|
||||
OSX and FreeBSD use the same mechanism, and there is a Haskell interface
|
||||
for it,
|
||||
- Run niced and ioniced? Seems to make sense, this is a background job.
|
||||
- configurable option to only annex files meeting certain size or
|
||||
filename criteria
|
||||
|
|
14
doc/forum/Wishlist:_automatic_reinject.mdwn
Normal file
14
doc/forum/Wishlist:_automatic_reinject.mdwn
Normal file
|
@ -0,0 +1,14 @@
|
|||
I think it would be useful to supplement the `reinject` command with an automatic
|
||||
mode which calculates the checksum of the source file and injects the file if it
|
||||
is known to the repository (without the need to provide a destination filename).
|
||||
In addition, this could be done recursively if the user provides a directory to
|
||||
inject. All this can probably be done already with some plumbing, but a simple
|
||||
`reinject --auto` (or `scour`, or `scavenge`, if you like) would be a nice addition.
|
||||
Of course this would only work for the checksum backends.
|
||||
|
||||
Example use cases would be:
|
||||
|
||||
* Recovering data from lost+found easily
|
||||
* Making use of old (pre-git-annex) archival volumes with useful files
|
||||
scattered among non-useful files
|
||||
* Sneaker-netting files between disconnected git-annex repositories
|
|
@ -31,4 +31,4 @@ if [ "$?" = 1 ]; then
|
|||
fi
|
||||
</pre>
|
||||
|
||||
It's also using the branches-local script for sorting and prioritising the branches to build, this branches-local script can be found at the [autobuild-ceph](https://github.com/ceph/autobuild-ceph/blob/master/branches-local) repository. If there are other people interested in setting up their own instances of gitbuilder for git-annex, please let me know and I will setup an aggregator page to collect status of the builds. The builder runs and updates the webpage every 30mins.
|
||||
It's also using the branches-local script for sorting and prioritising the branches to build; this branches-local script can be found at the [autobuild-ceph](https://github.com/ceph/autobuild-ceph/blob/master/branches-local) repository. If there are other people interested in setting up their own instances of gitbuilder for git-annex, please let me know and I will set up an aggregator page to collect status of the builds. The builder runs and updates on a very regular basis.
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
[[!comment format=mdwn
|
||||
username="http://www.davidhaslem.com/"
|
||||
nickname="David"
|
||||
subject="comment 7"
|
||||
date="2012-06-19T04:41:27Z"
|
||||
content="""
|
||||
$(brew --prefix) should, in most cases, be /usr/local. That's the recommended install location for homebrew.
|
||||
|
||||
I already had git installed and homebrew as my package manager - my install steps were as follows:
|
||||
|
||||
1. brew install haskell-platform ossp-uuid md5sha1sum coreutils pcre
|
||||
2. PATH=\"$(brew --prefix coreutils)/libexec/gnubin:$PATH\" cabal install git-annex
|
||||
|
||||
"""]]
|
Loading…
Add table
Reference in a new issue