Merge branch 'master' into android-rebuild

This commit is contained in:
Joey Hess 2013-09-22 22:45:51 -04:00
commit dcb9dead6f
14 changed files with 159 additions and 19 deletions

View file

@ -1,6 +1,6 @@
{- git-annex SHA backends
-
- Copyright 2011,2012 Joey Hess <joey@kitenet.net>
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -12,10 +12,10 @@ import qualified Annex
import Types.Backend
import Types.Key
import Types.KeySource
import Utility.Hash
import Utility.ExternalSHA
import qualified Build.SysConfig as SysConfig
import Data.Digest.Pure.SHA
import qualified Data.ByteString.Lazy as L
import Data.Char
@ -70,12 +70,14 @@ shaCommand shasize filesize
| shasize == 512 = use SysConfig.sha512 sha512
| otherwise = error $ "bad sha size " ++ show shasize
where
use Nothing sha = Left $ showDigest . sha
use (Just c) sha
{- use builtin, but slower sha for small files
- benchmarking indicates it's faster up to
- and slightly beyond 50 kb files -}
| filesize < 51200 = use Nothing sha
use Nothing hasher = Left $ show . hasher
use (Just c) hasher
{- Use builtin, but slightly slower hashing for
- smallish files. Cryptohash benchmarks at 90 to 101%
- of the speed of external hashers, depending on the hash
- and system. So there is no point forking an external
- process unless the file is large. -}
| filesize < 1048576 = use Nothing hasher
| otherwise = Right c
{- A key is a checksum of its contents. -}

View file

@ -18,7 +18,6 @@ module Git.CatFile (
import System.IO
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.Digest.Pure.SHA
import Data.Char
import System.Process (std_out, std_err)
import Numeric
@ -31,6 +30,7 @@ import Git.Command
import Git.Types
import Git.FilePath
import qualified Utility.CoProcess as CoProcess
import Utility.Hash
data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo
@ -103,7 +103,7 @@ catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send rece
}
fileEncoding h
content <- L.hGetContents h
let sha = (\s -> length s `seq` s) (showDigest $ sha1 content)
let sha = (\s -> length s `seq` s) (show $ sha1 content)
ok <- checkSuccessProcess pid
return $ if ok
then Just (content, Ref sha)

View file

@ -10,6 +10,7 @@ module Remote.Bup (remote) where
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import System.Process
import Data.ByteString.Lazy.UTF8 (fromString)
import Common.Annex
import Types.Remote
@ -25,8 +26,7 @@ import Remote.Helper.Ssh
import Remote.Helper.Special
import Remote.Helper.Encryptable
import Crypto
import Data.ByteString.Lazy.UTF8 (fromString)
import Data.Digest.Pure.SHA
import Utility.Hash
import Utility.UserInfo
import Annex.Content
import Annex.UUID
@ -277,7 +277,7 @@ bup2GitRemote r
bupRef :: Key -> String
bupRef k
| Git.Ref.legal True shown = shown
| otherwise = "git-annex-" ++ showDigest (sha256 (fromString shown))
| otherwise = "git-annex-" ++ show (sha256 (fromString shown))
where
shown = key2file k

View file

@ -1,6 +1,7 @@
{- Calculating a SHA checksum with an external command.
-
- This is often faster than using Haskell libraries.
- This is typically a bit faster than using Haskell libraries,
- by around 1% to 10%. Worth it for really big files.
-
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-

29
Utility/Hash.hs Normal file
View file

@ -0,0 +1,29 @@
{- Convenience wrapper around cryptohash.
-
- The resulting Digests can be shown to get a canonical hash encoding. -}
module Utility.Hash where
import Crypto.Hash
import qualified Data.ByteString.Lazy as L
{- Hashes a lazy ByteString, yielding its SHA1 Digest. -}
sha1 :: L.ByteString -> Digest SHA1
sha1 content = hashlazy content
{- Hashes a lazy ByteString, yielding its SHA224 Digest. -}
sha224 :: L.ByteString -> Digest SHA224
sha224 content = hashlazy content
{- Hashes a lazy ByteString, yielding its SHA256 Digest. -}
sha256 :: L.ByteString -> Digest SHA256
sha256 content = hashlazy content
{- Hashes a lazy ByteString, yielding its SHA384 Digest. -}
sha384 :: L.ByteString -> Digest SHA384
sha384 content = hashlazy content
{- Hashes a lazy ByteString, yielding its SHA512 Digest. -}
sha512 :: L.ByteString -> Digest SHA512
sha512 content = hashlazy content
-- sha3 is not yet fully standardized
--sha3 :: L.ByteString -> Digest SHA3
--sha3 = hashlazy

View file

@ -12,6 +12,7 @@ module Utility.WebApp where
import Common
import Utility.Tmp
import Utility.FileMode
import Utility.Hash
import qualified Yesod
import qualified Network.Wai as Wai
@ -24,7 +25,6 @@ import qualified Data.CaseInsensitive as CI
import Network.Socket
import Control.Exception
import Crypto.Random
import Data.Digest.Pure.SHA
import qualified Web.ClientSession as CS
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.UTF8 as L8
@ -214,7 +214,7 @@ genRandomToken = do
return $
case genBytes 512 g of
Left e -> error $ "failed to generate secret token: " ++ show e
Right (s, _) -> showDigest $ sha512 $ L.fromChunks [s]
Right (s, _) -> show $ sha512 $ L.fromChunks [s]
{- A Yesod isAuthorized method, which checks the auth cgi parameter
- against a token extracted from the Yesod application.

8
debian/changelog vendored
View file

@ -1,3 +1,11 @@
git-annex (4.20130921) UNRELEASED; urgency=low
* Use cryptohash rather than SHA for hashing when no external hash program
is available. This is a significant speedup for SHA256 on OSX, for
example.
-- Joey Hess <joeyh@debian.org> Sun, 22 Sep 2013 19:42:29 -0400
git-annex (4.20130920) unstable; urgency=low
* webapp: Initial support for setting up encrypted removable drives.

1
debian/control vendored
View file

@ -9,6 +9,7 @@ Build-Depends:
libghc-hslogger-dev,
libghc-pcre-light-dev,
libghc-sha-dev,
libghc-cryptohash-dev,
libghc-regex-tdfa-dev [!mips !mipsel !s390],
libghc-dataenc-dev,
libghc-utf8-string-dev,

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="https://me.yahoo.com/a/FHnTlSBo1eCGJRwueeKeB6.RCaPbGMPr5jxx8A--#ce0d8"
nickname="Hamza"
subject="comment 1"
date="2013-09-22T21:18:17Z"
content="""
Just clone the repository on another computer or USB drive and enable the box.com remote. As long as you have the clone of the repo, you can download your files back.
"""]]

View file

@ -0,0 +1,22 @@
[[!comment format=mdwn
username="John"
ip="109.242.130.160"
subject="comment 2"
date="2013-09-22T22:20:22Z"
content="""
Thank you Hamza!
I 'm new on git, so please excuse my trivial questions:
a) I am using the git-annex assistant, is it something I can do from there or is it command line only?
I googled a bit and from what I can tell, I should make a directory on the usb drive, go there and do
> $ git clone /path/to/fullArchiveRepo
Would that be correct?
b) Assuming I've done it correctly, then I put the USB drive in a drawer and leave it there for a month. In the meantime, I've been using the repo on my pc and more files have been archived encrypted on Box.com. Then my local pc dies. When I plug the USB drive into the new pc, will I be able to recover all the encrypted files, or only those up to 1 month ago?
c) What is the proper process to use the cloned repo on a new pc? Plug in the usb drive, open the git-annex assistant and go through the \"create new repo\" but use the path for the existing repo on the usb? Then add another repo from Box (with the same account and the same directory there)? Would that work?
Thank you for your time & knowledge! :)
"""]]

View file

@ -7,6 +7,7 @@ quite a lot.
* [MissingH](http://github.com/jgoerzen/missingh/wiki)
* [utf8-string](http://hackage.haskell.org/package/utf8-string)
* [SHA](http://hackage.haskell.org/package/SHA)
* [cryptohash](http://hackage.haskell.org/package/cryptohash)
* [dataenc](http://hackage.haskell.org/package/dataenc)
* [monad-control](http://hackage.haskell.org/package/monad-control)
* [QuickCheck 2](http://hackage.haskell.org/package/QuickCheck)

View file

@ -0,0 +1,68 @@
After you've used git-annex for a while, you will have data in your repository
that you don't want to keep in the limited disk space of a laptop or a server,
but that you don't want to entirely delete.
This is where git-annex's support for offline archive drives shines.
You can move old files to an archive drive, which can be kept offline if
it's not practical to keep it spinning. Better, you can move old files to
two or more archive drives, in case one of them later fails to spin up.
(One consideration when [[future_proofing]] your archive.)
To set up an archive drive, you can take any removable drive, format
it with a filesystem you'll be able to read some years later, and then follow
the [[walkthrough]] to set up a repository on it that is a git remote of
the repository in your computer you want to archive. In short:
cd /media/archive
git clone ~/annex
cd ~/annex
git remote add archivedrive /media/archive/annex
git annex sync archivedrive
Don't forget to tell git-annex this is an archive drive (or perhaps a backup
drive). Also, give the drive a description that matches something you write on
its label, so you can find it later:
git annex group archivedrive archive
git annex describe archivedrive "my first archive drive (SATA)"
Or you can use the assistant to set up the drive for you.
(Nice video tutorial here: [[videos/git-annex_assistant_archiving]])
(Keeping the archive drive in an offsite location? Consider encrypting
it! See [[fully_encrypted_git_repositories_with_gcrypt]].)
Then, when the archive drive is plugged in, you can easily copy files to
it:
cd ~/annex
git-annex copy --auto --to archivedrive
Or, if you're using the assistant, it will automatically notice when the drive
gets plugged in and copy files that need to be archived.
When you want to get rid of the local file, leaving only the copy on the
archive, you can just:
git annex drop file
The archive drive has to be plugged in for this to work, so git-annex
can verify it still has the file. If you had configured git-annex to
always store 2 [[copies]], it will need 2 archive drives plugged in.
You may find it useful to configure a [[trust]] setting for the drive to
avoid needing to haul it out of storage to drop a file.
Now the really nice thing. When your archive drive gets filled up, you
can simply remove it, store it somewhere safe, and replace it with a new
drive, which can be mounted at the same location for simplicity. Set up
the new drive the same way described above, and use it to archive even more
files.
Finally, when you want to access one of the files you archived, you can
just ask for it:
git annex get file
If necessary git-annex will tell you which archive drive you need to
pull out of storage to get the file back. This is where the description
you entered earlier comes in handy.

View file

@ -1,7 +1,7 @@
### use case: The Archivist
Bob has many drives to archive his data, most of them kept offline, in a
safe place.
Bob has many drives to archive his data, most of them
[[kept offline|tips/offline_archive_drives]], in a safe place.
With git-annex, Bob has a single directory tree that includes all
his files, even if their content is being stored offline. He can

View file

@ -76,7 +76,7 @@ Executable git-annex
Build-Depends: MissingH, hslogger, directory, filepath,
containers, utf8-string, network (>= 2.0), mtl (>= 2),
bytestring, old-locale, time, HTTP,
extensible-exceptions, dataenc, SHA, process, json,
extensible-exceptions, dataenc, SHA, cryptohash, process, json,
base (>= 4.5 && < 4.8), monad-control, MonadCatchIO-transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process,
SafeSemaphore, uuid, random, dlist, unix-compat