Added filename extension preserving variant backends SHA1E, SHA256E, etc.

This commit is contained in:
Joey Hess 2011-05-16 11:46:34 -04:00
parent 1d2984441c
commit 2a8efc7af1
4 changed files with 49 additions and 17 deletions

View file

@ -14,6 +14,7 @@ import System.IO
import System.Directory import System.Directory
import Data.Maybe import Data.Maybe
import System.Posix.Files import System.Posix.Files
import System.FilePath
import qualified Backend.File import qualified Backend.File
import BackendClass import BackendClass
@ -27,11 +28,14 @@ import qualified SysConfig
import Key import Key
type SHASize = Int type SHASize = Int
-- SHA variants for which backends are generated, identified by digest
-- size (1 stands for SHA1). The list is kept in the order the backends
-- should be offered.
sizes :: [Int]
sizes =
    [ 1
    , 256
    , 512
    , 224
    , 384
    ]
backends :: [Backend Annex] backends :: [Backend Annex]
-- order is slightly significant; want sha1 first, and more general -- order is slightly significant; want sha1 first, and more general
-- sizes earlier -- sizes earlier
backends = catMaybes $ map genBackend [1, 256, 512, 224, 384] backends = catMaybes $ map genBackend sizes ++ map genBackendE sizes
genBackend :: SHASize -> Maybe (Backend Annex) genBackend :: SHASize -> Maybe (Backend Annex)
genBackend size genBackend size
@ -44,6 +48,15 @@ genBackend size
, fsckKey = Backend.File.checkKey $ checkKeyChecksum size , fsckKey = Backend.File.checkKey $ checkKeyChecksum size
} }
-- Builds the extension-preserving ("E") variant of a SHA backend by taking
-- the plain backend for the same size and overriding its name and its key
-- generator. Yields Nothing whenever genBackend does.
-- NOTE(review): every other field, including fsckKey, is inherited from
-- genBackend unchanged; confirm that the inherited checksum check copes
-- with the extension that keyValueE appends to keyName.
genBackendE :: SHASize -> Maybe (Backend Annex)
genBackendE size = fmap toExtensionVariant (genBackend size)
  where
    toExtensionVariant plain = plain
        { name = shaNameE size
        , getKey = keyValueE size
        }
shaCommand :: SHASize -> Maybe String shaCommand :: SHASize -> Maybe String
shaCommand 1 = SysConfig.sha1 shaCommand 1 = SysConfig.sha1
shaCommand 256 = SysConfig.sha256 shaCommand 256 = SysConfig.sha256
@ -55,6 +68,9 @@ shaCommand _ = Nothing
shaName :: SHASize -> String shaName :: SHASize -> String
shaName size = "SHA" ++ show size shaName size = "SHA" ++ show size
-- Backend name of the extension-preserving variant: the plain SHA backend
-- name with an "E" suffix (e.g. "SHA256" becomes "SHA256E").
shaNameE :: SHASize -> String
shaNameE = (++ "E") . shaName
shaN :: SHASize -> FilePath -> Annex String shaN :: SHASize -> FilePath -> Annex String
shaN size file = do shaN size file = do
showNote "checksum..." showNote "checksum..."
@ -72,11 +88,25 @@ keyValue :: SHASize -> FilePath -> Annex (Maybe Key)
keyValue size file = do keyValue size file = do
s <- shaN size file s <- shaN size file
stat <- liftIO $ getFileStatus file stat <- liftIO $ getFileStatus file
return $ Just $ stubKey { return $ Just $ stubKey
keyName = s, { keyName = s
keyBackendName = shaName size, , keyBackendName = shaName size
keySize = Just $ fromIntegral $ fileSize stat , keySize = Just $ fromIntegral $ fileSize stat
} }
{- Extension preserving keys.
 -
 - Generates the same key as keyValue, then appends the file's extension
 - to the key name and switches the backend name to the "E" variant.
 - When keyValue produces no key, neither does this. -}
keyValueE :: SHASize -> FilePath -> Annex (Maybe Key)
keyValueE size file = do
    plainKey <- keyValue size file
    return $ fmap withExtension plainKey
  where
    withExtension k = k
        { keyName = keyName k ++ suffix
        , keyBackendName = shaNameE size
        }
    -- takeExtension includes the leading dot, so the limit of 6
    -- counts the dot as well.
    candidate = takeExtension file
    suffix
        | length candidate > 6 = "" -- probably not really an extension
        | otherwise = candidate
-- A key's checksum is checked during fsck. -- A key's checksum is checked during fsck.
checkKeyChecksum :: SHASize -> Key -> Annex Bool checkKeyChecksum :: SHASize -> Key -> Annex Bool

1
debian/changelog vendored
View file

@ -6,6 +6,7 @@ git-annex (0.20110504) UNRELEASED; urgency=low
limits, disable encryption, support their nonstandard way of creating limits, disable encryption, support their nonstandard way of creating
buckets, and allow x-amz-* headers to be specified in initremote to set buckets, and allow x-amz-* headers to be specified in initremote to set
item metadata. item metadata.
* Added filename extension preserving variant backends SHA1E, SHA256E, etc.
-- Joey Hess <joeyh@debian.org> Fri, 06 May 2011 15:20:38 -0400 -- Joey Hess <joeyh@debian.org> Fri, 06 May 2011 15:20:38 -0400

View file

@ -23,6 +23,9 @@ these backends.
* `SHA512`, `SHA384`, `SHA256`, `SHA224` -- Like SHA1, but larger * `SHA512`, `SHA384`, `SHA256`, `SHA224` -- Like SHA1, but larger
checksums. Mostly useful for the very paranoid, or anyone who is checksums. Mostly useful for the very paranoid, or anyone who is
researching checksum collisions and wants to annex their colliding data. ;) researching checksum collisions and wants to annex their colliding data. ;)
* `SHA1E`, `SHA512E`, etc. -- Variants that preserve the filename extension as
part of the key. Useful for archival tasks where the filename extension
contains metadata that should be preserved.
These backends store file contents in other key/value stores. These backends store file contents in other key/value stores.

View file

@ -32,20 +32,18 @@ specify `x-archive-meta*` headers to add metadata as explained in their
Then you can annex files and copy them to the remote as usual: Then you can annex files and copy them to the remote as usual:
# git annex add photo1.jpeg # git annex add photo1.jpeg --backend=SHA1E
add photo1.jpeg ok add photo1.jpeg (checksum...) ok
# git annex copy photo1.jpeg --fast --to archive-panama # git annex copy photo1.jpeg --fast --to archive-panama
copy (to archive-panama...) ok copy (to archive-panama...) ok
----- Note the use of the SHA1E [[backend|backends]]. It makes most sense
to use the WORM or SHA1E backend for files that will be stored in
the Internet Archive, since the key name will be exposed as the filename
there, and since the Archive does special processing of files based on
their extension.
Note that it probably makes the most sense to use the WORM backend ----
for files, since that exposes the original filename in the key stored
in the Archive, which allows its special processing for sound files,
movies, etc to be done.
Also, the Internet Archive has restrictions on what is allowed in a
filename; particularly no spaces are allowed.
There seems to be a bug in either hS3 or the archive that breaks There seems to be a bug in either hS3 or the archive that breaks
authentication when the bucket name contains spaces or upper-case letters. authentication when the bucket name contains spaces or upper-case letters.