git-annex/Backend/SHA.hs
Joey Hess ef3457196a use SHA256 by default
To get old behavior, add a .gitattributes containing: * annex.backend=WORM

I feel that SHA256 is a better default for most people, as long as their
systems are fast enough that checksumming their files isn't a problem.
git-annex should default to preserving the integrity of data as well as git
does. Checksum backends also work better with editing files via
unlock/lock.

I considered just using SHA1, but since that hash is believed to be somewhat
near to being broken, and git-annex deals with large files which would be a
perfect exploit medium, I decided to go to a SHA-2 hash.

SHA512 is annoyingly long when displayed, and git-annex displays it in a
few places (and notably it is shown in ls -l), so I picked the shorter
hash. Considered SHA224 as it's even shorter, but feel it's a bit weird.

I expect git-annex will use SHA-3 at some point in the future, but
probably not soon!

Note that systems without a sha256sum (or sha256) program will fall back to
defaulting to SHA1.
2011-11-04 15:51:01 -04:00

114 lines
2.9 KiB
Haskell

{- git-annex SHA backend
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Backend.SHA (backends) where
import Common.Annex
import qualified Annex
import Annex.Content
import Types.Backend
import Types.Key
import qualified Build.SysConfig as SysConfig
type SHASize = Int
-- order is slightly significant; want SHA256 first, and more general
-- sizes earlier
sizes :: [Int]
sizes = [256, 1, 512, 224, 384]
backends :: [Backend Annex]
backends = catMaybes $ map genBackend sizes ++ map genBackendE sizes
genBackend :: SHASize -> Maybe (Backend Annex)
genBackend size
| isNothing (shaCommand size) = Nothing
| otherwise = Just b
where
b = Types.Backend.Backend
{ name = shaName size
, getKey = keyValue size
, fsckKey = checkKeyChecksum size
}
genBackendE :: SHASize -> Maybe (Backend Annex)
genBackendE size =
case genBackend size of
Nothing -> Nothing
Just b -> Just $ b
{ name = shaNameE size
, getKey = keyValueE size
}
shaCommand :: SHASize -> Maybe String
shaCommand 1 = SysConfig.sha1
shaCommand 256 = SysConfig.sha256
shaCommand 224 = SysConfig.sha224
shaCommand 384 = SysConfig.sha384
shaCommand 512 = SysConfig.sha512
shaCommand _ = Nothing
shaName :: SHASize -> String
shaName size = "SHA" ++ show size
shaNameE :: SHASize -> String
shaNameE size = shaName size ++ "E"
shaN :: SHASize -> FilePath -> Annex String
shaN size file = do
showAction "checksum"
liftIO $ pOpen ReadFromPipe command (toCommand [File file]) $ \h -> do
line <- hGetLine h
let bits = split " " line
if null bits
then error $ command ++ " parse error"
else return $ head bits
where
command = fromJust $ shaCommand size
{- A key is a checksum of its contents. -}
keyValue :: SHASize -> FilePath -> Annex (Maybe Key)
keyValue size file = do
s <- shaN size file
stat <- liftIO $ getFileStatus file
return $ Just $ stubKey
{ keyName = s
, keyBackendName = shaName size
, keySize = Just $ fromIntegral $ fileSize stat
}
{- Extension preserving keys. -}
keyValueE :: SHASize -> FilePath -> Annex (Maybe Key)
keyValueE size file = keyValue size file >>= maybe (return Nothing) addE
where
addE k = return $ Just $ k
{ keyName = keyName k ++ extension
, keyBackendName = shaNameE size
}
naiveextension = takeExtension file
extension =
if length naiveextension > 6
then "" -- probably not really an extension
else naiveextension
{- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> Annex Bool
checkKeyChecksum size key = do
g <- gitRepo
fast <- Annex.getState Annex.fast
let file = gitAnnexLocation g key
present <- liftIO $ doesFileExist file
if not present || fast
then return True
else check =<< shaN size file
where
check s
| s == dropExtension (keyName key) = return True
| otherwise = do
dest <- moveBad key
warning $ "Bad file content; moved to " ++ dest
return False