better worm keys

This commit is contained in:
Joey Hess 2010-10-15 18:57:05 -04:00
parent 8e742bd89e
commit 44b8f7c95d
2 changed files with 26 additions and 6 deletions

View file

@ -3,16 +3,36 @@
module Backend.Worm (backend) where
import Control.Monad.State
import qualified Backend.File
import BackendTypes
import Utility
import System.FilePath
import System.Posix.Files
import Data.Digest.Pure.SHA -- slow, but we only checksum filenames
import qualified Data.ByteString.Lazy.Char8 as B
-- The WORM backend: Backend.File's backend with its name and its
-- key generation function overridden.
backend = Backend.File.backend
    { name = "WORM"
    , getKey = keyValue
    }
-- Generates the key for a file from its basename, size, and
-- modification time.
--
-- The unique part of the key is a SHA1 of the file's size and
-- modification time. The basename of the filename is appended to it
-- unhashed, so it's clear what the original filename was when a user
-- sees the value. That allows multiple files with the same names to
-- have different keys, while also allowing a file to be moved around
-- while retaining the same key.
--
-- The key's value has the form "<sha1 of "size:mtime">:<basename>".
--
-- The file is statted to obtain its size and modification time; this
-- function does not catch any error raised by that stat.
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
    stat <- liftIO $ getFileStatus file
    return $ Just $ Key (name backend, key stat)
  where
    -- checksum of the stat-derived id, then the readable basename
    key st = checksum (uniqueid st) ++ sep ++ base
    checksum s = show $ sha1 $ B.pack s
    -- only size and mtime are hashed; the basename stays in clear text
    uniqueid st = show (fileSize st) ++ sep ++ show (modificationTime st)
    base = takeFileName file
    sep = ":"

View file

@ -94,10 +94,10 @@ Multiple pluggable backends are supported, and more than one can be used
to store different files' contents in a given repository.
* `WORM` ("Write Once, Read Many") This backend stores the file's content
in `.git/annex/`, and assumes that any file with the same basename,
size, and modification time has the same content. So with this backend,
files can be moved around, but should never be added to or changed.
This is the default, and the least expensive backend.
* `sha1sum` -- This backend stores the file's content in
`.git/annex/`, with a name based on its sha1 checksum. This backend allows
modifications of files to be tracked. Its need to generate checksums