diff --git a/Backend/Worm.hs b/Backend/Worm.hs index ba79428efa..89fe4bf572 100644 --- a/Backend/Worm.hs +++ b/Backend/Worm.hs @@ -3,16 +3,36 @@ module Backend.Worm (backend) where +import Control.Monad.State import qualified Backend.File import BackendTypes import Utility import System.FilePath +import System.Posix.Files +import Data.Digest.Pure.SHA -- slow, but we only checksum filenames +import qualified Data.ByteString.Lazy.Char8 as B backend = Backend.File.backend { name = "WORM", getKey = keyValue } --- direct mapping from basename of filename to key +-- A SHA1 of the basename of the filename, plus the file size and +-- modification time, is used as the unique part of the key. That +-- allows multiple files with the same names to have different keys, +-- while also allowing a file to be moved around while retaining the +-- same key. +-- +-- The basename of the filename is also included in the key, so it's clear +-- what the original filename was when a user sees the value. keyValue :: FilePath -> Annex (Maybe Key) -keyValue file = return $ Just $ Key ((name backend), (takeFileName file)) +keyValue file = do + stat <- liftIO $ getFileStatus file + return $ Just $ Key ((name backend), key stat) + where + key stat = (checksum $ uniqueid stat) ++ sep ++ base + checksum s = show $ sha1 $ B.pack s + uniqueid stat = (show $ fileSize stat) ++ sep ++ + (show $ modificationTime stat) + base = takeFileName file + sep = ":" diff --git a/git-annex.mdwn b/git-annex.mdwn index fba9648dba..2079b5b466 100644 --- a/git-annex.mdwn +++ b/git-annex.mdwn @@ -94,10 +94,10 @@ Multiple pluggable backends are supported, and more than one can be used to store different files' contents in a given repository. * `WORM` ("Write Once, Read Many") This backend stores the file's content - in `.git/annex/`, and assumes that any file with the same basename - has the same content. So with this backend, files can be moved around, - but should never be added to or changed. This is the default, and - the least expensive backend. + in `.git/annex/`, and assumes that any file with the same basename, + size, and modification time has the same content. So with this backend, + files can be moved around, but should never be added to or changed. + This is the default, and the least expensive backend. * `sha1sum` -- This backend stores the file's content in `.git/annex/`, with a name based on its sha1 checksum. This backend allows modifications of files to be tracked. Its need to generate checksums