From ea63d1dfe3a74f57e4f77ccc950b4d917535deeb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 29 Jul 2020 17:12:22 -0400 Subject: [PATCH] E variant of external backend keys --- Backend/External.hs | 28 +++++++++++++--- Backend/Hash.hs | 40 +++------------------- Backend/Utilities.hs | 41 ++++++++++++++++++++++- doc/backends.mdwn | 4 +++ doc/design/external_backend_protocol.mdwn | 3 +- 5 files changed, 75 insertions(+), 41 deletions(-) diff --git a/Backend/External.hs b/Backend/External.hs index 995445327c..6b2d062ecf 100644 --- a/Backend/External.hs +++ b/Backend/External.hs @@ -11,6 +11,7 @@ module Backend.External (makeBackend) where import Annex.Common import Annex.ExternalAddonProcess +import Backend.Utilities import Types.Key import Types.Backend import Types.KeySource @@ -19,6 +20,7 @@ import qualified Utility.SimpleProtocol as Proto import qualified Data.ByteString as S import qualified Data.Map.Strict as M +import Data.Char import Control.Concurrent import System.IO.Unsafe (unsafePerformIO) import System.Log.Logger (debugM) @@ -92,8 +94,7 @@ genKeyExternal ebname hasext ks meterupdate = req = GENKEY (fromRawFilePath (contentLocation ks)) notavail = giveup $ "Cannot generate a key, since " ++ externalBackendProgram ebname ++ " is not available." - -- TODO hasExt handling - go (GENKEY_SUCCESS (ProtoKey k)) = result k + go (GENKEY_SUCCESS pk) = Just $ Result <$> fromProtoKey pk hasext ks go (GENKEY_FAILURE msg) = Just $ giveup $ "External backend program failed to generate a key: " ++ msg go (PROGRESS bytesprocessed) = Just $ do @@ -106,8 +107,7 @@ verifyKeyContentExternal ebname hasext meterupdate k f = withExternalState ebname hasext $ \st -> handleRequest st req notavail go where - -- TODO hasExt handling - req = VERIFYKEYCONTENT (ProtoKey k) f + req = VERIFYKEYCONTENT (toProtoKey k) f -- This should not be able to happen, because CANVERIFY is checked -- before this function is enable, and so the external program @@ -273,6 +273,26 @@ withExternalState bname hasext a = do newtype ProtoKey = ProtoKey Key deriving (Show) +fromProtoKey :: ProtoKey -> HasExt -> KeySource -> Annex Key +fromProtoKey (ProtoKey k) (HasExt False) _ = pure k +fromProtoKey (ProtoKey k) hasext@(HasExt True) source = + addE source (setHasExt hasext) k + +toProtoKey :: Key -> ProtoKey +toProtoKey k = ProtoKey $ alterKey k $ \d -> d + -- The extension can be easily removed, because the protocol + -- documentation does not allow '.' to be used in the keyName, + -- so the first one is the extension. + { keyName = S.takeWhile (/= dot) (keyName d) + , keyVariety = setHasExt (HasExt False) (keyVariety d) + } + where + dot = fromIntegral (ord '.') + +setHasExt :: HasExt -> KeyVariety -> KeyVariety +setHasExt hasext (ExternalKey name _) = ExternalKey name hasext +setHasExt _ v = v + instance Proto.Serializable ProtoKey where serialize (ProtoKey k) = Proto.serialize k deserialize = fmap ProtoKey . Proto.deserialize diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 2b8bcd78d3..e80ad4216e 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -20,13 +20,11 @@ import Types.Backend import Types.KeySource import Utility.Hash import Utility.Metered +import Backend.Utilities import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L -import qualified System.FilePath.ByteString as P -import Data.Char -import Data.Word import Control.DeepSeq import Control.Exception (evaluate) @@ -114,29 +112,8 @@ keyValue hash source meterupdate = do {- Extension preserving keys. -} keyValueE :: Hash -> KeySource -> MeterUpdate -> Annex Key keyValueE hash source meterupdate = - keyValue hash source meterupdate >>= addE - where - addE k = do - maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig - let ext = selectExtension maxlen (keyFilename source) - return $ alterKey k $ \d -> d - { keyName = keyName d <> ext - , keyVariety = hashKeyVariety hash (HasExt True) - } - -selectExtension :: Maybe Int -> RawFilePath -> S.ByteString -selectExtension maxlen f - | null es = "" - | otherwise = S.intercalate "." ("":es) - where - es = filter (not . S.null) $ reverse $ - take 2 $ filter (S.all validInExtension) $ - takeWhile shortenough $ - reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f) - shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen - -maxExtensionLen :: Int -maxExtensionLen = 4 -- long enough for "jpeg" + keyValue hash source meterupdate + >>= addE source (const $ hashKeyVariety hash (HasExt True)) {- A key's checksum is checked during fsck when it's content is present - except for in fast mode. -} @@ -166,13 +143,6 @@ checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do keyHash :: Key -> S.ByteString keyHash = fst . splitKeyNameExtension -validInExtension :: Word8 -> Bool -validInExtension c - | isAlphaNum (chr (fromIntegral c)) = True - | fromIntegral c == ord '.' = True - | c <= 127 = False -- other ascii: spaces, punctuation, control chars - | otherwise = True -- utf8 is allowed, also other encodings - {- Upgrade keys that have the \ prefix on their hash due to a bug, or - that contain non-alphanumeric characters in their extension. - @@ -310,10 +280,10 @@ testKeyBackend = let b = genBackendE (SHA2Hash (HashSize 256)) gk = case genKey b of Nothing -> Nothing - Just f -> Just (\ks p -> addE <$> f ks p) + Just f -> Just (\ks p -> addTestE <$> f ks p) in b { genKey = gk } where - addE k = alterKey k $ \d -> d + addTestE k = alterKey k $ \d -> d { keyName = keyName d <> longext } longext = ".this-is-a-test-key" diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs index 0baaa476c9..16bbbdc9f9 100644 --- a/Backend/Utilities.hs +++ b/Backend/Utilities.hs @@ -1,17 +1,25 @@ {- git-annex backend utilities - - - Copyright 2012-2019 Joey Hess + - Copyright 2012-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE OverloadedStrings #-} + module Backend.Utilities where import Annex.Common +import qualified Annex import Utility.Hash +import Types.Key +import Types.KeySource import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L +import qualified System.FilePath.ByteString as P +import Data.Char +import Data.Word {- Generates a keyName from an input string. Takes care of sanitizing it. - If it's not too long, the full string is used as the keyName. @@ -32,3 +40,34 @@ genKeyName s sha256len = 64 md5len = 32 +{- Converts a key to a version that includes an extension from the + - file that the key was generated from. -} +addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key +addE source sethasext k = do + maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig + let ext = selectExtension maxlen (keyFilename source) + return $ alterKey k $ \d -> d + { keyName = keyName d <> ext + , keyVariety = sethasext (keyVariety d) + } + +selectExtension :: Maybe Int -> RawFilePath -> S.ByteString +selectExtension maxlen f + | null es = "" + | otherwise = S.intercalate "." ("":es) + where + es = filter (not . S.null) $ reverse $ + take 2 $ filter (S.all validInExtension) $ + takeWhile shortenough $ + reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f) + shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen + +validInExtension :: Word8 -> Bool +validInExtension c + | isAlphaNum (chr (fromIntegral c)) = True + | fromIntegral c == ord '.' = True + | c <= 127 = False -- other ascii: spaces, punctuation, control chars + | otherwise = True -- utf8 is allowed, also other encodings + +maxExtensionLen :: Int +maxExtensionLen = 4 -- long enough for "jpeg" diff --git a/doc/backends.mdwn b/doc/backends.mdwn index 63a12d1e91..66199f485f 100644 --- a/doc/backends.mdwn +++ b/doc/backends.mdwn @@ -89,6 +89,10 @@ Here's a list of external backends. Edit this page to add yours to the list. * [[design/external_backend_protocol/git-annex-backend-XFOO]] is a demo program implementing the protocol with a shell script. +Like with git-annex's builtin backends, you can add "E" to the end of the +name of an external backend, to get a version that includes the file +extension in the key. + ## notes If you want to be able to prove that you're working with the same file diff --git a/doc/design/external_backend_protocol.mdwn b/doc/design/external_backend_protocol.mdwn index b440367ac6..a4a8a22a48 100644 --- a/doc/design/external_backend_protocol.mdwn +++ b/doc/design/external_backend_protocol.mdwn @@ -154,7 +154,8 @@ The key name is typically some kind of hash, but is not limited to a hash. The length of it needs to be similar to the lengths of other git-annex keys. Too long a key name will make it annoying to work with repositories using them, or even cause problems due to filename length limits. 128 bytes -maximum, but shorter is better. +maximum, but shorter is better. It should be entirely ascii characters +in the set `A-Za-z0-9` and `-` is allowed, but other punctuation is not. It's important that, if the program responds with `ISCRYPTOGRAPHICALLYSECURE-YES`, the key name contains only a hash, and not