add equivilant key log for VURL keys

When downloading a VURL from the web, make sure that the equivilant key
log is populated.

Unfortunately, this does not hash the content while it's being
downloaded from the web. There is not an interface in Backend currently
for incrementally hash generation, only for incremental verification of an
existing hash. So this might add a noticiable delay, and it has to show
a "(checksum...") message. This could stand to be improved.

But, that separate hashing step only has to happen on the first download
of new content from the web. Once the hash is known, the VURL key can have
its hash verified incrementally while downloading except when the
content in the web has changed. (Doesn't happen yet because
verifyKeyContentIncrementally is not implemented yet for VURL keys.)

Note that the equivilant key log file is formatted as a presence log.
This adds a tiny bit of overhead (eg "1 ") per line over just listing the
urls. The reason I chose to use that format is it seems possible that
there will need to be a way to remove an equivilant key at some point in
the future. I don't know why that would be necessary, but it seemed wise
to allow for the possibility.

Downloads of VURL keys from other special remotes that claim urls,
like bittorrent for example, does not popilate the equivilant key log.
So for now, no checksum verification will be done for those.

Sponsored-by: Nicholas Golder-Manning on Patreon
This commit is contained in:
Joey Hess 2024-02-29 15:41:57 -04:00
parent 0f7143d226
commit 55bf01b788
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 125 additions and 14 deletions

View file

@ -1,6 +1,6 @@
{- git-annex key/value backends {- git-annex key/value backends
- -
- Copyright 2010-2021 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -10,6 +10,7 @@
module Backend ( module Backend (
builtinList, builtinList,
defaultBackend, defaultBackend,
defaultHashBackend,
genKey, genKey,
getBackend, getBackend,
chooseBackend, chooseBackend,
@ -18,6 +19,7 @@ module Backend (
maybeLookupBackendVariety, maybeLookupBackendVariety,
isStableKey, isStableKey,
isCryptographicallySecure, isCryptographicallySecure,
isCryptographicallySecure',
) where ) where
import Annex.Common import Annex.Common
@ -40,7 +42,13 @@ import qualified Data.Map as M
builtinList :: [Backend] builtinList :: [Backend]
builtinList = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends builtinList = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends
{- Backend to use by default when generating a new key. -} {- The default hashing backend. This must use a cryptographically secure
- hash. -}
defaultHashBackend :: Backend
defaultHashBackend = Prelude.head builtinList
{- Backend to use by default when generating a new key. Takes git config
- and --backend option into account. -}
defaultBackend :: Annex Backend defaultBackend :: Annex Backend
defaultBackend = maybe cache return =<< Annex.getState Annex.backend defaultBackend = maybe cache return =<< Annex.getState Annex.backend
where where
@ -49,7 +57,7 @@ defaultBackend = maybe cache return =<< Annex.getState Annex.backend
=<< Annex.getRead Annex.forcebackend =<< Annex.getRead Annex.forcebackend
b <- case n of b <- case n of
Just name | valid name -> lookupname name Just name | valid name -> lookupname name
_ -> pure (Prelude.head builtinList) _ -> pure defaultHashBackend
Annex.changeState $ \s -> s { Annex.backend = Just b } Annex.changeState $ \s -> s { Annex.backend = Just b }
return b return b
valid name = not (null name) valid name = not (null name)
@ -116,5 +124,8 @@ isStableKey k = maybe False (`B.isStableKey` k)
<$> maybeLookupBackendVariety (fromKey keyVariety k) <$> maybeLookupBackendVariety (fromKey keyVariety k)
isCryptographicallySecure :: Key -> Annex Bool isCryptographicallySecure :: Key -> Annex Bool
isCryptographicallySecure k = maybe False B.isCryptographicallySecure isCryptographicallySecure k = maybe False isCryptographicallySecure'
<$> maybeLookupBackendVariety (fromKey keyVariety k) <$> maybeLookupBackendVariety (fromKey keyVariety k)
isCryptographicallySecure' :: Backend -> Bool
isCryptographicallySecure' = B.isCryptographicallySecure

View file

@ -11,6 +11,7 @@ module Backend.Hash (
backends, backends,
testKeyBackend, testKeyBackend,
keyHash, keyHash,
descChecksum,
) where ) where
import Annex.Common import Annex.Common

View file

@ -15,6 +15,7 @@ import Annex.Common
import Types.Key import Types.Key
import Types.Backend import Types.Backend
import Backend.Utilities import Backend.Utilities
import Logs.EquivilantKeys
backends :: [Backend] backends :: [Backend]
backends = [backendURL, backendVURL] backends = [backendURL, backendVURL]

19
Logs.hs
View file

@ -1,6 +1,6 @@
{- git-annex log file names {- git-annex log file names
- -
- Copyright 2013-2023 Joey Hess <id@joeyh.name> - Copyright 2013-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -35,7 +35,9 @@ getLogVariety config f
| isRemoteStateLog f = Just NewUUIDBasedLog | isRemoteStateLog f = Just NewUUIDBasedLog
| isRemoteContentIdentifierLog f = Just NewUUIDBasedLog | isRemoteContentIdentifierLog f = Just NewUUIDBasedLog
| isRemoteMetaDataLog f = Just RemoteMetaDataLog | isRemoteMetaDataLog f = Just RemoteMetaDataLog
| isMetaDataLog f || f `elem` otherTopLevelLogs = Just OtherLog | isMetaDataLog f
|| f `elem` otherTopLevelLogs
|| isEquivilantKeyLog f = Just OtherLog
| otherwise = (LocationLog <$> locationLogFileKey config f) | otherwise = (LocationLog <$> locationLogFileKey config f)
<|> (ChunkLog <$> extLogFileKey chunkLogExt f) <|> (ChunkLog <$> extLogFileKey chunkLogExt f)
<|> (UrlLog <$> urlLogFileKey f) <|> (UrlLog <$> urlLogFileKey f)
@ -70,6 +72,7 @@ keyLogFiles config k =
, remoteMetaDataLogFile config k , remoteMetaDataLogFile config k
, remoteContentIdentifierLogFile config k , remoteContentIdentifierLogFile config k
, chunkLogFile config k , chunkLogFile config k
, equivilantKeysLogFile config k
] ++ oldurlLogs config k ] ++ oldurlLogs config k
{- All uuid-based logs stored in the top of the git-annex branch. -} {- All uuid-based logs stored in the top of the git-annex branch. -}
@ -208,6 +211,18 @@ chunkLogFile config key =
chunkLogExt :: S.ByteString chunkLogExt :: S.ByteString
chunkLogExt = ".log.cnk" chunkLogExt = ".log.cnk"
{- The filename of the equivilant keys log for a given key. -}
equivilantKeysLogFile :: GitConfig -> Key -> RawFilePath
equivilantKeysLogFile config key =
(branchHashDir config key P.</> keyFile key)
<> equivilantKeyLogExt
equivilantKeyLogExt :: S.ByteString
equivilantKeyLogExt = ".log.ek"
isEquivilantKeyLog :: RawFilePath -> Bool
isEquivilantKeyLog path = equivilantKeyLogExt `S.isSuffixOf` path
{- The filename of the metadata log for a given key. -} {- The filename of the metadata log for a given key. -}
metaDataLogFile :: GitConfig -> Key -> RawFilePath metaDataLogFile :: GitConfig -> Key -> RawFilePath
metaDataLogFile config key = metaDataLogFile config key =

31
Logs/EquivilantKeys.hs Normal file
View file

@ -0,0 +1,31 @@
{- Logs listing keys that are equivilant to a key.
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Logs.EquivilantKeys (
getEquivilantKeys,
setEquivilantKey,
) where
import Annex.Common
import qualified Annex
import Logs
import Logs.Presence
import qualified Annex.Branch
getEquivilantKeys :: Key -> Annex [Key]
getEquivilantKeys key = do
config <- Annex.getGitConfig
mapMaybe (deserializeKey' . fromLogInfo)
<$> presentLogInfo (equivilantKeysLogFile config key)
setEquivilantKey :: Key -> Key -> Annex ()
setEquivilantKey key equivkey = do
config <- Annex.getGitConfig
addLog (Annex.Branch.RegardingUUID []) (equivilantKeysLogFile config key)
InfoPresent (LogInfo (serializeKey' equivkey))

View file

@ -1,6 +1,6 @@
{- Web remote. {- Web remote.
- -
- Copyright 2011-2023 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -11,6 +11,8 @@ import Annex.Common
import Types.Remote import Types.Remote
import Types.ProposedAccepted import Types.ProposedAccepted
import Types.Creds import Types.Creds
import Types.Key
import Types.KeySource
import Remote.Helper.Special import Remote.Helper.Special
import Remote.Helper.ExportImport import Remote.Helper.ExportImport
import qualified Git import qualified Git
@ -27,6 +29,9 @@ import qualified Annex.Url as Url
import Annex.YoutubeDl import Annex.YoutubeDl
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import Logs.Remote import Logs.Remote
import Logs.EquivilantKeys
import Backend
import Backend.Hash (descChecksum)
import qualified Data.Map as M import qualified Data.Map as M
@ -124,22 +129,61 @@ downloadKey urlincludeexclude key _af dest p vc =
, "known url(s) failed" , "known url(s) failed"
] ]
isyoutube (_, YoutubeDownloader) = True
isyoutube _ = False
dl ([], ytus) = flip getM (map fst ytus) $ \u -> dl ([], ytus) = flip getM (map fst ytus) $ \u ->
ifM (youtubeDlTo key u dest p) ifM (youtubeDlTo key u dest p)
( return (Just UnVerified) ( postdl UnVerified
, return Nothing , return Nothing
) )
dl (us, ytus) = do dl (us, ytus) = do
iv <- startVerifyKeyContentIncrementally vc key iv <- startVerifyKeyContentIncrementally vc key
ifM (Url.withUrlOptions $ downloadUrl True key p iv (map fst us) dest) ifM (Url.withUrlOptions $ downloadUrl True key p iv (map fst us) dest)
( finishVerifyKeyContentIncrementally iv >>= \case ( finishVerifyKeyContentIncrementally iv >>= \case
(True, v) -> return (Just v) (True, v) -> postdl v
(False, _) -> dl ([], ytus) (False, _) -> dl ([], ytus)
, dl ([], ytus) , dl ([], ytus)
) )
isyoutube (_, YoutubeDownloader) = True postdl v@Verified = return (Just v)
isyoutube _ = False postdl v = do
when (fromKey keyVariety key == VURLKey) $
recordvurlkey
return (Just v)
-- For a VURL key that was not verified on download,
-- need to generate a hashed key for the content downloaded
-- from the web, and record it for later use verifying this content.
--
-- But when the VURL key has a known size, and already has a
-- recorded hashed key, don't record a new key, since the content
-- on the web is expected to be stable for such a key.
recordvurlkey = case fromKey keySize key of
Nothing -> recordvurlkey' =<< getEquivilantKeys key
Just _ -> do
eks <- getEquivilantKeys key
if null eks
then recordvurlkey' eks
else return ()
recordvurlkey' eks = do
-- Make sure to pick a backend that is cryptographically
-- secure.
db <- defaultBackend
let b = if isCryptographicallySecure' db
then db
else defaultHashBackend
showSideAction (UnquotedString descChecksum)
(hashk, _) <- genKey ks nullMeterUpdate b
unless (hashk `elem` eks) $
setEquivilantKey key hashk
where
ks = KeySource
{ keyFilename = mempty -- avoid adding any extension
, contentLocation = toRawFilePath dest
, inodeCache = Nothing
}
uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex () uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
uploadKey _ _ _ = giveup "upload to web not supported" uploadKey _ _ _ = giveup "upload to web not supported"

View file

@ -134,12 +134,12 @@ buildKeyData k = byteString (formatKeyVariety (keyVariety k))
c ?: (Just b) = sepbefore (char7 c <> b) c ?: (Just b) = sepbefore (char7 c <> b)
_ ?: Nothing = mempty _ ?: Nothing = mempty
{- This is a strict parser for security reasons; a key {- This is a strict parser for security reasons; in addition to keyName,
- can contain only 4 fields, which all consist only of numbers. - a key can contain only 4 fields, which all consist only of numbers.
- Any key containing other fields, or non-numeric data will fail - Any key containing other fields, or non-numeric data will fail
- to parse. - to parse.
- -
- If a key contained non-numeric fields, they could be used to - If a key contained other non-numeric fields, they could be used to
- embed data used in a SHA1 collision attack, which would be a - embed data used in a SHA1 collision attack, which would be a
- problem since the keys are committed to git. - problem since the keys are committed to git.
-} -}

View file

@ -224,6 +224,13 @@ These log files record urls used by the
Their format is similar to the location tracking files, but with urls Their format is similar to the location tracking files, but with urls
rather than UUIDs. rather than UUIDs.
## `aaa/bbb/*.log.ek`
These log files record other keys that are equivilant to the key
used in the filename. This is currently used for the `VURL` backend.
Their format is similar to the location tracking files, but with keys
rather than UUIDs.
## `aaa/bbb/*.log.rmt` ## `aaa/bbb/*.log.rmt`
These log files are used by remotes that need to record their own state These log files are used by remotes that need to record their own state

View file

@ -812,6 +812,7 @@ Executable git-annex
Logs.ContentIdentifier.Pure Logs.ContentIdentifier.Pure
Logs.Difference Logs.Difference
Logs.Difference.Pure Logs.Difference.Pure
Logs.EquivilantKeys
Logs.Export Logs.Export
Logs.Export.Pure Logs.Export.Pure
Logs.File Logs.File