annex.maxextensions configuration

Controls how many filename extensions to preserve.

Sponsored-by: the NIH-funded NICEMAN (ReproNim TR&D3) project
This commit is contained in:
Joey Hess 2024-04-18 14:23:05 -04:00
parent b700c48b15
commit c410b2bb73
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 51 additions and 22 deletions

View file

@ -387,7 +387,7 @@ prop_view_roundtrips (AssociatedFile Nothing) _ _ = True
prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or
[ B.null (P.takeFileName f) && B.null (P.takeDirectory f) [ B.null (P.takeFileName f) && B.null (P.takeDirectory f)
, viewTooLarge view , viewTooLarge view
, all hasfields (viewedFiles view (viewedFileFromReference' Nothing) (fromRawFilePath f) metadata) , all hasfields (viewedFiles view (viewedFileFromReference' Nothing Nothing) (fromRawFilePath f) metadata)
] ]
where where
view = View (Git.Ref "foo") $ view = View (Git.Ref "foo") $

View file

@ -1,6 +1,6 @@
{- filenames (not paths) used in views {- filenames (not paths) used in views
- -
- Copyright 2014-2023 Joey Hess <id@joeyh.name> - Copyright 2014-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -19,6 +19,7 @@ module Annex.View.ViewedFile (
import Annex.Common import Annex.Common
import Utility.QuickCheck import Utility.QuickCheck
import Backend.Utilities (maxExtensions)
import qualified Data.ByteString as S import qualified Data.ByteString as S
@ -37,10 +38,12 @@ type MkViewedFile = FilePath -> ViewedFile
- So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo - So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo
-} -}
viewedFileFromReference :: GitConfig -> MkViewedFile viewedFileFromReference :: GitConfig -> MkViewedFile
viewedFileFromReference g = viewedFileFromReference' (annexMaxExtensionLength g) viewedFileFromReference g = viewedFileFromReference'
(annexMaxExtensionLength g)
(annexMaxExtensions g)
viewedFileFromReference' :: Maybe Int -> MkViewedFile viewedFileFromReference' :: Maybe Int -> Maybe Int -> MkViewedFile
viewedFileFromReference' maxextlen f = concat $ viewedFileFromReference' maxextlen maxextensions f = concat $
[ escape (fromRawFilePath base') [ escape (fromRawFilePath base')
, if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%" , if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%"
, escape $ fromRawFilePath $ S.concat extensions' , escape $ fromRawFilePath $ S.concat extensions'
@ -51,11 +54,12 @@ viewedFileFromReference' maxextlen f = concat $
(base, extensions) = case maxextlen of (base, extensions) = case maxextlen of
Nothing -> splitShortExtensions (toRawFilePath basefile') Nothing -> splitShortExtensions (toRawFilePath basefile')
Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile') Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile')
{- Limit to two extensions maximum. -} {- Limit number of extensions. -}
maxextensions' = fromMaybe maxExtensions maxextensions
(base', extensions') (base', extensions')
| length extensions <= 2 = (base, extensions) | length extensions <= maxextensions' = (base, extensions)
| otherwise = | otherwise =
let (es,more) = splitAt 2 (reverse extensions) let (es,more) = splitAt maxextensions' (reverse extensions)
in (base <> mconcat (reverse more), reverse es) in (base <> mconcat (reverse more), reverse es)
{- On Windows, if the filename looked like "dir/c:foo" then {- On Windows, if the filename looked like "dir/c:foo" then
- basefile would look like it contains a drive letter, which will - basefile would look like it contains a drive letter, which will
@ -101,7 +105,8 @@ prop_viewedFile_roundtrips tf
-- Relative filenames wanted, not directories. -- Relative filenames wanted, not directories.
| any (isPathSeparator) (end f ++ beginning f) = True | any (isPathSeparator) (end f ++ beginning f) = True
| isAbsolute f || isDrive f = True | isAbsolute f || isDrive f = True
| otherwise = dir == dirFromViewedFile (viewedFileFromReference' Nothing f) | otherwise = dir == dirFromViewedFile
(viewedFileFromReference' Nothing Nothing f)
where where
f = fromTestableFilePath tf f = fromTestableFilePath tf
dir = joinPath $ beginning $ splitDirectories f dir = joinPath $ beginning $ splitDirectories f

View file

@ -170,11 +170,14 @@ needsUpgrade key = or
] ]
trivialMigrate :: Key -> Backend -> AssociatedFile -> Bool -> Annex (Maybe Key) trivialMigrate :: Key -> Backend -> AssociatedFile -> Bool -> Annex (Maybe Key)
trivialMigrate oldkey newbackend afile _inannex = trivialMigrate' oldkey newbackend afile trivialMigrate oldkey newbackend afile _inannex = do
<$> (annexMaxExtensionLength <$> Annex.getGitConfig) c <- Annex.getGitConfig
return $ trivialMigrate' oldkey newbackend afile
(annexMaxExtensionLength c)
(annexMaxExtensions c)
trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Int -> Maybe Key
trivialMigrate' oldkey newbackend afile maxextlen trivialMigrate' oldkey newbackend afile maxextlen maxexts
{- Fast migration from hashE to hash backend. -} {- Fast migration from hashE to hash backend. -}
| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d | migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
{ keyName = S.toShort (keyHash oldkey) { keyName = S.toShort (keyHash oldkey)
@ -185,7 +188,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
AssociatedFile Nothing -> Nothing AssociatedFile Nothing -> Nothing
AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
{ keyName = S.toShort $ keyHash oldkey { keyName = S.toShort $ keyHash oldkey
<> selectExtension maxextlen file <> selectExtension maxextlen maxexts file
, keyVariety = newvariety , keyVariety = newvariety
} }
{- Upgrade to fix bad previous migration that created a {- Upgrade to fix bad previous migration that created a

View file

@ -45,20 +45,24 @@ genKeyName s
- file that the key was generated from. -} - file that the key was generated from. -}
addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key
addE source sethasext k = do addE source sethasext k = do
maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig c <- Annex.getGitConfig
let ext = selectExtension maxlen (keyFilename source) let ext = selectExtension
(annexMaxExtensionLength c)
(annexMaxExtensions c)
(keyFilename source)
return $ alterKey k $ \d -> d return $ alterKey k $ \d -> d
{ keyName = keyName d <> S.toShort ext { keyName = keyName d <> S.toShort ext
, keyVariety = sethasext (keyVariety d) , keyVariety = sethasext (keyVariety d)
} }
selectExtension :: Maybe Int -> RawFilePath -> S.ByteString selectExtension :: Maybe Int -> Maybe Int -> RawFilePath -> S.ByteString
selectExtension maxlen f selectExtension maxlen maxextensions f
| null es = "" | null es = ""
| otherwise = S.intercalate "." ("":es) | otherwise = S.intercalate "." ("":es)
where where
es = filter (not . S.null) $ reverse $ es = filter (not . S.null) $ reverse $
take 2 $ filter (S.all validInExtension) $ take (fromMaybe maxExtensions maxextensions) $
filter (S.all validInExtension) $
takeWhile shortenough $ takeWhile shortenough $
reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f') reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f')
shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen
@ -75,3 +79,6 @@ validInExtension c
maxExtensionLen :: Int maxExtensionLen :: Int
maxExtensionLen = 4 -- long enough for "jpeg" maxExtensionLen = 4 -- long enough for "jpeg"
maxExtensions :: Int
maxExtensions = 2 -- include both extensions of "tar.gz"

View file

@ -18,6 +18,8 @@ git-annex (10.20240228) UNRELEASED; urgency=medium
* Added rclone special remote, which can be used without needing * Added rclone special remote, which can be used without needing
to install the git-annex-remote-rclone program. This needs to install the git-annex-remote-rclone program. This needs
a new version of rclone, which supports "rclone gitannex". a new version of rclone, which supports "rclone gitannex".
* annex.maxextensions configuration controls how many filename
extensions to preserve.
-- Joey Hess <id@joeyh.name> Tue, 27 Feb 2024 13:07:10 -0400 -- Joey Hess <id@joeyh.name> Tue, 27 Feb 2024 13:07:10 -0400

View file

@ -136,6 +136,7 @@ data GitConfig = GitConfig
, annexAllowedIPAddresses :: String , annexAllowedIPAddresses :: String
, annexAllowUnverifiedDownloads :: Bool , annexAllowUnverifiedDownloads :: Bool
, annexMaxExtensionLength :: Maybe Int , annexMaxExtensionLength :: Maybe Int
, annexMaxExtensions :: Maybe Int
, annexJobs :: Concurrency , annexJobs :: Concurrency
, annexCacheCreds :: Bool , annexCacheCreds :: Bool
, annexAutoUpgradeRepository :: Bool , annexAutoUpgradeRepository :: Bool
@ -244,6 +245,7 @@ extractGitConfig configsource r = GitConfig
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ , annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe (annexConfig "security.allow-unverified-downloads") getmaybe (annexConfig "security.allow-unverified-downloads")
, annexMaxExtensionLength = getmayberead (annexConfig "maxextensionlength") , annexMaxExtensionLength = getmayberead (annexConfig "maxextensionlength")
, annexMaxExtensions = getmayberead (annexConfig "maxextensions")
, annexJobs = fromMaybe NonConcurrent $ , annexJobs = fromMaybe NonConcurrent $
parseConcurrency =<< getmaybe (annexConfig "jobs") parseConcurrency =<< getmaybe (annexConfig "jobs")
, annexCacheCreds = getbool (annexConfig "cachecreds") True , annexCacheCreds = getbool (annexConfig "cachecreds") True

View file

@ -873,8 +873,16 @@ repository, using [[git-annex-config]]. See its man page for a list.)
and also when generating a view branch. and also when generating a view branch.
The default length is 4, which allows extensions like "jpeg". The dot before The default length is 4, which allows extensions like "jpeg". The dot before
the extension is not counted as part of its length. At most two extensions the extension is not counted as part of its length.
at the end of a filename will be preserved, e.g. .gz or .tar.gz .
* `annex.maxextensions`
Maximum number of filename extensions to preserve when using a backend
that preserves filename extensions, and also when generating a view
branch.
The default is 2, which allows for compound extensions like ".tar.gz".
When set to 1, it will only preserve the last extension, e.g. ".gz".
* `annex.diskreserve` * `annex.diskreserve`

View file

@ -12,3 +12,5 @@ Just throwing against the wall to see if sticks
[[!meta author=yoh]] [[!meta author=yoh]]
[[!tag projects/repronim]] [[!tag projects/repronim]]
> added annex.maxextensions config, [[done]] --[[Joey]]

View file

@ -9,7 +9,7 @@ extension. For a .mkv file, I'd guess most video players don't care about
the extension. the extension.
annex.maxextensionlength won't help here, but I think it makes sense to add annex.maxextensionlength won't help here, but I think it makes sense to add
an analogous annex.maxextensioncount which would default to 2 (as it an analogous annex.maxextensions which would default to 2 (as it
currently does to handle .tar.gz) but you could set to 1. currently does to handle .tar.gz) but you could set to 1.
It might also be a reasonable argument that filename extensions are not It might also be a reasonable argument that filename extensions are not