annex.maxextensions configuration

Controls how many filename extensions to preserve.

Sponsored-by: the NIH-funded NICEMAN (ReproNim TR&D3) project
This commit is contained in:
Joey Hess 2024-04-18 14:23:05 -04:00
parent b700c48b15
commit c410b2bb73
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 51 additions and 22 deletions

View file

@ -387,7 +387,7 @@ prop_view_roundtrips (AssociatedFile Nothing) _ _ = True
prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or
[ B.null (P.takeFileName f) && B.null (P.takeDirectory f)
, viewTooLarge view
, all hasfields (viewedFiles view (viewedFileFromReference' Nothing) (fromRawFilePath f) metadata)
, all hasfields (viewedFiles view (viewedFileFromReference' Nothing Nothing) (fromRawFilePath f) metadata)
]
where
view = View (Git.Ref "foo") $

View file

@ -1,6 +1,6 @@
{- filenames (not paths) used in views
-
- Copyright 2014-2023 Joey Hess <id@joeyh.name>
- Copyright 2014-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -19,6 +19,7 @@ module Annex.View.ViewedFile (
import Annex.Common
import Utility.QuickCheck
import Backend.Utilities (maxExtensions)
import qualified Data.ByteString as S
@ -37,10 +38,12 @@ type MkViewedFile = FilePath -> ViewedFile
- So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo
-}
viewedFileFromReference :: GitConfig -> MkViewedFile
viewedFileFromReference g = viewedFileFromReference' (annexMaxExtensionLength g)
viewedFileFromReference g = viewedFileFromReference'
(annexMaxExtensionLength g)
(annexMaxExtensions g)
viewedFileFromReference' :: Maybe Int -> MkViewedFile
viewedFileFromReference' maxextlen f = concat $
viewedFileFromReference' :: Maybe Int -> Maybe Int -> MkViewedFile
viewedFileFromReference' maxextlen maxextensions f = concat $
[ escape (fromRawFilePath base')
, if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%"
, escape $ fromRawFilePath $ S.concat extensions'
@ -51,11 +54,12 @@ viewedFileFromReference' maxextlen f = concat $
(base, extensions) = case maxextlen of
Nothing -> splitShortExtensions (toRawFilePath basefile')
Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile')
{- Limit to two extensions maximum. -}
{- Limit number of extensions. -}
maxextensions' = fromMaybe maxExtensions maxextensions
(base', extensions')
| length extensions <= 2 = (base, extensions)
| length extensions <= maxextensions' = (base, extensions)
| otherwise =
let (es,more) = splitAt 2 (reverse extensions)
let (es,more) = splitAt maxextensions' (reverse extensions)
in (base <> mconcat (reverse more), reverse es)
{- On Windows, if the filename looked like "dir/c:foo" then
- basefile would look like it contains a drive letter, which will
@ -101,7 +105,8 @@ prop_viewedFile_roundtrips tf
-- Relative filenames wanted, not directories.
| any (isPathSeparator) (end f ++ beginning f) = True
| isAbsolute f || isDrive f = True
| otherwise = dir == dirFromViewedFile (viewedFileFromReference' Nothing f)
| otherwise = dir == dirFromViewedFile
(viewedFileFromReference' Nothing Nothing f)
where
f = fromTestableFilePath tf
dir = joinPath $ beginning $ splitDirectories f

View file

@ -170,11 +170,14 @@ needsUpgrade key = or
]
trivialMigrate :: Key -> Backend -> AssociatedFile -> Bool -> Annex (Maybe Key)
trivialMigrate oldkey newbackend afile _inannex = trivialMigrate' oldkey newbackend afile
<$> (annexMaxExtensionLength <$> Annex.getGitConfig)
trivialMigrate oldkey newbackend afile _inannex = do
c <- Annex.getGitConfig
return $ trivialMigrate' oldkey newbackend afile
(annexMaxExtensionLength c)
(annexMaxExtensions c)
trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key
trivialMigrate' oldkey newbackend afile maxextlen
trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Int -> Maybe Key
trivialMigrate' oldkey newbackend afile maxextlen maxexts
{- Fast migration from hashE to hash backend. -}
| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
{ keyName = S.toShort (keyHash oldkey)
@ -185,7 +188,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
AssociatedFile Nothing -> Nothing
AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
{ keyName = S.toShort $ keyHash oldkey
<> selectExtension maxextlen file
<> selectExtension maxextlen maxexts file
, keyVariety = newvariety
}
{- Upgrade to fix bad previous migration that created a

View file

@ -45,20 +45,24 @@ genKeyName s
- file that the key was generated from. -}
addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key
addE source sethasext k = do
maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
let ext = selectExtension maxlen (keyFilename source)
c <- Annex.getGitConfig
let ext = selectExtension
(annexMaxExtensionLength c)
(annexMaxExtensions c)
(keyFilename source)
return $ alterKey k $ \d -> d
{ keyName = keyName d <> S.toShort ext
, keyVariety = sethasext (keyVariety d)
}
selectExtension :: Maybe Int -> RawFilePath -> S.ByteString
selectExtension maxlen f
selectExtension :: Maybe Int -> Maybe Int -> RawFilePath -> S.ByteString
selectExtension maxlen maxextensions f
| null es = ""
| otherwise = S.intercalate "." ("":es)
where
es = filter (not . S.null) $ reverse $
take 2 $ filter (S.all validInExtension) $
take (fromMaybe maxExtensions maxextensions) $
filter (S.all validInExtension) $
takeWhile shortenough $
reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f')
shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen
@ -75,3 +79,6 @@ validInExtension c
{- Fallback upper bound on the length of a single filename extension,
 - used by selectExtension's shortenough check when no explicit maximum
 - length is supplied. The length is measured on the pieces obtained by
 - splitting on '.', so the dot itself is not counted. -}
maxExtensionLen :: Int
maxExtensionLen = 4 -- long enough for "jpeg"
{- Fallback for how many trailing filename extensions are kept when no
 - explicit count is supplied (callers apply it with fromMaybe). -}
maxExtensions :: Int
maxExtensions = 2 -- include both extensions of "tar.gz"

View file

@ -18,6 +18,8 @@ git-annex (10.20240228) UNRELEASED; urgency=medium
* Added rclone special remote, which can be used without needing
to install the git-annex-remote-rclone program. This needs
a new version of rclone, which supports "rclone gitannex".
* annex.maxextensions configuration controls how many filename
extensions to preserve.
-- Joey Hess <id@joeyh.name> Tue, 27 Feb 2024 13:07:10 -0400

View file

@ -136,6 +136,7 @@ data GitConfig = GitConfig
, annexAllowedIPAddresses :: String
, annexAllowUnverifiedDownloads :: Bool
, annexMaxExtensionLength :: Maybe Int
, annexMaxExtensions :: Maybe Int
, annexJobs :: Concurrency
, annexCacheCreds :: Bool
, annexAutoUpgradeRepository :: Bool
@ -244,6 +245,7 @@ extractGitConfig configsource r = GitConfig
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe (annexConfig "security.allow-unverified-downloads")
, annexMaxExtensionLength = getmayberead (annexConfig "maxextensionlength")
, annexMaxExtensions = getmayberead (annexConfig "maxextensions")
, annexJobs = fromMaybe NonConcurrent $
parseConcurrency =<< getmaybe (annexConfig "jobs")
, annexCacheCreds = getbool (annexConfig "cachecreds") True

View file

@ -873,8 +873,16 @@ repository, using [[git-annex-config]]. See its man page for a list.)
and also when generating a view branch.
The default length is 4, which allows extensions like "jpeg". The dot before
the extension is not counted part of its length. At most two extensions
at the end of a filename will be preserved, e.g. .gz or .tar.gz .
the extension is not counted part of its length.
* `annex.maxextensions`
Maximum number of filename extensions to preserve when using a backend
that preserves filename extensions, and also when generating a view
branch.
The default is 2, which allows for compound extensions like ".tar.gz".
When set to 1, only the last extension is preserved, e.g. ".gz".
* `annex.diskreserve`

View file

@ -12,3 +12,5 @@ Just throwing against the wall to see if sticks
[[!meta author=yoh]]
[[!tag projects/repronim]]
> added annex.maxextensions config, [[done]] --[[Joey]]

View file

@ -9,7 +9,7 @@ extension. For a .mkv file, I'd guess most video players don't care about
the extension.
annex.maxextensionlength won't help here, but I think it makes sense to add
an analogous annex.maxextensioncount which would default to 2 (as it
an analogous annex.maxextensions which would default to 2 (as it
currently does to handle .tar.gz) but you could set to 1.
It might also be a reasonable argument that filename extensions are not