From 4ecba916a14e02dd62f8ba4257db810fa859f017 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 24 Sep 2018 12:07:46 -0400 Subject: [PATCH] annex.maxextensionlength Added annex.maxextensionlength for use cases where extensions longer than 4 characters are needed. This commit was sponsored by Henrik Riomar on Patreon. --- Backend/Hash.hs | 31 +++++++++++++------ Backend/WORM.hs | 6 ++-- CHANGELOG | 2 ++ Command/Migrate.hs | 5 +-- Types/Backend.hs | 2 +- Types/GitConfig.hs | 2 ++ doc/git-annex.mdwn | 7 +++++ doc/todo/support_longer_file_extensions.mdwn | 2 ++ ..._a01fbe5b4f72989532051f5e1bb55104._comment | 7 +++++ 9 files changed, 48 insertions(+), 16 deletions(-) create mode 100644 doc/todo/support_longer_file_extensions/comment_2_a01fbe5b4f72989532051f5e1bb55104._comment diff --git a/Backend/Hash.hs b/Backend/Hash.hs index f9dddaaa11..7d8e335854 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -98,13 +98,16 @@ keyValue hash source = do keyValueE :: Hash -> KeySource -> Annex (Maybe Key) keyValueE hash source = keyValue hash source >>= maybe (return Nothing) addE where - addE k = return $ Just $ k - { keyName = keyName k ++ selectExtension (keyFilename source) - , keyVariety = hashKeyVariety hash (HasExt True) - } + addE k = do + maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig + let ext = selectExtension maxlen (keyFilename source) + return $ Just $ k + { keyName = keyName k ++ ext + , keyVariety = hashKeyVariety hash (HasExt True) + } -selectExtension :: FilePath -> String -selectExtension f +selectExtension :: Maybe Int -> FilePath -> String +selectExtension maxlen f | null es = "" | otherwise = intercalate "." ("":es) where @@ -112,7 +115,10 @@ selectExtension f take 2 $ filter (all validInExtension) $ takeWhile shortenough $ reverse $ splitc '.' $ takeExtensions f - shortenough e = length e <= 4 -- long enough for "jpeg" + shortenough e = length e <= fromMaybe maxExtensionLen maxlen + +maxExtensionLen :: Int +maxExtensionLen = 4 -- long enough for "jpeg" {- A key's checksum is checked during fsck when it's content is present - except for in fast mode. -} @@ -162,8 +168,12 @@ needsUpgrade key = or , not (hasExt (keyVariety key)) && keyHash key /= keyName key ] -trivialMigrate :: Key -> Backend -> AssociatedFile -> Maybe Key -trivialMigrate oldkey newbackend afile +trivialMigrate :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key) +trivialMigrate oldkey newbackend afile = trivialMigrate' oldkey newbackend afile + <$> (annexMaxExtensionLength <$> Annex.getGitConfig) + +trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key +trivialMigrate' oldkey newbackend afile maxextlen {- Fast migration from hashE to hash backend. -} | migratable && hasExt oldvariety = Just $ oldkey { keyName = keyHash oldkey @@ -173,7 +183,8 @@ trivialMigrate oldkey newbackend afile | migratable && hasExt newvariety = case afile of AssociatedFile Nothing -> Nothing AssociatedFile (Just file) -> Just $ oldkey - { keyName = keyHash oldkey ++ selectExtension file + { keyName = keyHash oldkey + ++ selectExtension maxextlen file , keyVariety = newvariety } {- Upgrade to fix bad previous migration that created a diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 678784fdf6..689cc1d904 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -47,11 +47,11 @@ keyValue source = do needsUpgrade :: Key -> Bool needsUpgrade key = ' ' `elem` keyName key -removeSpaces :: Key -> Backend -> AssociatedFile -> Maybe Key +removeSpaces :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key) removeSpaces oldkey newbackend _ - | migratable = Just $ oldkey + | migratable = return $ Just $ oldkey { keyName = reSanitizeKeyName (keyName oldkey) } - | otherwise = Nothing + | otherwise = return Nothing where migratable = oldvariety == newvariety oldvariety = keyVariety oldkey diff --git a/CHANGELOG b/CHANGELOG index 02cdfafb3d..3acc10663e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,8 @@ git-annex (6.20180914) UNRELEASED; urgency=medium as the rest of git's interface is used in newline-safe ways. * Added -z option to git-annex commands that use --batch, useful for supporting filenames containing newlines. + * Added annex.maxextensionlength for use cases where extensions longer + than 4 characters are needed. * More FreeBSD build fixes. [ Yaroslav Halchenko ] diff --git a/Command/Migrate.hs b/Command/Migrate.hs index 1f0a62bcb2..bc5c426fc8 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -65,7 +65,7 @@ upgradableKey backend key = isNothing (keySize key) || backendupgradable - generated. -} perform :: FilePath -> Key -> Backend -> Backend -> CommandPerform -perform file oldkey oldbackend newbackend = go =<< genkey +perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbackend) where go Nothing = stop go (Just (newkey, knowngoodcontent)) @@ -84,7 +84,8 @@ perform file oldkey oldbackend newbackend = go =<< genkey next $ Command.ReKey.cleanup file oldkey newkey , error "failed" ) - genkey = case maybe Nothing (\fm -> fm oldkey newbackend afile) (fastMigrate oldbackend) of + genkey Nothing = return Nothing + genkey (Just fm) = fm oldkey newbackend afile >>= \case Just newkey -> return $ Just (newkey, True) Nothing -> do content <- calcRepo $ gitAnnexLocation oldkey diff --git a/Types/Backend.hs b/Types/Backend.hs index f1d8919a49..10b7b47fc8 100644 --- a/Types/Backend.hs +++ b/Types/Backend.hs @@ -21,7 +21,7 @@ data BackendA a = Backend , canUpgradeKey :: Maybe (Key -> Bool) -- Checks if there is a fast way to migrate a key to a different -- backend (ie, without re-hashing). - , fastMigrate :: Maybe (Key -> BackendA a -> AssociatedFile -> Maybe Key) + , fastMigrate :: Maybe (Key -> BackendA a -> AssociatedFile -> a (Maybe Key)) -- Checks if a key is known (or assumed) to always refer to the -- same data. , isStableKey :: Key -> Bool diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 31d6fbe411..0677c6dbd9 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -98,6 +98,7 @@ data GitConfig = GitConfig , annexAllowedUrlSchemes :: S.Set Scheme , annexAllowedHttpAddresses :: String , annexAllowUnverifiedDownloads :: Bool + , annexMaxExtensionLength :: Maybe Int , coreSymlinks :: Bool , coreSharedRepository :: SharedRepository , receiveDenyCurrentBranch :: DenyCurrentBranch @@ -171,6 +172,7 @@ extractGitConfig r = GitConfig getmaybe (annex "security.allowed-http-addresses") , annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ getmaybe (annex "security.allow-unverified-downloads") + , annexMaxExtensionLength = getmayberead (annex "maxextensionlength") , coreSymlinks = getbool "core.symlinks" True , coreSharedRepository = getSharedRepository r , receiveDenyCurrentBranch = getDenyCurrentBranch r diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index ad1e7b44e0..44ac03f8ba 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -865,6 +865,13 @@ Here are all the supported configuration settings. To configure the behavior in new clones of the repository, this can be set in [[git-annex-config]]. +* `annex.maxextensionlength` + + Maximum length of what is considered a filename extension when adding a + file to a backend that preserves filename extensions. The default length + is 4, which allows extensions like "jpeg". The dot before the extension + is not counted part of its length. + * `annex.diskreserve` Amount of disk space to reserve. Disk space is checked when transferring diff --git a/doc/todo/support_longer_file_extensions.mdwn b/doc/todo/support_longer_file_extensions.mdwn index 438d4da960..338cf295d7 100644 --- a/doc/todo/support_longer_file_extensions.mdwn +++ b/doc/todo/support_longer_file_extensions.mdwn @@ -3,3 +3,5 @@ file myfile.fasta becomes the symlink .git/annex/objects/xx/xx/key/myfile.fasta .git/annex/objects/xx/xx/key/anotherfile.fasta which is a hardlink to myfile.fasta . An added plus is that the symlinks checked into git typically becomes shorter. Or, for better backwards compatibility, the symlinks checked into git don't change, but .git/annex/objects/xx/xx/key/key becomes a symlink to .git/annex/objects/xx/xx/key/myfile.fasta . However, if there is anotherfile.fasta with the same key, its symlink will still end up terminating at myfile.fasta rather than anotherfile.fasta . It's useful to preserve full filenames, because it's not uncommon to e.g. encode parameter information in filenames (myresult.threshold100.dat); and it's not uncommon to call something like python's os.path.realpath to unwind symlink chains before processing a file. + +> [[done]] --[[Joey]] diff --git a/doc/todo/support_longer_file_extensions/comment_2_a01fbe5b4f72989532051f5e1bb55104._comment b/doc/todo/support_longer_file_extensions/comment_2_a01fbe5b4f72989532051f5e1bb55104._comment new file mode 100644 index 0000000000..541dd24a35 --- /dev/null +++ b/doc/todo/support_longer_file_extensions/comment_2_a01fbe5b4f72989532051f5e1bb55104._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2018-09-24T15:50:47Z" + content=""" +Added annex.maxextensionlength configuration. +"""]]