From 2b5fa091e2c10041d92eddda55225dc4a564e66f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 24 Mar 2023 13:53:51 -0400 Subject: [PATCH] annex.maxextensionlength for view view: Support annex.maxextensionlength when generating filenames for the view branch. Note that refining an existing view will reuse the extension length that was configured when initially constructing the view. This is necessarily the case because it reuses the filenames. Also view files used to have all extensions at the end, no matter how many there were. Since annex.maxextensionlength's documentation includes that it's limited to 2 extensions, I made it consistent with that. Sponsored-by: k0ld on Patreon --- Annex/View.hs | 9 ++++--- Annex/View/ViewedFile.hs | 27 +++++++++++++------ CHANGELOG | 2 ++ Utility/Path.hs | 1 + doc/git-annex-view.mdwn | 6 +++++ doc/git-annex.mdwn | 8 +++--- ..._2bcfc677da72637f34904b84fdd95c10._comment | 9 +++++++ 7 files changed, 48 insertions(+), 14 deletions(-) create mode 100644 doc/todo/Configuring_metadata_view_filenames/comment_8_2bcfc677da72637f34904b84fdd95c10._comment diff --git a/Annex/View.hs b/Annex/View.hs index 65db159710..b47e34564b 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -387,7 +387,7 @@ prop_view_roundtrips (AssociatedFile Nothing) _ _ = True prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or [ B.null (P.takeFileName f) && B.null (P.takeDirectory f) , viewTooLarge view - , all hasfields (viewedFiles view viewedFileFromReference (fromRawFilePath f) metadata) + , all hasfields (viewedFiles view (viewedFileFromReference' Nothing) (fromRawFilePath f) metadata) ] where view = View (Git.Ref "foo") $ @@ -421,7 +421,9 @@ getViewedFileMetaData = getDirMetaData . dirFromViewedFile . takeFileName - branch for the view. -} applyView :: View -> Maybe Adjustment -> Annex Git.Branch -applyView = applyView' viewedFileFromReference getWorkTreeMetaData +applyView v ma = do + gc <- Annex.getGitConfig + applyView' (viewedFileFromReference gc) getWorkTreeMetaData v ma {- Generates a new branch for a View, which must be a more narrow - version of the View originally used to generate the currently @@ -553,7 +555,8 @@ updateView view madj = do Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong True) (viewParentBranch view) - applyView'' viewedFileFromReference getWorkTreeMetaData view madj l clean $ + gc <- Annex.getGitConfig + applyView'' (viewedFileFromReference gc) getWorkTreeMetaData view madj l clean $ \ti -> do let ref = Git.Ref.branchFileRef (viewParentBranch view) (getTopFilePath (Git.LsTree.file ti)) diff --git a/Annex/View/ViewedFile.hs b/Annex/View/ViewedFile.hs index c804a50c0b..6aa992babb 100644 --- a/Annex/View/ViewedFile.hs +++ b/Annex/View/ViewedFile.hs @@ -1,6 +1,6 @@ {- filenames (not paths) used in views - - - Copyright 2014 Joey Hess + - Copyright 2014-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -11,6 +11,7 @@ module Annex.View.ViewedFile ( ViewedFile, MkViewedFile, viewedFileFromReference, + viewedFileFromReference', viewedFileReuse, dirFromViewedFile, prop_viewedFile_roundtrips, @@ -35,17 +36,27 @@ type MkViewedFile = FilePath -> ViewedFile - - So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo -} -viewedFileFromReference :: MkViewedFile -viewedFileFromReference f = concat $ - [ escape (fromRawFilePath base) +viewedFileFromReference :: GitConfig -> MkViewedFile +viewedFileFromReference g = viewedFileFromReference' (annexMaxExtensionLength g) + +viewedFileFromReference' :: Maybe Int -> MkViewedFile +viewedFileFromReference' maxextlen f = concat $ + [ escape (fromRawFilePath base') , if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%" - , escape $ fromRawFilePath $ S.concat extensions + , escape $ fromRawFilePath $ S.concat extensions' ] where (path, basefile) = splitFileName f dirs = filter (/= ".") $ map dropTrailingPathSeparator (splitPath path) - (base, extensions) = splitShortExtensions (toRawFilePath basefile') - + (base, extensions) = case maxextlen of + Nothing -> splitShortExtensions (toRawFilePath basefile') + Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile') + {- Limit to two extensions maximum. -} + (base', extensions') + | length extensions <= 2 = (base, extensions) + | otherwise = + let (es,more) = splitAt 2 (reverse extensions) + in (base <> mconcat (reverse more), reverse es) {- On Windows, if the filename looked like "dir/c:foo" then - basefile would look like it contains a drive letter, which will - not work. There cannot really be a filename like that, probably, @@ -90,7 +101,7 @@ prop_viewedFile_roundtrips tf -- Relative filenames wanted, not directories. | any (isPathSeparator) (end f ++ beginning f) = True | isAbsolute f || isDrive f = True - | otherwise = dir == dirFromViewedFile (viewedFileFromReference f) + | otherwise = dir == dirFromViewedFile (viewedFileFromReference' Nothing f) where f = fromTestableFilePath tf dir = joinPath $ beginning $ splitDirectories f diff --git a/CHANGELOG b/CHANGELOG index 1a2f35bad3..3b7f40eb04 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,8 @@ git-annex (10.20230322) UNRELEASED; urgency=medium drop when annex.adjustedbranchrefresh=1 * Avoid leaving repo with a detached head when there is a failure checking out an updated adjusted branch. + * view: Support annex.maxextensionlength when generating filenames for + the view branch. -- Joey Hess Thu, 23 Mar 2023 15:04:41 -0400 diff --git a/Utility/Path.hs b/Utility/Path.hs index dcb21400ea..64ef076ff9 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -20,6 +20,7 @@ module Utility.Path ( runSegmentPaths', dotfile, splitShortExtensions, + splitShortExtensions', relPathDirToFileAbs, inSearchPath, searchPath, diff --git a/doc/git-annex-view.mdwn b/doc/git-annex-view.mdwn index b8e126403b..f2677019a7 100644 --- a/doc/git-annex-view.mdwn +++ b/doc/git-annex-view.mdwn @@ -44,6 +44,12 @@ into the `_` directory and committing will unset the metadata. The name of the `_` directory can be changed using the annex.viewunsetdirectory git config. +Filenames in the view branch include their path within the original branch, to +ensure that they are unique. The path comes after the main filename, and +before any extensions. For example, "foo/bar.baz" will have a name +like "bar_%foo%.baz". annex.maxextensionlength can be used to configure +what is treated as an extension. + # OPTIONS * The [[git-annex-common-options]](1) can be used. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 5e63631be7..aac408d3ff 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -825,9 +825,11 @@ repository, using [[git-annex-config]]. See its man page for a list.) * `annex.maxextensionlength` - Maximum length, in bytes, of what is considered a filename extension when - adding a file to a backend that preserves filename extensions. The - default length is 4, which allows extensions like "jpeg". The dot before + Maximum length, in bytes, of what is considered a filename extension. + This is used when adding a file to a backend that preserves filename extensions, + and also when generating a view branch. + + The default length is 4, which allows extensions like "jpeg". The dot before the extension is not counted part of its length. At most two extensions at the end of a filename will be preserved, e.g. .gz or .tar.gz . diff --git a/doc/todo/Configuring_metadata_view_filenames/comment_8_2bcfc677da72637f34904b84fdd95c10._comment b/doc/todo/Configuring_metadata_view_filenames/comment_8_2bcfc677da72637f34904b84fdd95c10._comment new file mode 100644 index 0000000000..b379529f5c --- /dev/null +++ b/doc/todo/Configuring_metadata_view_filenames/comment_8_2bcfc677da72637f34904b84fdd95c10._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2023-03-24T17:48:43Z" + content=""" +I've made git-annex view use `annex.maxextensionlength`. Note that refining +an existing view will reuse the extension length that was configured when +initially constructing the view. +"""]]