New matching options --excludesamecontent and --includesamecontent
The normalisation of filenames turns out to be the tricky part here, because the associated files coming out of the keys db may look like "./foo/bar" or "../bar". For the former to match a glob like "foo/*", it needs to be normalised. Note that, on windows, normalise "./foo/bar" = "foo\\bar" which a glob like "foo/*" won't match. So the glob is matched a second time, on the toInternalGitPath, so allowing the user to provide a glob with the slashes in either direction. However, this still won't support some wacky edge cases like the user providing a glob of "foo/bar\\*" Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
cd73fcc92c
commit
b5f5475ed6
5 changed files with 102 additions and 3 deletions
67
Limit.hs
67
Limit.hs
|
@ -1,10 +1,12 @@
|
|||
{- user-specified limits on files to act on
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Limit where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -29,16 +31,20 @@ import Logs.MetaData
|
|||
import Logs.Group
|
||||
import Logs.Unused
|
||||
import Logs.Location
|
||||
import Annex.CatFile
|
||||
import Git.FilePath
|
||||
import Git.Types (RefDate(..))
|
||||
import Utility.Glob
|
||||
import Utility.HumanTime
|
||||
import Utility.DataUnits
|
||||
import qualified Database.Keys
|
||||
import qualified Utility.RawFilePath as R
|
||||
import Backend
|
||||
|
||||
import Data.Time.Clock.POSIX
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
import qualified System.FilePath.ByteString as P
|
||||
|
||||
{- Some limits can look at the current status of files on
|
||||
- disk, or in the annex. This allows controlling which happens. -}
|
||||
|
@ -122,6 +128,65 @@ matchGlobFile glob = go
|
|||
Nothing -> False
|
||||
go (MatchingUserInfo p) = matchGlob cglob <$> getUserInfo (userProvidedFilePath p)
|
||||
|
||||
{- Add a limit to skip files when there is no other file using the same
|
||||
- content, with a name matching the glob. -}
|
||||
addIncludeSameContent :: String -> Annex ()
|
||||
addIncludeSameContent = addLimit . limitIncludeSameContent
|
||||
|
||||
limitIncludeSameContent :: MkLimit Annex
|
||||
limitIncludeSameContent glob = Right $ MatchFiles
|
||||
{ matchAction = const $ matchSameContentGlob glob
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
, matchNeedsKey = False
|
||||
, matchNeedsLocationLog = False
|
||||
}
|
||||
|
||||
{- Add a limit to skip files when there is no other file using the same
|
||||
- content, with a name matching the glob. -}
|
||||
addExcludeSameContent :: String -> Annex ()
|
||||
addExcludeSameContent = addLimit . limitExcludeSameContent
|
||||
|
||||
limitExcludeSameContent :: MkLimit Annex
|
||||
limitExcludeSameContent glob = Right $ MatchFiles
|
||||
{ matchAction = const $ not <$$> matchSameContentGlob glob
|
||||
, matchNeedsFileName = True
|
||||
, matchNeedsFileContent = False
|
||||
, matchNeedsKey = False
|
||||
, matchNeedsLocationLog = False
|
||||
}
|
||||
|
||||
matchSameContentGlob :: String -> MatchInfo -> Annex Bool
|
||||
matchSameContentGlob glob mi = checkKey (go mi) mi
|
||||
where
|
||||
go (MatchingFile fi) k = check k (matchFile fi)
|
||||
go (MatchingInfo p) k = case providedFilePath p of
|
||||
Just f -> check k f
|
||||
Nothing -> return False
|
||||
go (MatchingUserInfo p) k =
|
||||
check k . toRawFilePath
|
||||
=<< getUserInfo (userProvidedFilePath p)
|
||||
|
||||
cglob = compileGlob glob CaseSensative (GlobFilePath True) -- memoized
|
||||
|
||||
matchesglob f = matchGlob cglob (fromRawFilePath f)
|
||||
#ifdef mingw32_HOST_OS
|
||||
|| matchGlob cglob (fromRawFilePath (toInternalGitPath f))
|
||||
#endif
|
||||
|
||||
check k skipf = do
|
||||
-- Find other files with the same content, with filenames
|
||||
-- matching the glob.
|
||||
g <- Annex.gitRepo
|
||||
fs <- filter (/= P.normalise skipf)
|
||||
. filter matchesglob
|
||||
. map (\f -> P.normalise (fromTopFilePath f g))
|
||||
<$> Database.Keys.getAssociatedFiles k
|
||||
-- Some associated files in the keys database may no longer
|
||||
-- correspond to files in the repository. This is checked
|
||||
-- last as it's most expensive.
|
||||
anyM (\f -> maybe False (== k) <$> catKeyFile f) fs
|
||||
|
||||
addMimeType :: String -> Annex ()
|
||||
addMimeType = addMagicLimit "mimetype" getMagicMimeType providedMimeType userProvidedMimeType
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue