From 6e6c9cc6d3d10ab027a346cde417841924df3cbe Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 1 Aug 2018 15:20:18 -0400 Subject: [PATCH] Added --accessedwithin matching option. Useful for dropping old objects from cache repositories. But also, quite a generally useful thing to have. Rather than imitating find's -atime and other options, all of which are pretty horrible to use, I made this match files accessed within a time period, using the same duration format used by git-annex schedule and --time-limit. In passing, changed the --time-limit option parser to parse the duration, instead of having it later throw an error. This commit was supported by the NSF-funded DataLad project. --- CHANGELOG | 1 + CmdLine/GitAnnex/Options.hs | 9 +++++++- Limit.hs | 23 +++++++++++++++----- doc/git-annex-matching-options.mdwn | 14 ++++++++++++ doc/tips/local_caching_of_annexed_files.mdwn | 20 +++++++++++++---- 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index dd1870c254..1fd9351641 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ git-annex (6.20180720) UNRELEASED; urgency=medium * Fix reversion in display of http 404 errors. * Added remote.name.annex-speculate-present config that can be used to make cache remotes. + * Added --accessedwithin matching option. -- Joey Hess Tue, 31 Jul 2018 12:14:11 -0400 diff --git a/CmdLine/GitAnnex/Options.hs b/CmdLine/GitAnnex/Options.hs index 97cef88b94..791f499d13 100644 --- a/CmdLine/GitAnnex/Options.hs +++ b/CmdLine/GitAnnex/Options.hs @@ -38,6 +38,7 @@ import CmdLine.Usage import CmdLine.GlobalSetter import qualified Backend import qualified Types.Backend as Backend +import Utility.HumanTime -- Global options that are accepted by all git-annex sub-commands, -- although not always used. 
@@ -275,6 +276,12 @@ nonWorkTreeMatchingOptions' = <> help "match files the repository wants to drop" <> hidden ) + , globalSetter Limit.addAccessedWithin $ option (str >>= parseDuration) + ( long "accessedwithin" + <> metavar paramTime + <> help "match files accessed within a time interval" + <> hidden + ) ] -- Options to match files which may not yet be annexed. @@ -371,7 +378,7 @@ jobsOption = timeLimitOption :: [GlobalOption] timeLimitOption = - [ globalSetter Limit.addTimeLimit $ strOption + [ globalSetter Limit.addTimeLimit $ option (str >>= parseDuration) ( long "time-limit" <> short 'T' <> metavar paramTime <> help "stop after the specified amount of time" <> hidden diff --git a/Limit.hs b/Limit.hs index 5d00e2e688..93b32a89f6 100644 --- a/Limit.hs +++ b/Limit.hs @@ -298,21 +298,32 @@ limitMetaData s = case parseMetaDataMatcher s of . S.filter matching . metaDataValues f <$> getCurrentMetaData k -addTimeLimit :: String -> Annex () -addTimeLimit s = do - let seconds = maybe (giveup "bad time-limit") durationToPOSIXTime $ - parseDuration s +addTimeLimit :: Duration -> Annex () +addTimeLimit duration = do start <- liftIO getPOSIXTime - let cutoff = start + seconds + let cutoff = start + durationToPOSIXTime duration addLimit $ Right $ const $ const $ do now <- liftIO getPOSIXTime if now > cutoff then do - warning $ "Time limit (" ++ s ++ ") reached!" + warning $ "Time limit (" ++ fromDuration duration ++ ") reached!" shutdown True liftIO $ exitWith $ ExitFailure 101 else return True +addAccessedWithin :: Duration -> Annex () +addAccessedWithin duration = do + now <- liftIO getPOSIXTime + addLimit $ Right $ const $ checkKey $ check now + where + check now k = inAnnexCheck k $ \f -> + liftIO $ catchDefaultIO False $ do + s <- getFileStatus f + let accessed = realToFrac (accessTime s) + let delta = now - accessed + return $ delta <= secs + secs = fromIntegral (durationSeconds duration) + lookupFileKey :: FileInfo -> Annex (Maybe Key) lookupFileKey = lookupFile . 
currFile diff --git a/doc/git-annex-matching-options.mdwn b/doc/git-annex-matching-options.mdwn index 2802fe60bc..81f705f3ad 100644 --- a/doc/git-annex-matching-options.mdwn +++ b/doc/git-annex-matching-options.mdwn @@ -145,6 +145,20 @@ in either of two repositories. Note that this will not match anything when using --all or --unused. +* `--accessedwithin=interval` + + Matches files that were accessed recently, within the specified time + interval. + + The interval can be in the form "5m" or "1h" or "2d" or "1y", or a + combination such as "1h5m". + + So for example, `--accessedwithin=1d` matches files that have been + accessed within the past day. + + If the OS or filesystem does not support access times, this will not + match any files. + * `--not` Inverts the next matching option. For example, to only act on diff --git a/doc/tips/local_caching_of_annexed_files.mdwn b/doc/tips/local_caching_of_annexed_files.mdwn index b7ddb545b9..5c0809933e 100644 --- a/doc/tips/local_caching_of_annexed_files.mdwn +++ b/doc/tips/local_caching_of_annexed_files.mdwn @@ -21,10 +21,10 @@ You'll need git-annex 6.20180802 or newer to follow these instructions. ## creating the cache First let's create a new, empty git-annex repository. It will be put in -~/.annex-cache in the example, but for best results, it in the same +~/.annex-cache in the example, but for best results, put it in the same filesystem as your other git-annex repositories. - git init ~/.annex-cache + git init --bare ~/.annex-cache cd ~/.annex-cache git annex init git config annex.hardlink true @@ -79,11 +79,23 @@ enough start. ## cleaning the cache -XXX find +You can safely remove content from the cache at any time to free up disk +space. 
+ +To remove everything: + + cd ~/.annex-cache + git annex drop --force + +To remove files that have not been requested from the cache for the past day: + + cd ~/.annex-cache + git annex drop --force --not --accessedwithin=1d ## automatically populating the cache -XXX +The assistant can be used to automatically populate the cache with files +that git-annex downloads into a repository. ## more caches