Added --accessedwithin matching option.
Useful for dropping old objects from cache repositories. But also, quite a generally useful thing to have. Rather than imitating find's -atime and other options, all of which are pretty horrible to use, I made this match files accessed within a time period, using the same duration format used by git-annex schedule and --time-limit. In passing, changed the --time-limit option parser to parse the duration, instead of having it later throw an error. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
fd5a392006
commit
6e6c9cc6d3
5 changed files with 56 additions and 11 deletions
|
@ -5,6 +5,7 @@ git-annex (6.20180720) UNRELEASED; urgency=medium
|
|||
* Fix reversion in display of http 404 errors.
|
||||
* Added remote.name.annex-speculate-present config that can be used to
|
||||
make cache remotes.
|
||||
* Added --accessedwithin matching option.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 31 Jul 2018 12:14:11 -0400
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ import CmdLine.Usage
|
|||
import CmdLine.GlobalSetter
|
||||
import qualified Backend
|
||||
import qualified Types.Backend as Backend
|
||||
import Utility.HumanTime
|
||||
|
||||
-- Global options that are accepted by all git-annex sub-commands,
|
||||
-- although not always used.
|
||||
|
@ -275,6 +276,12 @@ nonWorkTreeMatchingOptions' =
|
|||
<> help "match files the repository wants to drop"
|
||||
<> hidden
|
||||
)
|
||||
, globalSetter Limit.addAccessedWithin $ option (str >>= parseDuration)
|
||||
( long "accessedwithin"
|
||||
<> metavar paramTime
|
||||
<> help "match files accessed within a time interval"
|
||||
<> hidden
|
||||
)
|
||||
]
|
||||
|
||||
-- Options to match files which may not yet be annexed.
|
||||
|
@ -371,7 +378,7 @@ jobsOption =
|
|||
|
||||
timeLimitOption :: [GlobalOption]
|
||||
timeLimitOption =
|
||||
[ globalSetter Limit.addTimeLimit $ strOption
|
||||
[ globalSetter Limit.addTimeLimit $ option (str >>= parseDuration)
|
||||
( long "time-limit" <> short 'T' <> metavar paramTime
|
||||
<> help "stop after the specified amount of time"
|
||||
<> hidden
|
||||
|
|
23
Limit.hs
23
Limit.hs
|
@ -298,21 +298,32 @@ limitMetaData s = case parseMetaDataMatcher s of
|
|||
. S.filter matching
|
||||
. metaDataValues f <$> getCurrentMetaData k
|
||||
|
||||
addTimeLimit :: String -> Annex ()
|
||||
addTimeLimit s = do
|
||||
let seconds = maybe (giveup "bad time-limit") durationToPOSIXTime $
|
||||
parseDuration s
|
||||
addTimeLimit :: Duration -> Annex ()
|
||||
addTimeLimit duration = do
|
||||
start <- liftIO getPOSIXTime
|
||||
let cutoff = start + seconds
|
||||
let cutoff = start + durationToPOSIXTime duration
|
||||
addLimit $ Right $ const $ const $ do
|
||||
now <- liftIO getPOSIXTime
|
||||
if now > cutoff
|
||||
then do
|
||||
warning $ "Time limit (" ++ s ++ ") reached!"
|
||||
warning $ "Time limit (" ++ fromDuration duration ++ ") reached!"
|
||||
shutdown True
|
||||
liftIO $ exitWith $ ExitFailure 101
|
||||
else return True
|
||||
|
||||
-- | Adds a limit that matches a key when the file holding its content
-- has an access time no more than the given duration before the moment
-- this limit was set up.
--
-- The reference time is sampled once, here, rather than once per file
-- checked.
addAccessedWithin :: Duration -> Annex ()
addAccessedWithin duration = do
    setupTime <- liftIO getPOSIXTime
    addLimit $ Right $ \_ -> checkKey (recentlyAccessed setupTime)
  where
    -- Width of the matching window, in seconds.
    window = fromIntegral (durationSeconds duration)
    -- NOTE(review): inAnnexCheck presumably yields False when the key's
    -- content is not present, and catchDefaultIO presumably maps an IO
    -- failure of the stat to False (no match) -- confirm against their
    -- definitions.
    recentlyAccessed setupTime key = inAnnexCheck key $ \contentFile ->
        liftIO $ catchDefaultIO False $ do
            st <- getFileStatus contentFile
            let lastAccess = realToFrac (accessTime st)
            return (setupTime - lastAccess <= window)
|
||||
|
||||
-- | Looks up the key, if any, for a 'FileInfo' by passing its 'currFile'
-- to 'lookupFile'.
lookupFileKey :: FileInfo -> Annex (Maybe Key)
lookupFileKey fileInfo = lookupFile (currFile fileInfo)
|
||||
|
||||
|
|
|
@ -145,6 +145,20 @@ in either of two repositories.
|
|||
|
||||
Note that this will not match anything when using --all or --unused.
|
||||
|
||||
* `--accessedwithin=interval`
|
||||
|
||||
Matches files that were accessed recently, within the specified time
|
||||
interval.
|
||||
|
||||
The interval can be in the form "5m" or "1h" or "2d" or "1y", or a
|
||||
combination such as "1h5m".
|
||||
|
||||
So for example, `--accessedwithin=1d` matches files that have been
|
||||
accessed within the past day.
|
||||
|
||||
If the OS or filesystem does not support access times, this will not
|
||||
match any files.
|
||||
|
||||
* `--not`
|
||||
|
||||
Inverts the next matching option. For example, to only act on
|
||||
|
|
|
@ -21,10 +21,10 @@ You'll need git-annex 6.20180802 or newer to follow these instructions.
|
|||
## creating the cache
|
||||
|
||||
First let's create a new, empty git-annex repository. It will be put in
|
||||
~/.annex-cache in the example, but for best results, it in the same
|
||||
~/.annex-cache in the example, but for best results, put it in the same
|
||||
filesystem as your other git-annex repositories.
|
||||
|
||||
git init ~/.annex-cache
|
||||
git init --bare ~/.annex-cache
|
||||
cd ~/.annex-cache
|
||||
git annex init
|
||||
git config annex.hardlink true
|
||||
|
@ -79,11 +79,23 @@ enough start.
|
|||
|
||||
## cleaning the cache
|
||||
|
||||
XXX find
|
||||
You can safely remove content from the cache at any time to free up disk
|
||||
space.
|
||||
|
||||
To remove everything:
|
||||
|
||||
cd ~/.annex-cache
|
||||
git annex drop --force
|
||||
|
||||
To remove files that have not been requested from the cache for the past day:
|
||||
|
||||
cd ~/.annex-cache
|
||||
git annex drop --force --not --accessedwithin=1d
|
||||
|
||||
## automatically populating the cache
|
||||
|
||||
XXX
|
||||
The assistant can be used to automatically populate the cache with files
|
||||
that git-annex downloads into a repository.
|
||||
|
||||
## more caches
|
||||
|
||||
|
|
Loading…
Reference in a new issue