diff --git a/Annex/Content.hs b/Annex/Content.hs index ccec9e8313..cac8646dd3 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -43,6 +43,7 @@ module Annex.Content ( moveBad, KeyLocation(..), listKeys, + listKeys', saveState, downloadUrl, preseedTmp, @@ -653,22 +654,26 @@ data KeyLocation = InAnnex | InAnywhere - .git/annex/objects, whether or not the content is present. -} listKeys :: KeyLocation -> Annex [Key] -listKeys keyloc = do +listKeys keyloc = listKeys' keyloc (const (pure True)) + +{- Due to use of unsafeInterleaveIO, the passed filter action + - will be run in a copy of the Annex state, so any changes it + - makes to the state will not be preserved. -} +listKeys' :: KeyLocation -> (Key -> Annex Bool) -> Annex [Key] +listKeys' keyloc want = do dir <- fromRepo gitAnnexObjectDir - {- In order to run Annex monad actions within unsafeInterleaveIO, - - the current state is taken and reused. No changes made to this - - state will be preserved. - -} s <- Annex.getState id + r <- Annex.getRead id depth <- gitAnnexLocationDepth <$> Annex.getGitConfig - liftIO $ walk s depth (fromRawFilePath dir) + liftIO $ walk (s, r) depth (fromRawFilePath dir) where walk s depth dir = do contents <- catchDefaultIO [] (dirContents dir) if depth < 2 then do - contents' <- filterM (present s) contents - let keys = mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents' + contents' <- filterM present contents + keys <- filterM (Annex.eval s . want) $ + mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents' continue keys [] else do let deeper = walk s (depth - 1) @@ -683,8 +688,8 @@ listKeys keyloc = do InAnywhere -> True _ -> False - present _ _ | inanywhere = pure True - present _ d = presentInAnnex d + present _ | inanywhere = pure True + present d = presentInAnnex d presentInAnnex = doesFileExist . contentfile contentfile d = d takeFileName d diff --git a/CHANGELOG b/CHANGELOG index 68a289d7ff..0f0d53d347 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,9 @@ git-annex (10.20220128) UNRELEASED; urgency=medium to be more like other batch commands. * registerurl, unregisterurl: Added --json and --json-error-messages options. * Avoid git status taking a long time after git-annex unlock of many files. + * info: Allow using matching options in more situations. File matching + options like --include will be rejected in situations where there is + no filename to match against. -- Joey Hess Mon, 31 Jan 2022 13:14:42 -0400 diff --git a/Command/Info.hs b/Command/Info.hs index 6594498e3a..f74ac5183c 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2011-2021 Joey Hess + - Copyright 2011-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -132,7 +132,6 @@ start o ps = do globalInfo :: InfoOptions -> Annex () globalInfo o = do - disallowMatchingOptions u <- getUUID whenM ((==) DeadTrusted <$> lookupTrust u) $ earlyWarning "Warning: This repository is currently marked as dead." @@ -145,7 +144,6 @@ itemInfo :: InfoOptions -> (SeekInput, String) -> Annex () itemInfo o (si, p) = ifM (isdir p) ( dirInfo o p si , do - disallowMatchingOptions v <- Remote.byName' p case v of Right r -> remoteInfo o r si @@ -168,10 +166,6 @@ noInfo s si = do showNote $ "not a directory or an annexed file or a treeish or a remote or a uuid" showEndFail -disallowMatchingOptions :: Annex () -disallowMatchingOptions = whenM Limit.limited $ - giveup "File matching options can only be used when getting info on a directory." - dirInfo :: InfoOptions -> FilePath -> SeekInput -> Annex () dirInfo o dir si = showCustom (unwords ["info", dir]) si $ do stats <- selStats @@ -197,9 +191,13 @@ treeishInfo o t si = do tostats = map (\s -> s t) fileInfo :: InfoOptions -> FilePath -> SeekInput -> Key -> Annex () -fileInfo o file si k = showCustom (unwords ["info", file]) si $ do - evalStateT (mapM_ showStat (file_stats file k)) (emptyStatInfo o) - return True +fileInfo o file si k = do + matcher <- Limit.getMatcher + let file' = toRawFilePath file + whenM (matcher $ MatchingFile $ FileInfo file' file' (Just k)) $ + showCustom (unwords ["info", file]) si $ do + evalStateT (mapM_ showStat (file_stats file k)) (emptyStatInfo o) + return True remoteInfo :: InfoOptions -> Remote -> SeekInput -> Annex () remoteInfo o r si = showCustom (unwords ["info", Remote.name r]) si $ do @@ -404,7 +402,7 @@ bad_data_size :: Stat bad_data_size = staleSize "bad keys size" gitAnnexBadDir key_size :: Key -> Stat -key_size k = simpleStat "size" $ showSizeKeys $ foldKeys [k] +key_size k = simpleStat "size" $ showSizeKeys $ addKey k emptyKeyInfo key_name :: Key -> Stat key_name k = simpleStat "key" $ pure $ serializeKey k @@ -525,7 +523,9 @@ cachedPresentData = do case presentData s of Just v -> return v Nothing -> do - v <- foldKeys <$> lift (listKeys InAnnex) + matcher <- lift getKeyOnlyMatcher + v <- foldl' (flip addKey) emptyKeyInfo + <$> lift (listKeys' InAnnex (matchOnKey matcher)) put s { presentData = Just v } return v @@ -535,9 +535,13 @@ cachedRemoteData u = do case M.lookup u (repoData s) of Just v -> return (Right v) Nothing -> do + matcher <- lift getKeyOnlyMatcher let combinedata d uk = finishCheck uk >>= \case Nothing -> return d - Just k -> return $ addKey k d + Just k -> ifM (matchOnKey matcher k) + ( return (addKey k d) + , return d + ) lift (loggedKeysFor' u) >>= \case Just (ks, cleanup) -> do v <- lift $ foldM combinedata emptyKeyInfo ks @@ -552,8 +556,13 @@ cachedReferencedData = do case referencedData s of Just v -> return v Nothing -> do + matcher <- lift getKeyOnlyMatcher + let combinedata k _f d = ifM (matchOnKey matcher k) + ( return (addKey k d) + , return d + ) !v <- lift $ Command.Unused.withKeysReferenced - emptyKeyInfo addKey + emptyKeyInfo combinedata put s { referencedData = Just v } return v @@ -596,11 +605,16 @@ getDirStatInfo o dir = do getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo) getTreeStatInfo o r = do fast <- Annex.getState Annex.fast + -- git lstree filenames start with a leading "./" that prevents + -- matching, and also things like --include are supposed to + -- match relative to the current directory, which does not make + -- sense when matching against files in some arbitrary tree. + matcher <- getKeyOnlyMatcher (ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive (LsTree.LsTreeLong False) r - (presentdata, referenceddata, repodata) <- go fast ls initial + (presentdata, referenceddata, repodata) <- go fast matcher ls initial ifM (liftIO cleanup) ( return $ Just $ StatInfo (Just presentdata) (Just referenceddata) repodata Nothing o @@ -608,23 +622,25 @@ getTreeStatInfo o r = do ) where initial = (emptyKeyInfo, emptyKeyInfo, M.empty) - go _ [] vs = return vs - go fast (l:ls) vs@(presentdata, referenceddata, repodata) = do - mk <- catKey (LsTree.sha l) - case mk of - Nothing -> go fast ls vs - Just key -> do - !presentdata' <- ifM (inAnnex key) - ( return $ addKey key presentdata - , return presentdata - ) - let !referenceddata' = addKey key referenceddata - !repodata' <- if fast - then return repodata - else do - locs <- Remote.keyLocations key - return (updateRepoData key locs repodata) - go fast ls $! (presentdata', referenceddata', repodata') + go _ _ [] vs = return vs + go fast matcher (l:ls) vs@(presentdata, referenceddata, repodata) = + catKey (LsTree.sha l) >>= \case + Nothing -> go fast matcher ls vs + Just key -> ifM (matchOnKey matcher key) + ( do + !presentdata' <- ifM (inAnnex key) + ( return $ addKey key presentdata + , return presentdata + ) + let !referenceddata' = addKey key referenceddata + !repodata' <- if fast + then return repodata + else do + locs <- Remote.keyLocations key + return (updateRepoData key locs repodata) + go fast matcher ls $! (presentdata', referenceddata', repodata') + , go fast matcher ls vs + ) emptyKeyInfo :: KeyInfo emptyKeyInfo = KeyInfo 0 0 0 M.empty @@ -632,9 +648,6 @@ emptyKeyInfo = KeyInfo 0 0 0 M.empty emptyNumCopiesStats :: NumCopiesStats emptyNumCopiesStats = NumCopiesStats M.empty -foldKeys :: [Key] -> KeyInfo -foldKeys = foldl' (flip addKey) emptyKeyInfo - addKey :: Key -> KeyInfo -> KeyInfo addKey key (KeyInfo count size unknownsize backends) = KeyInfo count' size' unknownsize' backends' @@ -700,3 +713,20 @@ mkSizer = ifM (bytesOption . infoOptions <$> get) ( return (const $ const show) , return roughSize ) + +getKeyOnlyMatcher :: Annex (MatchInfo -> Annex Bool) +getKeyOnlyMatcher = do + whenM (Limit.introspect matchNeedsFileName) $ do + warning "File matching options cannot be applied when getting this info." + giveup "Unable to continue." + Limit.getMatcher + +matchOnKey :: (MatchInfo -> Annex Bool) -> Key -> Annex Bool +matchOnKey matcher k = matcher $ MatchingInfo $ ProvidedInfo + { providedFilePath = Nothing + , providedKey = Just k + , providedFileSize = Nothing + , providedMimeType = Nothing + , providedMimeEncoding = Nothing + , providedLinkType = Nothing + } diff --git a/Command/Unused.hs b/Command/Unused.hs index c0eea57068..0fe969e58c 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -183,12 +183,10 @@ excludeReferenced refspec ks = runbloomfilter withKeysReferencedM ks runfilter a l = a l runbloomfilter a = runfilter $ \l -> bloomFilter l <$> genBloomFilter a -{- Given an initial value, folds it with each key referenced by - - files in the working tree. -} -withKeysReferenced :: v -> (Key -> v -> v) -> Annex v -withKeysReferenced initial a = withKeysReferenced' Nothing initial folda - where - folda k _ v = return $ a k v +{- Given an initial value, accumulates the value over each key + - referenced by files in the working tree. -} +withKeysReferenced :: v -> (Key -> RawFilePath -> v -> Annex v) -> Annex v +withKeysReferenced initial = withKeysReferenced' Nothing initial {- Runs an action on each referenced key in the working tree. -} withKeysReferencedM :: (Key -> Annex ()) -> Annex () diff --git a/doc/git-annex-info.mdwn b/doc/git-annex-info.mdwn index 42a667b4fa..e6c9bcee57 100644 --- a/doc/git-annex-info.mdwn +++ b/doc/git-annex-info.mdwn @@ -11,9 +11,9 @@ git annex info `[directory|file|treeish|remote|description|uuid ...]` Displays statistics and other information for the specified item, which can be a directory, or a file, or a treeish, or a remote, or the description or uuid of a repository. - + When no item is specified, displays statistics and information -for the local repository and all known annexed files. +for the local repository and all annexed content. # OPTIONS @@ -45,11 +45,10 @@ for the local repository and all known annexed files. Makes the `--batch` input be delimited by nulls instead of the usual newlines. -* file matching options +* matching options - When a directory is specified, the [[git-annex-matching-options]](1) - can be used to select the files in the directory that are included - in the statistics. + The [[git-annex-matching-options]](1) can be used to select what + to include in the statistics. * Also the [[git-annex-common-options]](1) can be used. diff --git a/doc/todo/info__58___allow_file_matching_options_for_all_keys.mdwn b/doc/todo/info__58___allow_file_matching_options_for_all_keys.mdwn index 46a36dec56..365c6e0047 100644 --- a/doc/todo/info__58___allow_file_matching_options_for_all_keys.mdwn +++ b/doc/todo/info__58___allow_file_matching_options_for_all_keys.mdwn @@ -9,3 +9,5 @@ git-annex: File matching options can only be used when getting info on a directo There should be a way to use `info` to query aggregate information properties of all keys instead of directories. I have used `git annex info .` in the repos I used up until now because every key was in the tree. Though I also have a feeling that operating on all keys could be significantly faster than filtering them to match some directory. + +> [[done]] --[[Joey]] diff --git a/doc/todo/info__58___allow_file_matching_options_for_all_keys/comment_1_3b96becda1db6ae23adb5077b6332718._comment b/doc/todo/info__58___allow_file_matching_options_for_all_keys/comment_1_3b96becda1db6ae23adb5077b6332718._comment index 526501622b..45cbb1e736 100644 --- a/doc/todo/info__58___allow_file_matching_options_for_all_keys/comment_1_3b96becda1db6ae23adb5077b6332718._comment +++ b/doc/todo/info__58___allow_file_matching_options_for_all_keys/comment_1_3b96becda1db6ae23adb5077b6332718._comment @@ -12,6 +12,8 @@ where used, but now `matchNeedsFileName` is available and it could only reject those. So this can be implemented by making cachedPresentData -and cachedRemoteData get the matcher, check if it's +and cachedRemoteData (etc) get the matcher, check if it's the right kind and apply it to the keys. + +done """]]