diff --git a/Annex/Content.hs b/Annex/Content.hs index fdd03f320e..bf5a6c3a7e 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -300,7 +300,7 @@ getKeysPresent' dir = do -- 2 levels of hashing levela <- dirContents dir levelb <- mapM dirContents levela - contents <- mapM dirContents (concat levelb) + contents <- unsafeInterleaveIO $ mapM dirContents (concat levelb) let files = concat contents return $ mapMaybe (fileKey . takeFileName) files diff --git a/Command/Status.hs b/Command/Status.hs index dfe847bb8e..0b1741dc0a 100644 --- a/Command/Status.hs +++ b/Command/Status.hs @@ -5,12 +5,12 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Command.Status where import Control.Monad.State.Strict import qualified Data.Map as M -import qualified Data.Set as S -import Data.Set (Set) import Text.JSON import Common.Annex @@ -32,10 +32,18 @@ import Remote -- a named computation that produces a statistic type Stat = StatState (Maybe (String, StatState String)) --- cached info that multiple Stats may need +-- data about a set of keys +data KeyData = KeyData + { countKeys :: Integer + , sizeKeys :: Integer + , unknownSizeKeys :: Integer + , backendsKeys :: M.Map String Integer + } + +-- cached info that multiple Stats use data StatInfo = StatInfo - { keysPresentCache :: Maybe (Set Key) - , keysReferencedCache :: Maybe (Set Key) + { presentData :: Maybe KeyData + , referencedData :: Maybe KeyData } -- a state monad for running Stats in @@ -122,19 +130,19 @@ remote_list level desc = stat n $ nojson $ lift $ do local_annex_size :: Stat local_annex_size = stat "local annex size" $ json id $ - keySizeSum <$> cachedKeysPresent + showSizeKeys <$> cachedPresentData local_annex_keys :: Stat local_annex_keys = stat "local annex keys" $ json show $ - S.size <$> cachedKeysPresent + countKeys <$> cachedPresentData known_annex_size :: Stat known_annex_size = stat "known annex size" $ json id $ - keySizeSum <$> cachedKeysReferenced + showSizeKeys <$> cachedReferencedData known_annex_keys :: Stat known_annex_keys = stat "known annex keys" $ json show $ - S.size <$> cachedKeysReferenced + countKeys <$> cachedReferencedData tmp_size :: Stat tmp_size = staleSize "temporary directory size" gitAnnexTmpDir @@ -144,46 +152,62 @@ bad_data_size = staleSize "bad keys size" gitAnnexBadDir backend_usage :: Stat backend_usage = stat "backend usage" $ nojson $ - calc <$> cachedKeysReferenced <*> cachedKeysPresent + calc + <$> (backendsKeys <$> cachedReferencedData) + <*> (backendsKeys <$> cachedPresentData) where - calc a b = pp "" $ reverse . sort $ map swap $ splits $ S.toList $ S.union a b - splits :: [Key] -> [(String, Integer)] - splits ks = M.toList $ M.fromListWith (+) $ map tcount ks - tcount k = (keyBackendName k, 1) - swap (a, b) = (b, a) + calc a b = pp "" $ reverse . sort $ map swap $ M.toList $ M.unionWith (+) a b pp c [] = c pp c ((n, b):xs) = "\n\t" ++ b ++ ": " ++ show n ++ pp c xs + swap (a, b) = (b, a) -cachedKeysPresent :: StatState (Set Key) -cachedKeysPresent = do +cachedPresentData :: StatState KeyData +cachedPresentData = do s <- get - case keysPresentCache s of + case presentData s of Just v -> return v Nothing -> do - keys <- S.fromList <$> lift getKeysPresent - put s { keysPresentCache = Just keys } - return keys + v <- foldKeys <$> lift getKeysPresent + put s { presentData = Just v } + return v -cachedKeysReferenced :: StatState (Set Key) -cachedKeysReferenced = do +cachedReferencedData :: StatState KeyData +cachedReferencedData = do s <- get - case keysReferencedCache s of + case referencedData s of Just v -> return v Nothing -> do - keys <- S.fromList <$> lift Command.Unused.getKeysReferenced - put s { keysReferencedCache = Just keys } - return keys + !v <- lift $ Command.Unused.withKeysReferenced + emptyKeyData addKey + put s { referencedData = Just v } + return v -keySizeSum :: Set Key -> String -keySizeSum s = total ++ missingnote +emptyKeyData :: KeyData +emptyKeyData = KeyData 0 0 0 M.empty + +foldKeys :: [Key] -> KeyData +foldKeys = foldl' (flip addKey) emptyKeyData + +addKey :: Key -> KeyData -> KeyData +addKey key (KeyData count size unknownsize backends) = + KeyData count' size' unknownsize' backends' where - knownsizes = mapMaybe keySize $ S.toList s - total = roughSize storageUnits False $ sum knownsizes - missing = S.size s - genericLength knownsizes + {- All calculations strict to avoid thunks when repeatedly + - applied to many keys. -} + !count' = count + 1 + !backends' = M.insertWith' (+) (keyBackendName key) 1 backends + !size' = maybe size (+ size) ks + !unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks + ks = keySize key + +showSizeKeys :: KeyData -> String +showSizeKeys d = total ++ missingnote + where + total = roughSize storageUnits False $ sizeKeys d missingnote - | missing == 0 = "" + | unknownSizeKeys d == 0 = "" | otherwise = aside $ - "+ " ++ show missing ++ + "+ " ++ show (unknownSizeKeys d) ++ " keys of unknown size" staleSize :: String -> (Git.Repo -> FilePath) -> Stat @@ -192,7 +216,7 @@ staleSize label dirspec = do if null keys then nostat else stat label $ json (++ aside "clean up with git-annex unused") $ - return $ keySizeSum $ S.fromList keys + return $ showSizeKeys $ foldKeys keys aside :: String -> String aside s = " (" ++ s ++ ")" diff --git a/Command/Unused.hs b/Command/Unused.hs index ba14bfc4af..69b58c5e70 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2011 Joey Hess + - Copyright 2010-2012 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -171,7 +171,7 @@ excludeReferenced l = do go s (r:rs) | s == S.empty = return [] -- optimisation | otherwise = do - !s' <- withKeysReferencedInGit r s S.delete + s' <- withKeysReferencedInGit r s S.delete go s' rs {- Finds items in the first, smaller list, that are not @@ -186,21 +186,14 @@ exclude smaller larger = S.toList $ remove larger $ S.fromList smaller where remove a b = foldl (flip S.delete) b a -{- List of keys referenced by symlinks in the git repo. -} -getKeysReferenced :: Annex [Key] -getKeysReferenced = do - top <- fromRepo Git.workTree - files <- inRepo $ LsFiles.inRepo [top] - keypairs <- mapM Backend.lookupFile files - return $ map fst $ catMaybes keypairs - {- Given an initial value, mutates it using an action for each - key referenced by symlinks in the git repo. -} withKeysReferenced :: v -> (Key -> v -> v) -> Annex v -withKeysReferenced initial a = do - top <- fromRepo Git.workTree - go initial =<< inRepo (LsFiles.inRepo [top]) +withKeysReferenced initial a = go initial =<< files where + files = do + top <- fromRepo Git.workTree + inRepo $ LsFiles.inRepo [top] go v [] = return v go v (f:fs) = do x <- Backend.lookupFile f diff --git a/debian/changelog b/debian/changelog index 2cb5a1aea2..6da54056c7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,7 @@ git-annex (3.20120310) UNRELEASED; urgency=low * unused: Reduce memory usage significantly. Still not constant space, but now only needs to store the set of file contents that are present in the annex in memory. + * status: Fixed to run in nearly constant space. -- Joey Hess Sat, 10 Mar 2012 14:03:22 -0400