use 20% less memory when listing unversioned S3 bucket

This commit is contained in:
Joey Hess 2024-11-15 13:24:13 -04:00
parent 43a4adda6e
commit eb714c107b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 35 additions and 12 deletions

View file

@ -601,15 +601,29 @@ listImportableContentsS3 hv r info c =
{ S3.gbMarker = marker { S3.gbMarker = marker
, S3.gbPrefix = fileprefix , S3.gbPrefix = fileprefix
} }
continuelistunversioned h (rsp:l) rsp' l' <- extractFromResourceT $
extractunversioned rsp
continuelistunversioned h (l':l) rsp'
Nothing -> nomore Nothing -> nomore
| otherwise = nomore | otherwise = nomore
where where
nomore = return $ nomore = return $
mkImportableContentsUnversioned info (reverse (rsp:l)) mkImportableContentsUnversioned
(reverse (extractunversioned rsp:l))
extractunversioned = mapMaybe extractunversioned' . S3.gbrContents
extractunversioned' oi = do
loc <- bucketImportLocation info $
T.unpack $ S3.objectKey oi
let sz = S3.objectSize oi
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
return (loc, (cid, sz))
continuelistversioned h l rsp continuelistversioned h l rsp
| S3.gbovrIsTruncated rsp = do | S3.gbovrIsTruncated rsp = do
let showme x = case x of
S3.DeleteMarker {} -> "delete"
v -> S3.oviKey v
rsp' <- sendS3Handle h $ rsp' <- sendS3Handle h $
(S3.getBucketObjectVersions (bucket info)) (S3.getBucketObjectVersions (bucket info))
{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp { S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
@ -620,18 +634,11 @@ listImportableContentsS3 hv r info c =
| otherwise = return $ | otherwise = return $
mkImportableContentsVersioned info (reverse (rsp:l)) mkImportableContentsVersioned info (reverse (rsp:l))
mkImportableContentsUnversioned :: S3Info -> [S3.GetBucketResponse] -> ImportableContents (ContentIdentifier, ByteSize) mkImportableContentsUnversioned :: [[(ImportLocation, (ContentIdentifier, ByteSize))]] -> ImportableContents (ContentIdentifier, ByteSize)
mkImportableContentsUnversioned info l = ImportableContents mkImportableContentsUnversioned l = ImportableContents
{ importableContents = concatMap (mapMaybe extract . S3.gbrContents) l { importableContents = concat l
, importableHistory = [] , importableHistory = []
} }
where
extract oi = do
loc <- bucketImportLocation info $
T.unpack $ S3.objectKey oi
let sz = S3.objectSize oi
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
return (loc, (cid, sz))
mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize) mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize)
mkImportableContentsVersioned info = build . groupfiles mkImportableContentsVersioned info = build . groupfiles

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 7"""
date="2024-11-15T17:16:51Z"
content="""
Trying the same command but with versioning=yes, I have verified that:

* it does not have the same loop forever behavior
* it does use a lot of memory quite quickly

Going back to the unversioned command, I was able to reduce the memory use
by 20% by processing each result as it is received, rather than building up
a list of results and processing them all at the end. Doing the same will be
harder in the versioning case, but I expect it to reduce memory use there by
at least that much, and probably more, since it will be able to GC all the
delete markers.
"""]]