use 20% less memory when listing unversioned S3 bucket

This commit is contained in:
Joey Hess 2024-11-15 13:24:13 -04:00
parent 43a4adda6e
commit eb714c107b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 35 additions and 12 deletions

View file

@ -601,15 +601,29 @@ listImportableContentsS3 hv r info c =
{ S3.gbMarker = marker
, S3.gbPrefix = fileprefix
}
continuelistunversioned h (rsp:l) rsp'
l' <- extractFromResourceT $
extractunversioned rsp
continuelistunversioned h (l':l) rsp'
Nothing -> nomore
| otherwise = nomore
where
nomore = return $
mkImportableContentsUnversioned info (reverse (rsp:l))
mkImportableContentsUnversioned
(reverse (extractunversioned rsp:l))
extractunversioned = mapMaybe extractunversioned' . S3.gbrContents
extractunversioned' oi = do
loc <- bucketImportLocation info $
T.unpack $ S3.objectKey oi
let sz = S3.objectSize oi
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
return (loc, (cid, sz))
continuelistversioned h l rsp
| S3.gbovrIsTruncated rsp = do
let showme x = case x of
S3.DeleteMarker {} -> "delete"
v -> S3.oviKey v
rsp' <- sendS3Handle h $
(S3.getBucketObjectVersions (bucket info))
{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
@ -620,18 +634,11 @@ listImportableContentsS3 hv r info c =
| otherwise = return $
mkImportableContentsVersioned info (reverse (rsp:l))
mkImportableContentsUnversioned :: S3Info -> [S3.GetBucketResponse] -> ImportableContents (ContentIdentifier, ByteSize)
mkImportableContentsUnversioned info l = ImportableContents
{ importableContents = concatMap (mapMaybe extract . S3.gbrContents) l
mkImportableContentsUnversioned :: [[(ImportLocation, (ContentIdentifier, ByteSize))]] -> ImportableContents (ContentIdentifier, ByteSize)
mkImportableContentsUnversioned l = ImportableContents
{ importableContents = concat l
, importableHistory = []
}
where
extract oi = do
loc <- bucketImportLocation info $
T.unpack $ S3.objectKey oi
let sz = S3.objectSize oi
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
return (loc, (cid, sz))
mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize)
mkImportableContentsVersioned info = build . groupfiles

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 7"""
date="2024-11-15T17:16:51Z"
content="""
Trying the same command but with versioning=yes, I have verified that:

* it does not have the same loop-forever behavior
* it does use a lot of memory quite quickly

Going back to the unversioned command, I was able to reduce the memory use
by 20% by processing each result as it arrives, rather than building up a
list of results and processing them all at the end. It will be harder to do
that in the versioned case, but I expect doing so will reduce memory use
there by at least that much, and probably more, since it will be able to GC
all the delete markers.
"""]]