From eb714c107ba81d805458c770db8d4f22ae12a077 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Fri, 15 Nov 2024 13:24:13 -0400
Subject: [PATCH] use 20% less memory when listing unversioned S3 bucket

---
 Remote/S3.hs                                  | 31 ++++++++++++-------
 ..._fe6e9bc5460f9bcd24eb3034a2f45fbc._comment | 16 ++++++++++
 2 files changed, 35 insertions(+), 12 deletions(-)
 create mode 100644 doc/bugs/importtree_from_S3_slows_to_halt_even_with_prefix/comment_7_fe6e9bc5460f9bcd24eb3034a2f45fbc._comment

 Review notes (commentary only; this area between the "---" separator and
 the first "diff --git" line is not part of the commit):

 * What the change does: the unversioned listing loop now converts each
   S3.GetBucketResponse into a list of (ImportLocation, (ContentIdentifier,
   ByteSize)) entries as soon as it arrives (extractunversioned) and
   accumulates those lists, instead of accumulating the raw responses and
   extracting at the end. mkImportableContentsUnversioned consequently
   drops its S3Info argument and just concatenates the per-page lists.
 * NOTE(review): the "showme" let-binding added to continuelistversioned
   has no visible use anywhere in this patch -- it looks like leftover
   debugging; confirm and drop it in a follow-up.
 * NOTE(review): this copy of the patch had lost its line breaks and
   indentation. Line boundaries were restored from the hunk headers'
   line counts; the leading tabs inside the Remote/S3.hs hunks are a
   reconstruction and should be checked against the upstream commit
   before applying without --ignore-whitespace.

diff --git a/Remote/S3.hs b/Remote/S3.hs
index 299f7d7644..36cbedef50 100644
--- a/Remote/S3.hs
+++ b/Remote/S3.hs
@@ -601,15 +601,29 @@ listImportableContentsS3 hv r info c =
 							{ S3.gbMarker = marker
 							, S3.gbPrefix = fileprefix
 							}
-					continuelistunversioned h (rsp:l) rsp'
+					l' <- extractFromResourceT $
+						extractunversioned rsp
+					continuelistunversioned h (l':l) rsp'
 				Nothing -> nomore
 		| otherwise = nomore
 	  where
 		nomore = return $
-			mkImportableContentsUnversioned info (reverse (rsp:l))
+			mkImportableContentsUnversioned
+				(reverse (extractunversioned rsp:l))
 
+	extractunversioned = mapMaybe extractunversioned' . S3.gbrContents
+	extractunversioned' oi = do
+		loc <- bucketImportLocation info $
+			T.unpack $ S3.objectKey oi
+		let sz = S3.objectSize oi
+		let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
+		return (loc, (cid, sz))
+
 	continuelistversioned h l rsp
 		| S3.gbovrIsTruncated rsp = do
+			let showme x = case x of
+				S3.DeleteMarker {} -> "delete"
+				v -> S3.oviKey v
 			rsp' <- sendS3Handle h $
 				(S3.getBucketObjectVersions (bucket info))
 					{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
@@ -620,18 +634,11 @@ listImportableContentsS3 hv r info c =
 		| otherwise = return $
 			mkImportableContentsVersioned info (reverse (rsp:l))
 
-mkImportableContentsUnversioned :: S3Info -> [S3.GetBucketResponse] -> ImportableContents (ContentIdentifier, ByteSize)
-mkImportableContentsUnversioned info l = ImportableContents
-	{ importableContents = concatMap (mapMaybe extract . S3.gbrContents) l
+mkImportableContentsUnversioned :: [[(ImportLocation, (ContentIdentifier, ByteSize))]] -> ImportableContents (ContentIdentifier, ByteSize)
+mkImportableContentsUnversioned l = ImportableContents
+	{ importableContents = concat l
 	, importableHistory = []
 	}
-  where
-	extract oi = do
-		loc <- bucketImportLocation info $
-			T.unpack $ S3.objectKey oi
-		let sz = S3.objectSize oi
-		let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
-		return (loc, (cid, sz))
 
 mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize)
 mkImportableContentsVersioned info = build . groupfiles
diff --git a/doc/bugs/importtree_from_S3_slows_to_halt_even_with_prefix/comment_7_fe6e9bc5460f9bcd24eb3034a2f45fbc._comment b/doc/bugs/importtree_from_S3_slows_to_halt_even_with_prefix/comment_7_fe6e9bc5460f9bcd24eb3034a2f45fbc._comment
new file mode 100644
index 0000000000..abeaf7d584
--- /dev/null
+++ b/doc/bugs/importtree_from_S3_slows_to_halt_even_with_prefix/comment_7_fe6e9bc5460f9bcd24eb3034a2f45fbc._comment
@@ -0,0 +1,16 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 7"""
+ date="2024-11-15T17:16:51Z"
+ content="""
+Trying the same command but with versioning=yes, I have verified that
+
+* it does not have the same loop forever behavior
+* it does use a lot of memory quite quickly
+
+Going back to the unversioned command, I was able to reduce the memory use
+by 20% by processing each result, rather than building up a list of results
+and processing at the end. It will be harder to do that in the versioning
+case, but I expect it will improve it at least that much, and probably
+more, since it will be able to GC all the delete markers.
+"""]]