migrate script: Get full list of remotes that have a file; doc updates; progress output; corner case fixes

This commit is contained in:
https://christian.amsuess.com/chrysn 2021-08-15 19:01:37 +00:00 committed by admin
parent 905fef31b3
commit fa69431266

View file

@ -42,9 +42,10 @@ but is not accessible; this is a consequence of `dead` not working while known
copies are around. copies are around.
This is not tuned for performance; it tries to avoid any O(n^2) or worse This is not tuned for performance; it tries to avoid any O(n^2) or worse
behavior, and should complete (or at least produce output) within minutes even behavior, and should complete data acquisition (or at least produce output)
on a 150000 file, 1000 commit repository. (The slowest parts are the fsck and within minutes even on a 150000 file, 1000 commit repository. (The slowest
the enumeration of whereis data, which take the longest time). parts are the fsck and the enumeration of whereis data, which take the longest
time).
The actual dropping takes quite a while, as each drop and dead are done The actual dropping takes quite a while, as each drop and dead are done
individually. (Some commands have --batch but not for --key). There are no individually. (Some commands have --batch but not for --key). There are no
@ -214,11 +215,14 @@ for line in whereall.stdout:
if wherethis['key'] not in hashes_to_kill: if wherethis['key'] not in hashes_to_kill:
continue continue
remotes = {None if r['here'] else r['uuid'] for r in wherethis['whereis']} remotes = {None if r['here'] else r['uuid'] for r in wherethis['whereis'] + wherethis['untrusted']}
if remotes: if remotes:
hashes_to_kill_remotes[wherethis['key']] = remotes hashes_to_kill_remotes[wherethis['key']] = remotes
wheretodrop = {r or "here" for r in set.union(*hashes_to_kill_remotes.values())} if hashes_to_kill_remotes:
print(f"Found f{len(hashes_to_kill_remotes)} migrated hashes still around on remotes {wheretodrop}") wheretodrop = {r or "here" for r in set.union(*hashes_to_kill_remotes.values())}
else:
wheretodrop = set()
print(f"Found {len(hashes_to_kill_remotes)} migrated hashes still around on remotes {wheretodrop}")
print() print()
print("If you want to really drop all of them, enter `force drop and declare them dead` here:") print("If you want to really drop all of them, enter `force drop and declare them dead` here:")
@ -231,16 +235,20 @@ try:
subprocess.check_call(["git", "-c", "annex.commitmessage=updates before running migrate-mark-dead.py", "annex", "merge"]) subprocess.check_call(["git", "-c", "annex.commitmessage=updates before running migrate-mark-dead.py", "annex", "merge"])
annex_no_autocommit = ["git", "-c", "annex.alwayscommit=false", "annex"] annex_no_autocommit = ["git", "-c", "annex.alwayscommit=false", "annex"]
# Network first, to ensure the password prompts come fast even when most files are dead already # Network first, to ensure the password prompts come fast even when most files are dead already
for (key, remotes) in hashes_to_kill_remotes.items(): for (i, (key, remotes)) in enumerate(hashes_to_kill_remotes.items()):
for r in remotes: for r in remotes:
if r is None: if r is None:
# Can't be run with `--from here` # Can't be run with `--from here`
subprocess.check_call(annex_no_autocommit + ['drop', '--key', key]) subprocess.check_call(annex_no_autocommit + ['drop', '--force', '--key', key])
else: else:
subprocess.check_call(annex_no_autocommit + ['drop', '--force', '--key', key, '--from', r]) subprocess.check_call(annex_no_autocommit + ['drop', '--force', '--key', key, '--from', r])
for key in hashes_to_kill: if (i % 10 == 0):
print(f"Dropped {i} ({100 * i/len(hashes_to_kill_remotes):.1f}% of) present hashes")
for i, key in enumerate(hashes_to_kill):
subprocess.check_call(annex_no_autocommit + ['dead', '--key', key]) subprocess.check_call(annex_no_autocommit + ['dead', '--key', key])
if (i % 100 == 0):
print(f"Marked {i} ({100 * i/len(hashes_to_kill):.1f}% of) unused hashes as dead")
finally: finally:
subprocess.check_call(["git", "-c", "annex.commitmessage=ran migrate-mark-dead.py", "annex", "merge"]) subprocess.check_call(["git", "-c", "annex.commitmessage=ran migrate-mark-dead.py", "annex", "merge"])
``` ```