migrate script: Get full list of remotes that have a file; doc updates; progress output; corner case fixes
This commit is contained in:
parent
905fef31b3
commit
fa69431266
1 changed file with 17 additions and 9 deletions
|
@@ -42,9 +42,10 @@ but is not accessible; this is a consequence of `dead` not working while known
|
||||||
copies are around.
|
copies are around.
|
||||||
|
|
||||||
This is not tuned for performance; it tries to avoid any O(n^2) or worse
|
This is not tuned for performance; it tries to avoid any O(n^2) or worse
|
||||||
behavior, and should complete (or at least produce output) within minutes even
|
behavior, and should complete data acquisition (or at least produce output)
|
||||||
on a 150000 file, 1000 commit repository. (The slowest parts being the fsck and
|
within minutes even on a 150000 file, 1000 commit repository. (The slowest
|
||||||
the enumeration of whereis data take the longest time).
|
parts, the fsck and the enumeration of whereis data, take the longest
|
||||||
|
time).
|
||||||
|
|
||||||
The actual dropping takes quite a while, as each drop and dead are done
|
The actual dropping takes quite a while, as each drop and dead are done
|
||||||
individually. (Some commands have --batch but not for --key). There are no
|
individually. (Some commands have --batch but not for --key). There are no
|
||||||
|
@@ -214,11 +215,14 @@ for line in whereall.stdout:
|
||||||
if wherethis['key'] not in hashes_to_kill:
|
if wherethis['key'] not in hashes_to_kill:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
remotes = {None if r['here'] else r['uuid'] for r in wherethis['whereis']}
|
remotes = {None if r['here'] else r['uuid'] for r in wherethis['whereis'] + wherethis['untrusted']}
|
||||||
if remotes:
|
if remotes:
|
||||||
hashes_to_kill_remotes[wherethis['key']] = remotes
|
hashes_to_kill_remotes[wherethis['key']] = remotes
|
||||||
wheretodrop = {r or "here" for r in set.union(*hashes_to_kill_remotes.values())}
|
if hashes_to_kill_remotes:
|
||||||
print(f"Found f{len(hashes_to_kill_remotes)} migrated hashes still around on remotes {wheretodrop}")
|
wheretodrop = {r or "here" for r in set.union(*hashes_to_kill_remotes.values())}
|
||||||
|
else:
|
||||||
|
wheretodrop = set()
|
||||||
|
print(f"Found {len(hashes_to_kill_remotes)} migrated hashes still around on remotes {wheretodrop}")
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("If you want to really drop all of them, enter `force drop and declare them dead` here:")
|
print("If you want to really drop all of them, enter `force drop and declare them dead` here:")
|
||||||
|
@@ -231,16 +235,20 @@ try:
|
||||||
subprocess.check_call(["git", "-c", "annex.commitmessage=updates before running migrate-mark-dead.py", "annex", "merge"])
|
subprocess.check_call(["git", "-c", "annex.commitmessage=updates before running migrate-mark-dead.py", "annex", "merge"])
|
||||||
annex_no_autocommit = ["git", "-c", "annex.alwayscommit=false", "annex"]
|
annex_no_autocommit = ["git", "-c", "annex.alwayscommit=false", "annex"]
|
||||||
# Network first, to ensure the password prompts come fast even when most files are dead already
|
# Network first, to ensure the password prompts come fast even when most files are dead already
|
||||||
for (key, remotes) in hashes_to_kill_remotes.items():
|
for (i, (key, remotes)) in enumerate(hashes_to_kill_remotes.items()):
|
||||||
for r in remotes:
|
for r in remotes:
|
||||||
if r is None:
|
if r is None:
|
||||||
# Can't be run with `--from here`
|
# Can't be run with `--from here`
|
||||||
subprocess.check_call(annex_no_autocommit + ['drop', '--key', key])
|
subprocess.check_call(annex_no_autocommit + ['drop', '--force', '--key', key])
|
||||||
else:
|
else:
|
||||||
subprocess.check_call(annex_no_autocommit + ['drop', '--force', '--key', key, '--from', r])
|
subprocess.check_call(annex_no_autocommit + ['drop', '--force', '--key', key, '--from', r])
|
||||||
|
|
||||||
for key in hashes_to_kill:
|
if (i % 10 == 0):
|
||||||
|
print(f"Dropped {i} ({100 * i/len(hashes_to_kill_remotes):.1f}% of) present hashes")
|
||||||
|
for i, key in enumerate(hashes_to_kill):
|
||||||
subprocess.check_call(annex_no_autocommit + ['dead', '--key', key])
|
subprocess.check_call(annex_no_autocommit + ['dead', '--key', key])
|
||||||
|
if (i % 100 == 0):
|
||||||
|
print(f"Marked {i} ({100 * i/len(hashes_to_kill):.1f}% of) unused hashes as dead")
|
||||||
finally:
|
finally:
|
||||||
subprocess.check_call(["git", "-c", "annex.commitmessage=ran migrate-mark-dead.py", "annex", "merge"])
|
subprocess.check_call(["git", "-c", "annex.commitmessage=ran migrate-mark-dead.py", "annex", "merge"])
|
||||||
```
|
```
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue