Merge branch 'master' of ssh://git-annex.branchable.com
This commit is contained in:
commit
6eb0943d95
9 changed files with 386 additions and 0 deletions
164
doc/bugs/blake3_hash_support.mdwn
Normal file
164
doc/bugs/blake3_hash_support.mdwn
Normal file
|
@ -0,0 +1,164 @@
|
|||
This is a patch that seems to work for my personal use.
|
||||
BLAKE3 does support variable lengths, but my code does not implement support for anything other than 256-bit (32-byte) digests.
|
||||
I'm not familiar enough with the codebase to be sure whether adding variable length support later is a backwards compatibility hazard or not.
|
||||
|
||||
[[!format patch """
|
||||
From efa115d94d1a5a52574d5760c6e951ed3c518667 Mon Sep 17 00:00:00 2001
|
||||
From: edef <edef@edef.eu>
|
||||
Date: Fri, 2 Dec 2022 12:16:44 +0000
|
||||
Subject: [PATCH] support BLAKE3
|
||||
|
||||
This uses the blake3 package from Hackage, since cryptonite does not
|
||||
have BLAKE3 support yet.
|
||||
|
||||
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
|
||||
index 550d8fc6c..809a82599 100644
|
||||
--- a/Backend/Hash.hs
|
||||
+++ b/Backend/Hash.hs
|
||||
@@ -27,8 +27,11 @@ import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Short as S (toShort, fromShort)
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
+import Data.IORef
|
||||
+import Control.Arrow
|
||||
import Control.DeepSeq
|
||||
import Control.Exception (evaluate)
|
||||
+import qualified BLAKE3
|
||||
|
||||
data Hash
|
||||
= MD5Hash
|
||||
@@ -40,6 +43,7 @@ data Hash
|
||||
| Blake2bpHash HashSize
|
||||
| Blake2sHash HashSize
|
||||
| Blake2spHash HashSize
|
||||
+ | Blake3Hash
|
||||
|
||||
cryptographicallySecure :: Hash -> Bool
|
||||
cryptographicallySecure (SHA2Hash _) = True
|
||||
@@ -49,6 +53,7 @@ cryptographicallySecure (Blake2bHash _) = True
|
||||
cryptographicallySecure (Blake2bpHash _) = True
|
||||
cryptographicallySecure (Blake2sHash _) = True
|
||||
cryptographicallySecure (Blake2spHash _) = True
|
||||
+cryptographicallySecure Blake3Hash = True
|
||||
cryptographicallySecure SHA1Hash = False
|
||||
cryptographicallySecure MD5Hash = False
|
||||
|
||||
@@ -63,6 +68,7 @@ hashes = concat
|
||||
, map (Blake2bpHash . HashSize) [512]
|
||||
, map (Blake2sHash . HashSize) [256, 160, 224]
|
||||
, map (Blake2spHash . HashSize) [256, 224]
|
||||
+ , [Blake3Hash]
|
||||
, [SHA1Hash]
|
||||
, [MD5Hash]
|
||||
]
|
||||
@@ -99,6 +105,7 @@ hashKeyVariety (Blake2bHash size) he = Blake2bKey size he
|
||||
hashKeyVariety (Blake2bpHash size) he = Blake2bpKey size he
|
||||
hashKeyVariety (Blake2sHash size) he = Blake2sKey size he
|
||||
hashKeyVariety (Blake2spHash size) he = Blake2spKey size he
|
||||
+hashKeyVariety Blake3Hash he = Blake3Key he
|
||||
|
||||
{- A key is a hash of its contents. -}
|
||||
keyValue :: Hash -> KeySource -> MeterUpdate -> Annex Key
|
||||
@@ -219,6 +226,7 @@ hasher (Blake2bHash hashsize) = blake2bHasher hashsize
|
||||
hasher (Blake2bpHash hashsize) = blake2bpHasher hashsize
|
||||
hasher (Blake2sHash hashsize) = blake2sHasher hashsize
|
||||
hasher (Blake2spHash hashsize) = blake2spHasher hashsize
|
||||
+hasher Blake3Hash = blake3Hasher
|
||||
|
||||
mkHasher :: HashAlgorithm h => (L.ByteString -> Digest h) -> Context h -> Hasher
|
||||
mkHasher h c = (show . h, mkIncrementalVerifier c descChecksum . sameCheckSum)
|
||||
@@ -272,6 +280,27 @@ blake2spHasher (HashSize hashsize)
|
||||
| hashsize == 224 = mkHasher blake2sp_224 blake2sp_224_context
|
||||
| otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize
|
||||
|
||||
+blake3Hasher :: Hasher
|
||||
+blake3Hasher = (hash, incremental) where
|
||||
+ finalize :: BLAKE3.Hasher -> BLAKE3.Digest BLAKE3.DEFAULT_DIGEST_LEN
|
||||
+ finalize = BLAKE3.finalize
|
||||
+
|
||||
+ hash :: L.ByteString -> String
|
||||
+ hash = show . finalize . L.foldlChunks ((. pure) . BLAKE3.update) BLAKE3.hasher
|
||||
+
|
||||
+ incremental :: Key -> IO IncrementalVerifier
|
||||
+ incremental k = do
|
||||
+ v <- newIORef (Just (BLAKE3.hasher, 0))
|
||||
+ return $ IncrementalVerifier
|
||||
+ { updateIncrementalVerifier = \b ->
|
||||
+ modifyIORef' v . fmap $ flip BLAKE3.update [b] *** (fromIntegral (S.length b) +)
|
||||
+ , finalizeIncrementalVerifier =
|
||||
+ fmap (sameCheckSum k . show . finalize . fst) <$> readIORef v
|
||||
+ , unableIncrementalVerifier = writeIORef v Nothing
|
||||
+ , positionIncrementalVerifier = fmap snd <$> readIORef v
|
||||
+ , descIncrementalVerifier = descChecksum
|
||||
+ }
|
||||
+
|
||||
sha1Hasher :: Hasher
|
||||
sha1Hasher = mkHasher sha1 sha1_context
|
||||
|
||||
diff --git a/Types/Key.hs b/Types/Key.hs
|
||||
index 271723982..ea71f85ed 100644
|
||||
--- a/Types/Key.hs
|
||||
+++ b/Types/Key.hs
|
||||
@@ -214,6 +214,7 @@ data KeyVariety
|
||||
| Blake2bpKey HashSize HasExt
|
||||
| Blake2sKey HashSize HasExt
|
||||
| Blake2spKey HashSize HasExt
|
||||
+ | Blake3Key HasExt
|
||||
| SHA1Key HasExt
|
||||
| MD5Key HasExt
|
||||
| WORMKey
|
||||
@@ -247,6 +248,7 @@ hasExt (Blake2bKey _ (HasExt b)) = b
|
||||
hasExt (Blake2bpKey _ (HasExt b)) = b
|
||||
hasExt (Blake2sKey _ (HasExt b)) = b
|
||||
hasExt (Blake2spKey _ (HasExt b)) = b
|
||||
+hasExt (Blake3Key (HasExt b)) = b
|
||||
hasExt (SHA1Key (HasExt b)) = b
|
||||
hasExt (MD5Key (HasExt b)) = b
|
||||
hasExt WORMKey = False
|
||||
@@ -262,6 +264,7 @@ sameExceptExt (Blake2bKey sz1 _) (Blake2bKey sz2 _) = sz1 == sz2
|
||||
sameExceptExt (Blake2bpKey sz1 _) (Blake2bpKey sz2 _) = sz1 == sz2
|
||||
sameExceptExt (Blake2sKey sz1 _) (Blake2sKey sz2 _) = sz1 == sz2
|
||||
sameExceptExt (Blake2spKey sz1 _) (Blake2spKey sz2 _) = sz1 == sz2
|
||||
+sameExceptExt (Blake3Key _) (Blake3Key _) = True
|
||||
sameExceptExt (SHA1Key _) (SHA1Key _) = True
|
||||
sameExceptExt (MD5Key _) (MD5Key _) = True
|
||||
sameExceptExt _ _ = False
|
||||
@@ -275,6 +278,7 @@ formatKeyVariety v = case v of
|
||||
Blake2bpKey sz e -> adde e (addsz sz "BLAKE2BP")
|
||||
Blake2sKey sz e -> adde e (addsz sz "BLAKE2S")
|
||||
Blake2spKey sz e -> adde e (addsz sz "BLAKE2SP")
|
||||
+ Blake3Key e -> adde e "BLAKE3"
|
||||
SHA1Key e -> adde e "SHA1"
|
||||
MD5Key e -> adde e "MD5"
|
||||
WORMKey -> "WORM"
|
||||
@@ -337,6 +341,8 @@ parseKeyVariety "BLAKE2SP224" = Blake2spKey (HashSize 224) (HasExt False)
|
||||
parseKeyVariety "BLAKE2SP224E" = Blake2spKey (HashSize 224) (HasExt True)
|
||||
parseKeyVariety "BLAKE2SP256" = Blake2spKey (HashSize 256) (HasExt False)
|
||||
parseKeyVariety "BLAKE2SP256E" = Blake2spKey (HashSize 256) (HasExt True)
|
||||
+parseKeyVariety "BLAKE3" = Blake3Key (HasExt False)
|
||||
+parseKeyVariety "BLAKE3E" = Blake3Key (HasExt True)
|
||||
parseKeyVariety "SHA1" = SHA1Key (HasExt False)
|
||||
parseKeyVariety "SHA1E" = SHA1Key (HasExt True)
|
||||
parseKeyVariety "MD5" = MD5Key (HasExt False)
|
||||
diff --git a/git-annex.cabal b/git-annex.cabal
|
||||
index cd58a4ca3..7c251e33b 100644
|
||||
--- a/git-annex.cabal
|
||||
+++ b/git-annex.cabal
|
||||
@@ -362,6 +362,7 @@ Executable git-annex
|
||||
securemem,
|
||||
crypto-api,
|
||||
cryptonite (>= 0.23),
|
||||
+ blake3,
|
||||
memory,
|
||||
deepseq,
|
||||
split,
|
||||
diff --git a/stack.yaml b/stack.yaml
|
||||
index 7dbfb657a..936ee841b 100644
|
||||
--- a/stack.yaml
|
||||
+++ b/stack.yaml
|
||||
@@ -25,3 +25,4 @@ extra-deps:
|
||||
- base64-bytestring-1.0.0.3
|
||||
- bencode-0.6.1.1
|
||||
- http-client-0.7.9
|
||||
+- blake3-0.2@sha256:d1146b9a51ccfbb0532780778b6d016a614e3d44c05d8c1923dde9a8be869045,2448
|
||||
"""]]
|
52
doc/bugs/fsck_--json_incomplete_error_reporting.mdwn
Normal file
52
doc/bugs/fsck_--json_incomplete_error_reporting.mdwn
Normal file
|
@ -0,0 +1,52 @@
|
|||
### Please describe the problem.
|
||||
|
||||
I ran git annex fsck --json. I have some files that are lacking numcopies. I get output in stderr, but no error details in the json body.
|
||||
|
||||
For example:
|
||||
|
||||
{"command":"fsck","success":false,"key":"SHA256E-s165540--ddcf7ce58593667e1b836e2a7f28a9f5227f3d9ba46cf8f98c7ab9dd26ef1896.jpg","error-messages":[],"file":"2022/12/04/chandrian_10:06:41.jpg","dead":[],"untrusted":[],"input":["2022/12/04/chandrian_10:06:41.jpg"]}
|
||||
Only 2 of 5 trustworthy copies exist of 2022/12/04/chandrian_11:05:20.jpg
|
||||
Back it up with git-annex copy.
|
||||
|
||||
|
||||
It would be great if error-messages contained all the details for failures. Thank you
|
||||
|
||||
|
||||
### What steps will reproduce the problem?
|
||||
|
||||
* create a new repo
|
||||
* annex a file
|
||||
* set numcopies to 2 or whatever
|
||||
* git annex fsck --json
|
||||
|
||||
|
||||
### What version of git-annex are you using? On what operating system?
|
||||
|
||||
I'm on Fedora 37.
|
||||
|
||||
git annex version
|
||||
git-annex version: 10.20221103
|
||||
build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV
|
||||
dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.29 DAV-1.3.4 feed-1.3.2.0 ghc-8.10.7 http-client-0.6.4.1 persistent-sqlite-2.13.1.0 torrent-10000.1.1 uuid-1.3.15 yesod-1.6.2
|
||||
key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X*
|
||||
remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external
|
||||
operating system: linux x86_64
|
||||
supported repository versions: 8 9 10
|
||||
upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10
|
||||
local repository version: 10
|
||||
|
||||
|
||||
### Please provide any additional information below.
|
||||
|
||||
[[!format sh """
|
||||
# If you can, paste a complete transcript of the problem occurring here.
|
||||
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
|
||||
|
||||
|
||||
# End of transcript or log.
|
||||
"""]]
|
||||
|
||||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||
|
||||
Yes! Git annex is amazing and is managing over 10 TB of data across 5 git annexes and around 10 hard drives. No data loss this entire time -- over 8 years.
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
[[!comment format=mdwn
|
||||
username="kanak@3c4f6e7d832d88751c617b25bdbac896417eb93b"
|
||||
nickname="kanak"
|
||||
avatar="http://cdn.libravatar.org/avatar/708121dfec06e554300b2a3a73a26818"
|
||||
subject="comment 1"
|
||||
date="2022-12-04T17:40:38Z"
|
||||
content="""
|
||||
Not just limited to numcopies:
|
||||
|
||||
{\"command\":\"fsck\",\"success\":true,\"key\":\"SHA256E-s119046--239da5a85ddf8c4071d8803a864a896d13e2a2fd65fd5684fc2f6dcaf264e875.jpg\",\"error-messages\":[],\"file\":\"12/03/chandrian_22:37:41.jpg\",\"note\":\"checksum...\",\"input
|
||||
\":[\"12/03/chandrian_22:37:41.jpg\"]}
|
||||
** Based on the location log, 12/03/chandrian_23:05:41.jpg
|
||||
** was expected to be present, but its content is missing.
|
||||
|
||||
|
||||
|
||||
"""]]
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="yarikoptic"
|
||||
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
|
||||
subject="comment 3"
|
||||
date="2022-11-29T22:25:42Z"
|
||||
content="""
|
||||
> I have re-ran the command to see if the bug replicates..
|
||||
|
||||
note: it took a considerable amount of time (days?) to get there ;) I made a copy, `test_fs_testonly.py`, where I removed all other \"benchmarks\" prior to running `annex test` -- it might get there faster if it works, so feel free to interrupt and rerun that one. That code is old (circa 2014 ;)), and I should give it a face lift but haven't had a chance yet :-/
|
||||
"""]]
|
26
doc/forum/Editing_Metadata_in_your___36__EDITOR.mdwn
Normal file
26
doc/forum/Editing_Metadata_in_your___36__EDITOR.mdwn
Normal file
|
@ -0,0 +1,26 @@
|
|||
Hey everyone,
|
||||
|
||||
I made a Python script that launches your `$EDITOR` (or `$VISUAL`) to conveniently edit git-annex metadata.
|
||||
|
||||
Code is [on Gitlab](https://gitlab.com/nobodyinperson/git-annex-metadata-edit).
|
||||
|
||||
## 📥 Installation
|
||||
|
||||
```bash
|
||||
# Installation
|
||||
pip install git+https://gitlab.com/nobodyinperson/git-annex-metadata-edit
|
||||
```
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- Operate on multiple files (recursively)
|
||||
- overwrite/remove a metadata field
|
||||
- add/remove specific values from a field in one go
|
||||
|
||||
## 📟 📹 Screencast
|
||||
|
||||
[![asciicast](https://asciinema.org/a/541576.svg)](https://asciinema.org/a/541576?autoplay=1)
|
||||
|
||||
Cheers, 👍
|
||||
|
||||
Yann
|
8
doc/forum/View_for_locally_existing_files.mdwn
Normal file
8
doc/forum/View_for_locally_existing_files.mdwn
Normal file
|
@ -0,0 +1,8 @@
|
|||
Hi,
|
||||
|
||||
Is there a way to make a "view" that only shows me files that are locally existing (meaning: where the binary is present in the repo I am working on)?
|
||||
|
||||
I had a look at the documentation and the forum but I did not find anything fitting (I honestly assume that I overlooked it somewhere). To me, views, vfilter and such do not appear to meet my needs since they only work on metadata.
|
||||
|
||||
Why do I need this?
|
||||
I am starting to manage my music collection with git annex. On most devices I only have a fraction of the globally available collection. I would like to see only those symlinks that actually lead to binary files. This way I would not confuse music players with broken symlinks and would know at a glance what I can actually listen to right now.
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="Lukey"
|
||||
avatar="http://cdn.libravatar.org/avatar/c7c08e2efd29c692cc017c4a4ca3406b"
|
||||
subject="comment 1"
|
||||
date="2022-12-01T20:49:35Z"
|
||||
content="""
|
||||
`git annex adjust --hide-missing`
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="agschaid"
|
||||
avatar="http://cdn.libravatar.org/avatar/7789d7511c5da25d71021be4ddb7fe18"
|
||||
subject="comment 2"
|
||||
date="2022-12-01T21:12:11Z"
|
||||
content="""
|
||||
Thank you! That is simply perfect.
|
||||
"""]]
|
|
@ -0,0 +1,93 @@
|
|||
Thank you for `git-annex`, it's awesome!
|
||||
|
||||
I recently figured I could add `git-annex metadata` to my research data files, recording the start and end date of the timeseries data inside each file, so that a quick lookup by date range (“which files contain data in that time range”) is possible.
|
||||
|
||||
This is possible when using numeric timestamps (e.g. unix timestamp like `1669981463`) but not with stringy dates (e.g. `2022-11-12T20:10:14+0200`) as `--metadata fieldname>=VALUE` does _numeric_ comparison.
|
||||
|
||||
## Proposal: How about `--metadata fieldname>=VALUE` falling back to string comparison when `VALUE` can't be parsed as a number?
|
||||
|
||||
## Test case
|
||||
|
||||
Consider this script `make-git-annex-dir-with-timestamps.sh`:
|
||||
|
||||
```sh
|
||||
#/bin/sh
|
||||
fmt="$1";test -n "$fmt" || fmt="%FT%T%z"
|
||||
# make a new git annex repository
|
||||
d=git-annex-with-times-"$fmt";chmod +w -R "$d";rm -rf "$d";mkdir "$d";cd "$d"
|
||||
git init
|
||||
git annex init
|
||||
# create some files
|
||||
for i in `seq 1 9`;do echo "File $i" > "file$i";done
|
||||
git annex add .
|
||||
git commit -m "Add files"
|
||||
# add metadata to files
|
||||
for i in `seq 1 9`;do
|
||||
time_start="$(date -d"$((-20 + $i)) hours" +"$fmt")"
|
||||
(set -x;git annex metadata --set time-start="$time_start" "file$i")
|
||||
time_end="$(date -d"$((-10 + $i)) hours" +"$fmt")"
|
||||
(set -x;git annex metadata --set time-end="$time_end" "file$i")
|
||||
done
|
||||
timerange_start="$(date -d "-16 hours -5 minutes" +"$fmt")"
|
||||
timerange_end="$(date -d "-12 hours +5 minutes" +"$fmt")"
|
||||
(
|
||||
set -x
|
||||
git annex find \
|
||||
"-(" --metadata "time-start>=$timerange_start" --and --metadata "time-start<=$timerange_end" "-)" \
|
||||
--or \
|
||||
"-(" --metadata "time-end>=$timerange_start" --and --metadata "time-end<=$timerange_end" "-)"
|
||||
)
|
||||
echo "⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆"
|
||||
```
|
||||
|
||||
Invoked with the unix timestamp format, it works as expected:
|
||||
|
||||
```sh
|
||||
> ./make-git-annex-dir-with-timestamps.sh '%s'
|
||||
# ...
|
||||
+ git annex find '-(' --metadata 'time-start>=1669923315' --and --metadata 'time-start<=1669938315' '-)' --or '-(' --metadata 'time-end>=1669923315' --and --metadata 'time-end<=1669938315' '-)'
|
||||
file4
|
||||
file5
|
||||
file6
|
||||
file7
|
||||
file8
|
||||
⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆
|
||||
```
|
||||
|
||||
However, other stringy date formats match all files:
|
||||
|
||||
```bash
|
||||
# typical ISO-ish time format
|
||||
> ./make-git-annex-dir-with-timestamps.sh "%FT%T%z"
|
||||
# ...
|
||||
+ git annex find '-(' --metadata 'time-start>=2022-12-01T20:49:37+0100' --and --metadata 'time-start<=2022-12-02T00:59:37+0100' '-)' --or '-(' --metadata 'time-end>=2022-12-01T20:49:37+0100' --and --metadata 'time-end<=2022-12-02T00:59:37+0100' '-)'
|
||||
file1
|
||||
file2
|
||||
file3
|
||||
file4
|
||||
file5
|
||||
file6
|
||||
file7
|
||||
file8
|
||||
file9
|
||||
⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆
|
||||
```
|
||||
|
||||
```sh
|
||||
# git-annex's own time format for 'FIELDNAME-lastchanged'
|
||||
> ./make-git-annex-dir-with-timestamps.sh "%Y-%m-%d@%H-%M-%S"
|
||||
# ...
|
||||
+ git annex find '-(' --metadata 'time-start>=2022-12-01@20-38-04' --and --metadata 'time-start<=2022-12-02@00-38-04' '-)' --or '-(' --metadata 'time-end>=2022-12-01@20-38-04' --and --metadata 'time-end<=2022-12-02@00-38-04' '-)'
|
||||
file1
|
||||
file2
|
||||
file3
|
||||
file4
|
||||
file5
|
||||
file6
|
||||
file7
|
||||
file8
|
||||
file9
|
||||
⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆
|
||||
```
|
||||
|
||||
Yann / @nobodyinperson
|
Loading…
Reference in a new issue