From ae9ef1f8fe862951d1c2400d73089f603f418702 Mon Sep 17 00:00:00 2001 From: nobodyinperson Date: Fri, 2 Dec 2022 11:56:02 +0000 Subject: [PATCH] --- ...me__62____61__VALUE_string_comparison.mdwn | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 doc/todo/--metadata_fieldname__62____61__VALUE_string_comparison.mdwn diff --git a/doc/todo/--metadata_fieldname__62____61__VALUE_string_comparison.mdwn b/doc/todo/--metadata_fieldname__62____61__VALUE_string_comparison.mdwn new file mode 100644 index 0000000000..7c1d9b583e --- /dev/null +++ b/doc/todo/--metadata_fieldname__62____61__VALUE_string_comparison.mdwn @@ -0,0 +1,91 @@ +Thank you for `git-annex`, it's awesome! + +I recently figured I could add `git-annex metadata` to my research data files that contains the start and end date of timeseries data inside the files so a quick lookup by date range (”which files contain data in that time range”) is possible. + +This is possible when using numeric timestamps (e.g. unix timestamp like `1669981463`) but not with stringy dates (e.g. `2022-11-12T20:10:14+0200`) as `--metadata fieldname>=VALUE` does _numeric_ comparison. + +## Proposal: How about when `--metadata fieldname>=VALUE` falls back to string comparison when `VALUE` can't be parsed as a number? + +## Test case + +Consider this script `make-git-annex-dir-with-timestamps.sh`: + +```sh +#/bin/sh +fmt="$1";test -n "$fmt" || fmt="%FT%T%z" +# make a new git annex repository +d=git-annex-with-times-"$fmt";chmod +w -R "$d";rm -rf "$d";mkdir "$d";cd "$d" +git init +git annex init +# create some files +for i in `seq 1 9`;do echo "File $i" > "file$i";done +git annex add . +git commit -m "Add files" +# add metadata to files +for i in `seq 1 9`;do + time_start="$(date -d"$((-20 + $i)) hours" +"$fmt")" + (set -x;git annex metadata --set time-start="$time_start" "file$i") + time_end="$(date -d"$((-10 + $i)) hours" +"$fmt")" + (set -x;git annex metadata --set time-end="$time_end" "file$i") +done +timerange_start="$(date -d "-16 hours -5 minutes" +"$fmt")" +timerange_end="$(date -d "-12 hours +5 minutes" +"$fmt")" +( +set -x +git annex find \ + "-(" --metadata "time-start>=$timerange_start" --and --metadata "time-start<=$timerange_end" "-)" \ + --or \ + "-(" --metadata "time-end>=$timerange_start" --and --metadata "time-end<=$timerange_end" "-)" +) +echo "⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆" +``` + +Invoked with unix timestamps time format, it works as expected: + +```sh +> ./make-git-annex-dir-with-timestamps.sh '%s' +# ... ++ git annex find '-(' --metadata 'time-start>=1669923315' --and --metadata 'time-start<=1669938315' '-)' --or '-(' --metadata 'time-end>=1669923315' --and --metadata 'time-end<=1669938315' '-)' +file4 +file5 +file6 +file7 +file8 +⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆ +``` + +However, other stringy date formats match all files: + +```bash +# typical ISO-ish time format +> ./make-git-annex-dir-with-timestamps.sh "%FT%T%z" +# ... ++ git annex find '-(' --metadata 'time-start>=2022-12-01T20:49:37+0100' --and --metadata 'time-start<=2022-12-02T00:59:37+0100' '-)' --or '-(' --metadata 'time-end>=2022-12-01T20:49:37+0100' --and --metadata 'time-end<=2022-12-02T00:59:37+0100' '-)' +file1 +file2 +file3 +file4 +file5 +file6 +file7 +file8 +file9 +⬆⬆⬆ This should only output file4 through file8 ⬆⬆⬆ +``` + +```sh +# git-annex's own time format for 'FIELDNAME-lastchanged' +> ./make-git-annex-dir-with-timestamps.sh "%Y-%m-%d@%H-%M-%S" +# ... ++ git annex find '-(' --metadata 'time-start>=2022-12-01@20-38-04' --and --metadata 'time-start<=2022-12-02@00-38-04' '-)' --or '-(' --metadata 'time-end>=2022-12-01@20-38-04' --and --metadata 'time-end<=2022-12-02@00-38-04' '-)' +file1 +file2 +file3 +file4 +file5 +file6 +file7 +file8 +file9 +⬆⬆⬆ This should only output file8 and file9 ⬆⬆⬆ +```