#!/bin/sh
# This is a demo git-annex external special remote program,
# which adds basic torrent download support to git-annex.
#
# Uses aria2c. Also needs the original bittorrent (or bittornado) for the
# btshowmetainfo command.
#
# Install in PATH as git-annex-remote-torrent
#
# Enable remote by running:
#   git annex initremote torrent type=external encryption=none externaltype=torrent
#   git annex untrust torrent
#
# Copyright 2014 Joey Hess; licenced under the GNU GPL version 3 or higher.
|
|
|
|
|
|
|
|
set -e
|
|
|
|
|
|
|
|
# This program speaks a line-based protocol on stdin and stdout.
# When running any commands, their stdout should be redirected to stderr
# (or /dev/null) to avoid messing up the protocol.
#
# Usage: runcmd COMMAND [ARG...]
# Runs COMMAND with its stdout sent to stderr. The exit status is that of
# COMMAND, so callers can test it directly.
runcmd () {
	"$@" >&2
}
|
|
|
|
|
|
|
|
# Gets a VALUE response and stores it in $RET
#
# Reads one line from stdin. For a "VALUE <text>" line, RET is set to
# <text> exactly as received (inner whitespace and backslashes are kept).
# For a bare "VALUE", any other reply, or end of input, RET is set to the
# empty string.
getvalue () {
	# IFS= and -r stop read from trimming whitespace and from treating
	# backslashes as escapes. The "|| true" keeps an end-of-input from
	# aborting the whole program under set -e; resp is then empty.
	IFS= read -r resp || true
	case "$resp" in
		"VALUE "*)
			# Strip only the leading keyword and its single separator.
			RET="${resp#VALUE }"
			;;
		*)
			RET=""
			;;
	esac
}
|
|
|
|
|
|
|
|
# Get a list of all known torrent urls for a key,
# storing it in a temp file.
#
# Usage: geturls KEY FILE
# Writes a "GETURLS KEY" request to stdout, then reads VALUE replies with
# getvalue until an empty one arrives. Every url accepted by istorrent is
# appended to FILE, one per line; other urls are skipped.
geturls () {
	key="$1"
	tmp="$2"

	printf 'GETURLS %s\n' "$key"
	getvalue
	while [ -n "$RET" ]; do
		if istorrent "$RET"; then
			# printf rather than echo, so the url is written verbatim
			# whatever characters it contains.
			printf '%s\n' "$RET" >> "$tmp"
		fi
		getvalue
	done
}
|
|
|
|
|
|
|
|
# Does the url end in .torrent?
# Note that we use #N on the url to indicate which file
# from a multi-file torrent is wanted.
#
# Usage: istorrent URL
# Returns 0 when URL ends in ".torrent" or contains ".torrent#", and 1
# otherwise. Done with a shell pattern match, so no subprocess is started
# and a url that looks like an option (e.g. "-n") is handled correctly.
istorrent () {
	case "$1" in
		*.torrent | *.torrent"#"*)
			return 0
			;;
		*)
			return 1
			;;
	esac
}
|
|
|
|
|
|
|
|
# Download a single file from a torrent.
#
# Usage: downloadtorrent TORRENT N DEST
#   TORRENT  path of a .torrent file on disk
#   N        number of the wanted file in a multi-file torrent, counting
#            from 1 in the order btshowmetainfo lists the files; ignored
#            for a single-file torrent
#   DEST     path the downloaded file is moved to
# Returns 0 only when the download succeeded and DEST was populated.
# The scratch download directory is removed on every path.
#
# Note: Does not support resuming interrupted transfers.
# Note: Does not feed progress info back to git-annex, and since
# the destination file is only populated at the end, git-annex will fail
# to display a progress bar for this download.
downloadtorrent () {
	torrent="$1"
	n="$2"
	dest="$3"

	tmpdir="$(mktemp -d)" || return 1

	# aria2c will create part of the directory structure
	# contained in the torrent. It may download parts of other files
	# in addition to the one we asked for. So, we need to find
	# out the filename we want, and look for it.
	wantdir="$(btshowmetainfo "$torrent" | grep "^directory name: " | sed "s/^directory name: //" || true)"
	if [ -n "$wantdir" ]; then
		# Multi-file torrent: the file number is fed to head and to
		# aria2c, so it has to be a plain decimal number.
		case "$n" in
			"" | *[!0-9]*)
				rm -rf "$tmpdir"
				return 1
				;;
		esac
		wantfile="$(btshowmetainfo "$torrent" | grep '^ ' | sed 's/^ //' | head -n "$n" | tail -n 1 | sed 's/ ([0-9]*)$//')"
		# Return explicitly on failure: this function is run as an "if"
		# condition, where set -e is suppressed, so a bare "false" would
		# fall through and a partial download could be taken for success.
		if ! runcmd aria2c --select-file="$n" "$torrent" -d "$tmpdir"; then
			rm -rf "$tmpdir"
			return 1
		fi
	else
		wantfile="$(btshowmetainfo "$torrent" | grep -E "^file name.*: " | sed "s/^file name.*: //")"
		wantdir=.
		if ! runcmd aria2c "$torrent" -d "$tmpdir"; then
			rm -rf "$tmpdir"
			return 1
		fi
	fi

	# An empty wantfile would make the path below name the directory
	# itself, which always exists; treat that as a failure instead.
	status=1
	if [ -n "$wantfile" ] && [ -e "$tmpdir/$wantdir/$wantfile" ]; then
		if mv "$tmpdir/$wantdir/$wantfile" "$dest"; then
			status=0
		fi
	fi
	rm -rf "$tmpdir"
	return "$status"
}
|
|
|
|
|
|
|
|
# This has to come first, to get the protocol started.
echo VERSION 2

# Main protocol loop: one request per line on stdin, one reply per request
# on stdout. A request is a command word followed by space-separated
# parameters. read does the splitting: cmd, arg1 and arg2 get the first
# three words and rest gets everything after them with its inner whitespace
# intact, so a file name containing spaces survives. -r keeps backslashes
# literal, and nothing here glob-expands the request.
while IFS=' ' read -r cmd arg1 arg2 rest; do
	case "$cmd" in
		INITREMOTE)
			echo INITREMOTE-SUCCESS
			;;
		PREPARE)
			echo PREPARE-SUCCESS
			;;
		CLAIMURL)
			url="$arg1"
			if istorrent "$url"; then
				echo CLAIMURL-SUCCESS
			else
				echo CLAIMURL-FAILURE
			fi
			;;
		CHECKURL)
			url="$arg1"
			# List contents of torrent.
			tmp=$(mktemp)
			# Only http and https are allowed when fetching the .torrent
			# file. Other schemes (file:, scp:, ...) would let anyone who
			# can add an url to the repository make this program read
			# files it should not.
			if ! runcmd curl --proto -all,http,https -o "$tmp" "$url"; then
				echo CHECKURL-FAILURE
			else
				# Split the file listing on newlines only, with globbing
				# off so that file names are never expanded as patterns.
				oldIFS="$IFS"
				IFS="
"
				set -f
				printf "CHECKURL-MULTI"
				n=0
				for l in $(btshowmetainfo "$tmp" | grep '^ ' | sed 's/^ //'); do
					# Note that the file cannot contain spaces.
					file="$(printf '%s\n' "$l" | sed 's/ ([0-9]*)$//' | sed 's/ /_/g')"
					size="$(printf '%s\n' "$l" | sed 's/.* (\([0-9]*\))$/\1/')"
					n=$((n + 1))
					# The url is passed as data, never as the printf format:
					# urls routinely contain % escapes.
					printf ' %s#%s %s %s' "$url" "$n" "$size" "$file"
				done
				set +f
				IFS="$oldIFS"
				if [ "$n" = 0 ]; then
					# No file list was found; describe the torrent's single
					# file. Spaces in its name are replaced too, because the
					# reply is space-separated.
					file="$(btshowmetainfo "$tmp" | grep -E "^file name.*: " | sed "s/^file name.*: //" | sed 's/ /_/g')"
					size="$(btshowmetainfo "$tmp" | grep -E "^file size.*: " | sed "s/^file size.*: \([0-9]*\).*/\1/")"
					printf ' %s %s %s' "$url" "$size" "$file"
				fi
				printf '\n'
			fi
			rm -f "$tmp"
			;;
		TRANSFER)
			# TRANSFER <STORE|RETRIEVE> <key> <file>
			op="$arg1"
			key="$arg2"
			file="$rest"
			case "$op" in
				STORE)
					echo TRANSFER-FAILURE STORE "$key" "upload not supported"
					;;
				RETRIEVE)
					urltmp=$(mktemp)
					geturls "$key" "$urltmp"
					# Only the first known torrent url is tried.
					url="$(head -n 1 "$urltmp")" || true
					rm -f "$urltmp"
					if [ -z "$url" ]; then
						echo TRANSFER-FAILURE RETRIEVE "$key" "no known torrent urls for this key"
					else
						tmp=$(mktemp)
						# Same scheme restriction as for CHECKURL: only
						# http and https may be used to fetch the torrent.
						if ! runcmd curl --proto -all,http,https -o "$tmp" "$url"; then
							printf 'TRANSFER-FAILURE RETRIEVE %s failed downloading torrent file from %s\n' "$key" "$url"
						else
							# A "#N" suffix on the url selects file N of a
							# multi-file torrent; without one the number is
							# left empty.
							case "$url" in
								*"#"*)
									filenum="${url##*"#"}"
									;;
								*)
									filenum=""
									;;
							esac
							if downloadtorrent "$tmp" "$filenum" "$file"; then
								echo TRANSFER-SUCCESS RETRIEVE "$key"
							else
								printf 'TRANSFER-FAILURE RETRIEVE %s failed to download torrent contents from %s\n' "$key" "$url"
							fi
						fi
						rm -f "$tmp"
					fi
					;;
				*)
					# Always answer, so the caller is never left waiting
					# on a transfer direction this program does not know.
					echo UNSUPPORTED-REQUEST
					;;
			esac
			;;
		CHECKPRESENT)
			key="$arg1"
			# Whether a torrent still has its content available cannot
			# be checked reliably here, so report that it is unknown.
			echo CHECKPRESENT-UNKNOWN "$key" "cannot reliably check torrent status"
			;;
		REMOVE)
			key="$arg1"
			# Remove all torrent urls for the key.
			tmp=$(mktemp)
			geturls "$key" "$tmp"
			# One url per line; read them whole instead of word-splitting.
			while IFS= read -r url; do
				printf 'SETURLMISSING %s %s\n' "$key" "$url"
			done < "$tmp"
			rm -f "$tmp"
			echo REMOVE-SUCCESS "$key"
			;;
		*)
			echo UNSUPPORTED-REQUEST
			;;
	esac
done
|