Shell 脚本,用来获取基于 moebooru 的网站上的原图链接,如 yande.re、konachan.com 等。
下载
$ curl -O "https://github.com/qianbinbin/moebooru-crawler/raw/master/moebooru-crawler.sh"
$ # git clone git@github.com:qianbinbin/moebooru-crawler.git && cd moebooru-crawler
$ chmod +x ./moebooru-crawler.sh
使用
Usage: moebooru-crawler URL [ -n NUM, --num=NUM ]
-n NUM, --num=NUM print NUM links of images,
or print all if NUM is '0'
示例
获取指定页面上的图片链接
$ ./moebooru-crawler.sh "https://yande.re/post?tags=coffee-kizoku+order%3Ascore"
将链接保存到文件
$ ./moebooru-crawler.sh "https://yande.re/post?tags=coffee-kizoku+order%3Ascore" >>links.txt
然后可以用 aria2c 之类的工具批量下载。
获取指定数量的图片(当页面多于一页时)
$ ./moebooru-crawler.sh "https://yande.re/post?page=2&tags=coffee-kizoku" -n 100 # "page=2" 会被忽略
要获取所有页面上的图片链接,使用 -n 0 即可:
$ ./moebooru-crawler.sh "https://yande.re/post?tags=coffee-kizoku" -n 0
源码
https://github.com/qianbinbin/moebooru-crawler
#!/usr/bin/env sh
# Print the original-image links of a moebooru based site (yande.re,
# konachan.com, ...) for a given post listing URL.

# Number of links requested with -n/--num; empty means "only the given page".
NUM=

# Help text shown on stderr whenever the command line is rejected.
USAGE="Usage: moebooru-crawler URL [ -n NUM, --num=NUM ]
-n NUM, --num=NUM print NUM links of images,
or print all if NUM is '0'"
# Print a diagnostic on stderr.
#   $@ - message words; they are joined with single spaces (default IFS)
# printf is used instead of echo so that a message such as "-n", or one
# containing backslashes, is printed literally by every sh implementation.
error() { printf '%s\n' "$*" >&2; }
# Abort on a bad invocation: the usage text goes to stderr and the script
# terminates with status 2.
_exit() { error "$USAGE"; exit 2; }
# Parse the command line into the globals URL and NUM.
#   URL - the single positional argument (required)
#   NUM - value of -n NUM / --num NUM / -n=NUM / --num=NUM; stays empty when
#         the option is absent
# Every malformed invocation (unknown option, missing or non-numeric NUM,
# negative NUM, zero or several URLs) ends in _exit.
parse_args() {
  # Start from a clean slate: a URL or NUM inherited from the environment
  # must not be mistaken for a value given on the command line.
  NUM=
  URL=

  while [ $# -gt 0 ]; do
    case "$1" in
    -n | --num)
      # The value is the next word; it must exist and be non-empty.
      [ -n "$2" ] || _exit
      NUM="$2"
      shift 2
      ;;
    -n=* | --num=*)
      NUM="${1#*=}"
      shift
      ;;
    -*)
      _exit
      ;;
    *)
      # Only one URL is accepted.
      [ -z "$URL" ] || _exit
      URL="$1"
      shift
      ;;
    esac
  done

  if [ -n "$NUM" ]; then
    # A non-numeric NUM makes the test itself fail; its message is hidden
    # because the usage text printed by _exit is the diagnostic.
    [ "$NUM" -ge 0 ] 2>/dev/null || _exit
  fi
  [ -n "$URL" ] || _exit
}

parse_args "$@"
# Download one moebooru XML listing and print the original-image URLs in it.
#   $1 - URL of the .xml listing
# Output:  the value of every file_url="..." attribute that starts with
#          "http", one per line on stdout.
# Returns: curl's exit status when the download fails (nothing is printed
#          then); otherwise the status of the last grep, which is non-zero
#          when the listing holds no link.
# POSIX sh has no "local", so the global "content" is overwritten.
get_links() {
  content=$(curl -fsSL "$1") || return
  # printf rather than echo: the body is arbitrary data and must reach grep
  # unmodified.
  printf '%s\n' "$content" |
    grep -o 'file_url="[^"]*' |
    grep -o 'http[^"]*'
}
# Print the original-image links for a moebooru post listing.
#   $1 - listing URL as typed by the user; ".xml" is appended to the part
#        before any "?" and the query string is kept
#   $2 - empty: fetch only the page the URL names
#        0:     walk pages 1, 2, ... until a page yields no links
#        N > 0: walk pages from 1 until at least N links were collected,
#               then keep the first N
# Output: links on stdout, one per line; nothing at all when none were found.
# Calls get_links for every request. POSIX sh has no "local", so the globals
# path, query, url, links, page, count and page_links are overwritten.
crawl() {
  # Split "path?query" so that ".xml" lands on the path part.
  case "$1" in
  *\?*)
    path="${1%%\?*}.xml"
    query=${1#*\?}
    ;;
  *)
    path="$1.xml"
    query=
    ;;
  esac

  links=
  if [ -z "$2" ]; then
    url=$path
    [ -z "$query" ] || url="$url?$query"
    links=$(get_links "$url")
  else
    # Paging restarts at 1, so every page=N the user supplied is dropped.
    # Prefixing "&" lets one portable expression remove the parameter in any
    # position (no GNU-only "\?"), and the second expression removes the
    # separator that is left at the front.
    query=$(printf '&%s\n' "$query" | sed -e 's/&page=[0-9]*//g' -e 's/^&//')
    page=1
    count=0
    while [ "$2" -eq 0 ] || [ "$count" -lt "$2" ]; do
      page_links=$(get_links "$path?${query:+$query&}page=$page")
      # An empty page marks the end of the listing.
      [ -n "$page_links" ] || break
      # Keep the list newline separated; no re-parsing through xargs, which
      # would choke on quotes or backslashes inside a link.
      links="${links:+$links
}$page_links"
      count=$(printf '%s\n' "$links" | grep -c .)
      page=$((page + 1))
    done
    # The last page fetched may overshoot the requested amount.
    [ "$2" -eq 0 ] || links=$(printf '%s\n' "$links" | head -n "$2")
  fi

  [ -z "$links" ] || printf '%s\n' "$links"
}

crawl "$URL" "$NUM"