mirror of
https://github.com/papers-we-love/papers-we-love.git
synced 2024-10-27 20:34:20 +00:00
Removed dependency on xargs.
This commit is contained in:
parent
50a8173718
commit
3d4ed8cea8
@ -3,35 +3,44 @@
|
|||||||
# Guard clause: abort early if a required binary is missing.
# `command -v` is the POSIX way to test for a command; `which` is an external
# tool whose output and exit status vary between systems.
command -v wget > /dev/null || { echo "Error: wget not installed." >&2 ; exit 1 ; }
command -v egrep > /dev/null || { echo "Error: egrep not installed." >&2 ; exit 1 ; }
|
|
||||||
|
|
||||||
# Recursively traverse directories in the repo, scraping markdown files for
# URLs containing "pdf", and download each one into the directory whose
# markdown file referenced it.
#
# Arguments: $1 - directory to process (absolute, or relative to the current
#                 working directory)
# Outputs:   "<dir> done." on stdout once this directory's own downloads have
#            been attempted; an error on stderr if $1 cannot be entered.
# Exits:     with status 1 if $1 (or the starting directory) cannot be entered.
# Notes:     every subdirectory is processed by a background job, so the
#            caller must `wait` before assuming all downloads have finished.
#            The caller's working directory is restored before returning.
download_for_directory() {
  # Remember where we started: `cd ..` would land in the wrong place whenever
  # $1 has more than one path component (e.g. "a/b" or "scripts/..").
  local start_dir=$PWD
  local f url

  cd -- "$1" || { echo "Error: directory not found." >&2 ; exit 1 ; }

  for f in *; do
    if [[ -d ${f} ]]; then
      # Runs in a background subshell, so its `cd` does not affect this shell.
      download_for_directory "${f}" &
    fi
  done

  # Scrape URLs from markdown files. Reading line by line and quoting the URL
  # keeps characters such as '?' and '*' from being glob-expanded.
  # grep's stderr is silenced because a directory without *.md files is normal.
  while IFS= read -r url; do
    # Ignore empty URLs
    if [[ -n ${url} ]]; then
      wget --no-clobber --quiet --timeout=5 --tries=2 "${url}"
    fi
  done < <(grep -Eoh 'https?://[^ ]+' -- *.md 2> /dev/null | grep 'pdf' | tr -d ')')

  cd -- "${start_dir}" || exit 1
  echo "$1 done."
}
|
||||||
|
|
||||||
# If no directories are supplied, iterate over the entire repo (the parent of
# the directory containing this script).
# The check is "-eq 0" rather than "-ne 1" so that two or more directory
# arguments are processed instead of being silently ignored.
if [[ "$#" -eq 0 ]]; then
  REPO_ROOT_DIR="$(dirname "$0")/.."
  download_for_directory "${REPO_ROOT_DIR}"
else
  # Iterate over the specified directories
  for dir in "$@"; do
    download_for_directory "${dir}"
  done
fi

# Wait for child processes to terminate
wait
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user