Removed dependency on xargs.

This commit is contained in:
christos 2020-03-26 17:00:59 +00:00
parent 50a8173718
commit 3d4ed8cea8

View File

@ -3,35 +3,44 @@
# Guard clause: abort early if a required binary is missing.
# `command -v` is the portable way to probe for a command (unlike `which`),
# and diagnostics go to stderr so they are not mixed into normal output.
for required_bin in wget egrep; do
  command -v "${required_bin}" > /dev/null \
    || { echo "Error: ${required_bin} not installed." >&2 ; exit 1 ; }
done
unset required_bin
#######################################
# Recursively traverse a directory, scraping its markdown files for URLs
# containing "pdf", and download each match into the directory it was
# found in. Subdirectories are processed concurrently as background jobs.
# Arguments:
#   $1 - directory to process
# Outputs:
#   "<dir> done." on stdout once the directory and all of its
#   subdirectories have been processed; errors on stderr.
# Returns:
#   Exits the current (sub)shell with status 1 if the directory cannot be
#   entered.
#######################################
download_for_directory() {
  local dir=$1
  local start_dir=$PWD
  local entry url
  local pids=()

  cd -- "${dir}" || { echo "Error: directory not found." >&2 ; exit 1 ; }

  # Fan out: each subdirectory is handled in its own background subshell,
  # so the `cd` performed there never affects this shell.
  for entry in *; do
    if [[ -d "${entry}" ]]; then
      download_for_directory "${entry}" &
      pids+=("$!")
    fi
  done

  # Scrape URLs from markdown files, one URL per line. The closing
  # parenthesis of a markdown link is captured by the regex and stripped by
  # tr. When no *.md file exists, cat's error is silenced on purpose and the
  # loop body simply never runs.
  while IFS= read -r url; do
    # Ignore empty URLs
    if [[ -n "${url}" ]]; then
      wget "${url}" --no-clobber --quiet --timeout=5 --tries=2
    fi
  done < <(cat -- *.md 2> /dev/null \
    | grep -Eo 'https?://[^ ]+' \
    | grep 'pdf' \
    | tr -d ')')

  # Barrier: only report completion once every subdirectory job started
  # above has finished.
  if (( ${#pids[@]} > 0 )); then
    wait "${pids[@]}"
  fi

  # Return to where we started; `cd ..` would be wrong for a multi-level
  # path such as "a/b".
  cd -- "${start_dir}" || exit 1
  echo "$1 done."
}
# If no directories are supplied, iterate over the entire repo (the parent
# of the directory containing this script).
if [[ "$#" -eq 0 ]]; then
  REPO_ROOT_DIR="$(dirname -- "$0")/.."
  download_for_directory "${REPO_ROOT_DIR}"
else
  # Iterate over the specified directories
  for dir in "$@"; do
    download_for_directory "${dir}"
  done
fi
# Wait for child processes to terminate
wait