Merge branch 'master' of https://github.com/papers-we-love/papers-we-love
commit
21a3ee7204
@ -0,0 +1,22 @@
|
||||
# Scripts
|
||||
|
||||
Scripts for working with repository content.
|
||||
|
||||
## Download Utility
|
||||
A convenience script to download papers. It scrapes the Markdown (`*.md`) files in each directory for URLs that point to PDFs and downloads them into their respective directories.
|
||||
|
||||
The download utility is idempotent and can be run multiple times safely: files that already exist are skipped rather than downloaded again.
|
||||
|
||||
### Usage
|
||||
Open your favourite terminal and run:
|
||||
|
||||
```bash
|
||||
$ ./scripts/download.sh
|
||||
```
|
||||
|
||||
|
||||
Optionally, to download specific topics, specify their directories as arguments:
|
||||
|
||||
```bash
|
||||
$ ./scripts/download.sh android concurrency
|
||||
```
|
@ -0,0 +1,46 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Guard clause: abort early if a required binary is not installed.
# `command -v` is used instead of `which` because it is a shell builtin with a
# well-defined exit status; diagnostics are written to stderr.
command -v wget > /dev/null || { echo "Error: wget not installed." >&2 ; exit 1 ; }
command -v egrep > /dev/null || { echo "Error: egrep not installed." >&2 ; exit 1 ; }
|
||||
|
||||
#######################################
# Recursively traverse a directory tree, scraping the markdown files found in
# each directory for URLs that contain ".pdf", and download those files into
# the directory whose markdown referenced them.
# Arguments: $1 - directory to process (relative or absolute path)
# Outputs:   writes "<dir> done." to stdout once the directory and all of its
#            sub-directories have been processed; errors go to stderr
# Exits:     with status 1 if the directory cannot be entered
#######################################
download_for_directory() {
  # Remember where we were called from: "$1" may have several path components
  # (e.g. "a/b" or "scripts/.."), so a plain `cd ..` would not get us back.
  local start_dir=$PWD
  local f url

  cd -- "$1" || { echo "Error: directory not found." >&2 ; exit 1 ; }

  # Each sub-directory is processed by a background subshell so that downloads
  # for sibling directories run in parallel.
  for f in *; do
    if [[ -d "${f}" ]]; then
      download_for_directory "${f}" &
    fi
  done

  # Scrape URLs from markdown files, one URL per line. stderr of `cat` is
  # silenced on purpose: a directory without any *.md file is not an error.
  # `tr -d ')'` strips the closing parenthesis of markdown links.
  while IFS= read -r url; do
    # Ignore empty URLs
    if [[ -n "${url}" ]]; then
      wget "${url}" --no-clobber --quiet --timeout=5 --tries=2
    fi
  done < <(cat -- *.md 2> /dev/null \
    | grep -E -o 'https?://[^ ]+' \
    | grep '\.pdf' \
    | tr -d ')')

  # Wait for the sub-directory jobs started above so that "done" is only
  # reported (and the caller only continues) once they have really finished.
  wait

  cd -- "${start_dir}" || exit 1
  echo "$1 done."
}
|
||||
|
||||
# If no directories are supplied, iterate over the entire repo.
# All expansions are quoted so that paths containing whitespace or glob
# characters are passed through as a single, literal argument.
if [[ "$#" -eq 0 ]]; then
  # The script lives one level below the repository root.
  REPO_ROOT_DIR="$(dirname -- "$0")/.."
  download_for_directory "${REPO_ROOT_DIR}"
else
  # Iterate over the specified directories
  for dir in "$@"; do
    download_for_directory "${dir}"
  done
fi

# Wait for child processes to terminate
wait
|
||||
|
Loading…
Reference in new issue