The Hindu Newspaper Download Script
-
Upload
sathyamoorthy-venkatesh -
Category
Documents
-
view
214 -
download
0
Transcript of The Hindu Newspaper Download Script
7/28/2019 The Hindu Newspaper Download Script
http://slidepdf.com/reader/full/the-hindu-newspaper-download-script 1/3
#!/bin/bash
# ishan dot karve at gmail dot com
#
# Script to download epaper from Hindu
# No more subscription .. pls donate the money to Prime Ministers Welfare Fund
# As always /// Its free to use...
#
# This part of the script:
#   1. asks the user which edition to fetch (serial 0-4),
#   2. asks for the issue date (year / month / date),
#   3. probes the server page by page with `wget --spider` to count the
#      pages of the main paper (section "A").
#
# Variables set here and read further down the script:
#   edition_choice, edition_name, ed, year, month, date, max_spider, npages_A

# Get user to select edition.
# edition_choice holds the numeric code used in the PDF file name and
# edition_name the label; both are indexed by the serial shown in the menu.
edition_choice=([0]=101 [1]=102 [2]=103 [3]=104 [4]=105)
edition_name=([0]=Chennai [1]=Hyderabad [2]=Delhi [3]=Bangalore [4]=Kochi)

echo "Hindu epaper editions are"
echo "-------------------------------------------------"
for idx in "${!edition_name[@]}"; do
  echo "${idx}. ${edition_name[idx]}"
done
echo "-------------------------------------------------"

while true; do
  # Exit when stdin is closed (EOF); otherwise the loop would spin forever
  # printing the error message.
  read -r -p "Enter edition you wish to selec[0-4]: " ed || exit 1
  case "$ed" in
    [01234])
      echo "Thanks."
      break
      ;;
    *)
      echo "Please select the correct numeric serial."
      ;;
  esac
done

read -r -p "----- Year ?(eg-2012) : " year || exit 1
read -r -p "----- Month ?(eg- 05 for may) : " month || exit 1
read -r -p "----- Date ?(eg-01,02..) : " date || exit 1

# The date parts are pasted straight into the URL, so reject anything that is
# not a plain number before any request is made.
if [[ ! $year =~ ^[0-9]{4}$ || ! $month =~ ^[0-9]{1,2}$ || ! $date =~ ^[0-9]{1,2}$ ]]; then
  echo "Invalid date: year needs 4 digits, month and date 1-2 digits." >&2
  exit 1
fi
# Zero-pad month and date so "5" works like "05". The 10# prefix forces
# base 10; without it "08" and "09" would be rejected as invalid octal.
printf -v month '%02d' "$((10#$month))"
printf -v date '%02d' "$((10#$date))"

# Spider the selected edition using wget to estimate number of pages.
# max_spider is the upper bound on how many pages are probed.
max_spider=100
echo "Estimating number of pages in ${edition_name[ed]} edition"

# Start spider for main edition. npages_A only advances when a page was
# confirmed, so it holds the real page count after the loop whether the loop
# stopped on a missing page or ran to max_spider.
npages_A=0
for (( j = 1; j <= max_spider; j++ )); do
  # Page numbers in the URL are zero-padded to three digits.
  printf -v pageno '%03d' "$j"
  echo "Searching for Page $pageno"
  I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}A_$pageno${edition_choice[ed]}.pdf"
  # Stop at the first page that cannot be confirmed: either wget reports a
  # failure through its exit status, or its output carries the "link!!!"
  # text that the script has always used as the missing-page marker.
  if ! debug=$(wget --spider "$I_FILE" 2>&1) || [[ $debug == *'link!!!'* ]]; then
    break
  fi
  echo "ok.. page $pageno exists"
  npages_A=$j
done
# Report the confirmed count, not the number of the page that failed.
echo "$npages_A pages in the main paper"
echo "Estimating number of pages in ${edition_name[ed]} edition supplement"
7/28/2019 The Hindu Newspaper Download Script
http://slidepdf.com/reader/full/the-hindu-newspaper-download-script 2/3
# Start spider for newspaper supplement (section "B").
# Reads year, month, date, ed, edition_choice, edition_name, max_spider and
# npages_A, which are set earlier in the script.
# npages_B only advances when a page was confirmed, so it holds the real page
# count once the loop ends.
npages_B=0
for (( j = 1; j <= max_spider; j++ )); do
  # Page numbers in the URL are zero-padded to three digits.
  printf -v pageno '%03d' "$j"
  echo "Searching for Page $pageno"
  I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}B_$pageno${edition_choice[ed]}.pdf"
  # Stop at the first page that cannot be confirmed: either wget reports a
  # failure through its exit status, or its output carries the "link!!!"
  # text that the script has always used as the missing-page marker.
  if ! debug=$(wget --spider "$I_FILE" 2>&1) || [[ $debug == *'link!!!'* ]]; then
    break
  fi
  npages_B=$j
done

# Directory that stores the individual pages until they are merged.
ty_dir="${HOME}/Desktop/hindu_${edition_name[ed]}_${year}_${month}_${date}"
# -p lets a re-run for the same edition and date reuse the directory.
if ! mkdir -p -- "$ty_dir"; then
  echo "Cannot create directory $ty_dir" >&2
  exit 1
fi

#######################################
# Download every page of one section into ty_dir.
# Globals:   ty_dir, year, month, date, ed, edition_choice (read)
# Arguments: $1 - section letter used in the file name (A or B)
#            $2 - number of pages to fetch
# Outputs:   progress on stdout, download failures on stderr
#######################################
download_section() {
  local section=$1 total=$2
  local i pageno O_FILE I_FILE
  for (( i = 1; i <= total; i++ )); do
    # Page numbers in the URL are zero-padded to three digits.
    printf -v pageno '%03d' "$i"
    echo "Downloading Page $pageno"
    O_FILE="$ty_dir/${section}${pageno}.pdf"
    I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}${section}_$pageno${edition_choice[ed]}.pdf"
    if ! wget -q -O "$O_FILE" "$I_FILE"; then
      echo "Warning: could not download page $pageno of section $section" >&2
      # Drop whatever the failed download left behind so a broken file is
      # not handed to the merge step.
      rm -f -- "$O_FILE"
    fi
  done
}

echo "Please be patient..Bandwidth intensive operation starts..;-)"
echo "Downloading Main Paper .. total ${npages_A:-0} pages"
download_section A "${npages_A:-0}"
echo "Downloading Supplement .. total $npages_B pages"
download_section B "$npages_B"

echo "Combining all pages into a single pdf document"
# Glob expansion is sorted, so the A pages come before the B pages and each
# section stays in page order.
pages=("$ty_dir"/*.pdf)
# With no match the glob stays literal; do not pass that to gs.
if [[ ! -e ${pages[0]} ]]; then
  echo "No pages were downloaded; nothing to combine." >&2
  exit 1
fi

# Combine multiple pdf files.
if ! gs -dNOPAUSE -sDEVICE=pdfwrite \
  -sOUTPUTFILE="$HOME/Desktop/The_Hindu_${edition_name[ed]}_${year}_${month}_${date}.pdf" \
  -dBATCH "${pages[@]}"; then
  # Keep the downloaded pages so the merge can be retried without
  # fetching everything again.
  echo "Ghostscript failed; downloaded pages kept in $ty_dir" >&2
  exit 1
fi

# Empty directory. ${ty_dir:?} aborts instead of expanding to "/*.*" if the
# variable is ever empty.
rm -f -- "${ty_dir:?}"/*.*