The Hindu Newspaper Download Script

3
#!/bin/bash #ishan dot karve at gmail dot com # #Script to download epaper from Hindu #No more subscription .. pls donate the money to Prime Ministers Welfare Fund #As always /// Its free to use... #Get user to select edition edition_choice=([0]=101 [1]=102 [2]=103 [3]=104 [4]=105 ) edition_name=([0]=Chennai [1]=Hyderabad [2]=Delhi [3]=Bangalore [4]=Kochi ) echo "Hindu epaper editions are" echo "-------------------------------------------------" echo "0. Chennai" echo "1. Hyderabad" echo "2. Delhi" echo "3. Bangalore" echo "4. Kochi" echo "-------------------------------------------------" while true; do read -p "Enter edition you wish to selec[0-4]: " ed case $ed in [01234]) echo "Thanks." break;; * ) echo "Please select the correct numeric serial.";; esac done read -p "----- Year ?(eg-2012) : " year read -p "----- Month ?(eg- 05 for may) : " month read -p "----- Date ?(eg-01,02..) : " date #spider the selected edition using wget to estimate number of pages #define max incremental page limit max_spider=100 echo "Estimating number of pages in ${edition_name[ed]} edition" #start spider for main editon for (( j = 1 ; j <= $max_spider; j++ )) do #prepend zero to single digits pageno=`printf "%03d" $j` echo "Searching for Page $pageno" I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}A_$pageno${edition_choice[ed]}.pdf" debug=`wget --spider $I_FILE 2>&1`  #echo $debug if [[ $debug =~ .*link!!!.* ]] then break fi echo "ok.. page $pageno exists" done echo "$pageno pages in the main paper" #clear #decrement counter (( j = j - 1 )) npages_A=$j echo "Estimating number of pages in ${edition_name[ed]} edition supplement"

Transcript of The Hindu Newspaper Download Script

7/28/2019 The Hindu Newspaper Download Script

http://slidepdf.com/reader/full/the-hindu-newspaper-download-script 1/3

#!/bin/bash
# ishan dot karve at gmail dot com
#
# Script to download epaper from Hindu
# No more subscription .. pls donate the money to Prime Ministers Welfare Fund
# As always /// Its free to use...

# Edition codes that appear in the PDF file names, indexed by menu number.
edition_choice=([0]=101 [1]=102 [2]=103 [3]=104 [4]=105)
# Edition names used in messages and output file names, same indexing.
edition_name=([0]=Chennai [1]=Hyderabad [2]=Delhi [3]=Bangalore [4]=Kochi)

# Get user to select edition.
echo "Hindu epaper editions are"
echo "-------------------------------------------------"
echo "0. Chennai"
echo "1. Hyderabad"
echo "2. Delhi"
echo "3. Bangalore"
echo "4. Kochi"
echo "-------------------------------------------------"

# Ask until a single digit 0-4 is entered; $ed is later used as the index
# into edition_choice / edition_name.
while true; do
  # A failed read means stdin is closed; bail out instead of re-prompting
  # forever on an input that can never arrive.
  if ! read -r -p "Enter edition you wish to selec[0-4]: " ed; then
    echo "No edition selected (end of input)." >&2
    exit 1
  fi
  case "$ed" in
    [01234])
      echo "Thanks."
      break
      ;;
    *)
      echo "Please select the correct numeric serial."
      ;;
  esac
done

# Issue date. Each part is pasted into the download URL exactly as typed;
# the prompts ask for zero-padded month and day (e.g. 05, 01).
read -r -p "----- Year ?(eg-2012) : " year
read -r -p "----- Month ?(eg- 05 for may) : " month
read -r -p "----- Date ?(eg-01,02..) : " date

# Spider the selected edition using wget to estimate number of pages.
# Upper bound on how many page numbers are probed.
max_spider=100
echo "Estimating number of pages in ${edition_name[ed]} edition"

# Start spider for main edition: probe page 001, 002, ... and stop at the
# first page that is not available.
for (( j = 1; j <= max_spider; j++ )); do
  # Zero-pad the page number to three digits.
  pageno=$(printf '%03d' "$j")
  echo "Searching for Page $pageno"
  I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}A_$pageno${edition_choice[ed]}.pdf"
  # Stop when wget itself fails (e.g. no network), or when its output
  # carries the "link!!!" marker the original script looked for.
  if ! debug=$(wget --spider "$I_FILE" 2>&1) || [[ $debug == *'link!!!'* ]]; then
    break
  fi
  echo "ok.. page $pageno exists"
done

# j now points one past the last page that exists (either the first missing
# page or max_spider + 1), so the page count is j - 1.
j=$(( j - 1 ))
npages_A=$j
# Report the real count; $pageno would be the first missing page.
echo "$npages_A pages in the main paper"
echo "Estimating number of pages in ${edition_name[ed]} edition supplement"

7/28/2019 The Hindu Newspaper Download Script

http://slidepdf.com/reader/full/the-hindu-newspaper-download-script 2/3

# Start spider for newspaper supplement (files named ...B_<page><edition>.pdf):
# probe page 001, 002, ... and stop at the first page that is not available.
# Relies on year, month, date, ed, edition_choice and max_spider set earlier.
for (( j = 1; j <= max_spider; j++ )); do
  # Zero-pad the page number to three digits.
  pageno=$(printf '%03d' "$j")
  echo "Searching for Page $pageno"
  I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}B_$pageno${edition_choice[ed]}.pdf"
  # Stop when wget itself fails (e.g. no network), or when its output
  # carries the "link!!!" marker the original script looked for.
  if ! debug=$(wget --spider "$I_FILE" 2>&1) || [[ $debug == *'link!!!'* ]]; then
    break
  fi
done

# j points one past the last existing supplement page; decrement to get the
# page count.
j=$(( j - 1 ))
npages_B=$j

# Working directory on the Desktop that holds the individual page PDFs.
ty_dir="${HOME}/Desktop/hindu_${edition_name[ed]}_${year}_${month}_${date}"

# mkdir to store individual pages. Quoted so a HOME containing spaces works;
# -p tolerates a directory left over from an earlier run.
if ! mkdir -p -- "$ty_dir"; then
  echo "Cannot create working directory: $ty_dir" >&2
  exit 1
fi

echo "Please be patient..Bandwidth intensive operation starts..;-)"
echo "Downloading Main Paper .. total $npages_A pages"

# Download every main-paper page (A_001 .. A_<npages_A>) into $ty_dir.
for (( i = 1; i <= npages_A; i++ )); do
  # Zero-pad the page number to three digits.
  pageno=$(printf '%03d' "$i")
  echo "Downloading Page $pageno"
  O_FILE="$ty_dir/A$pageno.pdf"
  I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}A_$pageno${edition_choice[ed]}.pdf"
  if ! wget -q -O "$O_FILE" "$I_FILE"; then
    echo "Failed to download page $pageno of the main paper" >&2
    # wget -O leaves an empty/partial file behind on failure; drop it so it
    # is not fed to the PDF merge step later.
    rm -f -- "$O_FILE"
  fi
done

echo "Downloading Supplement .. total $npages_B pages"

# Download every supplement page (B_001 .. B_<npages_B>) into $ty_dir.
for (( i = 1; i <= npages_B; i++ )); do
  # Zero-pad the page number to three digits.
  pageno=$(printf '%03d' "$i")
  echo "Downloading Page $pageno"
  O_FILE="$ty_dir/B$pageno.pdf"
  I_FILE="http://epaper.thehindu.com/pdf/$year/$month/$date/$year$month${date}B_$pageno${edition_choice[ed]}.pdf"
  if ! wget -q -O "$O_FILE" "$I_FILE"; then
    echo "Failed to download page $pageno of the supplement" >&2
    # wget -O leaves an empty/partial file behind on failure; drop it so it
    # is not fed to the PDF merge step later.
    rm -f -- "$O_FILE"
  fi
done

echo "Combining all pages into a single pdf document"
# Combine multiple pdf files with Ghostscript. The glob expands in sorted
# order, so the A*.pdf pages come before the B*.pdf supplement pages.
# ${ty_dir:?} aborts if ty_dir is unset/empty, so the globs below can never
# expand against the filesystem root.
if gs -dNOPAUSE -sDEVICE=pdfwrite \
  "-sOUTPUTFILE=$HOME/Desktop/The_Hindu_${edition_name[ed]}_${year}_${month}_${date}.pdf" \
  -dBATCH "${ty_dir:?}"/*.pdf; then
  # Empty directory, but only once the merged document has been written.
  rm -- "${ty_dir:?}"/*.*
else
  echo "Combining failed; downloaded pages kept in $ty_dir" >&2
fi

7/28/2019 The Hindu Newspaper Download Script

http://slidepdf.com/reader/full/the-hindu-newspaper-download-script 3/3

# Remove directory. rmdir only succeeds when the directory is empty, so any
# pages that were kept are never deleted here. ${ty_dir:?} aborts if the
# variable is unset or empty.
rmdir -- "${ty_dir:?}"