#!/bin/bash
#########################################################################################
#
# get_feed RSS feed reader.
#
#
# © Copyright 2012 Arto Jääskeläinen <temp001(at)pp.inet.fi>
# Part of Auto DL software package. All rights reserved.
# The program is distributed under the terms of the GNU General Public License
#
#    Auto DL is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    Auto DL is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Auto DL.  If not, see <http://www.gnu.org/licenses/>.
#
#
# V1.0   2012-07-27  Initial version
# V1.1   2012-09-18  System/user config /etc/auto_dl/autodl.conf, ~/.auto_dl/autodl.conf
# V1.2   2012-09-21  Name detection trailing tab/space removal, done touch status test, rec start time to log  
# V1.21  2012-09-21  Disown record() after launching
# V1.22  2012-09-24  yle-dl call parameter fix -- no ""
# V1.23  2012-09-28  UseWinFilename, verbose logging
# V1.24  2012-10-04  download_path outside of /home, RecordExitTask
# V1.25  2012-10-09  default_download_path fix
# V1.26  2012-10-09  validate_link fix
# V1.27  2012-10-10  data consistency check and error logging before pipelinks 
# V1.28  2012-10-16  temporary fix for language 
# V1.29  2012-10-16  get system country settings, medium_title update 
# V1.30  2013-01-04  download retry counter, fixed value=5 
# V1.31  2013-01-05  MaxDownloadCount variable, user configurable, default=5
# V1.32  2013-07-26  Retry after any type of http error
#
#########################################################################################
# Installation layout, log file names and the system-wide configuration
# file; these globals are read by the functions defined below.
inst_dir="/opt/auto_dl"
logs_dir="${inst_dir}/logs"
main_log='get_feed.log'
rec_log='recorded.log'
fail_log='failure.log'
data_root="${inst_dir}/.auto_dl"
system_conf_path='/etc/auto_dl/autodl.conf'

# Read one "Key=Value" setting from a configuration file.
#   $1 - path of the configuration file
#   $2 - setting name; compared case-insensitively with the text left of
#        the first "=" on each line
# The value (text right of the first "=", surrounding whitespace trimmed)
# is left in the global "response"; it is empty when the key is absent.
# When the key occurs more than once the last occurrence wins.
# Returns 1 if the file does not exist or no name was given, otherwise 0.
# NOTE: get_conf is defined again further down in this file; the later
# definition is the one in effect at run time.
get_conf()
{
local path_file="$1"
local conf_par="$2"
local comment_re='^[[:space:]]*#'
local line key value
response=""
if [ ! -f "$path_file" ] || [ -z "$conf_par" ]; then
	return 1
fi
while IFS= read -r line || [ -n "$line" ]; do
	# Comment lines and lines without an assignment carry no setting.
	[[ "$line" =~ $comment_re ]] && continue
	[[ "$line" == *=* ]] || continue
	# Split at the FIRST "=" so values may themselves contain "=".
	key="${line%%=*}"
	value="${line#*=}"
	# Trim leading and trailing whitespace from the key.
	key="${key#"${key%%[![:space:]]*}"}"
	key="${key%"${key##*[![:space:]]}"}"
	# Only the key side is compared, so the name appearing inside some
	# other setting's value does not count as a match.
	[ "${key,,}" = "${conf_par,,}" ] || continue
	value="${value#"${value%%[![:space:]]*}"}"
	value="${value%"${value##*[![:space:]]}"}"
	response="$value"
done < "$path_file"
return 0
}

# Look up a setting in the system-wide configuration file.
#   $1 - setting name
# On success the value is stored in the global "conf" and 0 is returned.
# Returns 1 with "conf" emptied when the lookup fails or yields an empty
# value, and 1 without touching "conf" when no name was given.
# NOTE: get_system_conf is defined again further down in this file; the
# later definition is the one in effect at run time.
get_system_conf()
{
# Use the file-level system_conf_path (the /etc/auto_dl/autodl.conf location
# introduced in V1.1) instead of the old hard-coded /etc/autodl.conf.
local conf_file="${system_conf_path:-/etc/auto_dl/autodl.conf}"
if [ -z "$1" ]; then
	return 1
fi
if get_conf "$conf_file" "$1" && [ -n "$response" ]; then
	conf=$response
else
	conf=""
	return 1
fi
}

# Look up a setting in an arbitrary OS configuration file.
#   $1 - path of the file to read
#   $2 - setting name
# Stores the value in the global "conf" and returns 0 when found.
# Returns 1 with "conf" emptied when get_conf fails or the value is empty;
# returns 1 without touching "conf" when no file path was given.
get_os_conf()
{
local os_file="$1"
[ -n "$os_file" ] || return 1
if get_conf "$os_file" "$2" && [ -n "$response" ]; then
	conf=$response
	return 0
fi
conf=""
return 1
}

# Resolve a setting from the system configuration, falling back to a default.
#   $1 - setting name
#   $2 - default value used when the system configuration has no value
# The result is stored in the global "setting" and one log line is printed
# (and piped through $pipe_to_log when that is non-empty).
# "show_setting" is set to "no" when the default in use is empty.
# NOTE: define_setting is defined again further down in this file; the
# later definition is the one in effect at run time.
define_setting()
{
local var_name="$1"
local default="$2"
local message
if get_system_conf "$var_name"; then
	setting=$conf
	message="$(date "+%F %T %z") System defined $var_name=$setting"
else
	setting="$default"
	if [ -z "$setting" ]; then show_setting="no"; else show_setting=$setting; fi
	message="$(date "+%F %T %z") Default $var_name=$show_setting"
fi
# Only the internally built pipe suffix is evaluated; the message is
# referenced by name inside the eval, so text taken from the configuration
# file is never re-parsed as shell code.
eval 'printf "%s\n" "$message"' "$pipe_to_log"
}

# Determine the fallback language: the LANG entry of /etc/default/locale
# when get_os_conf finds one, otherwise en_US.UTF-8.
# Sets the globals "os_conf" and "default_language".
set_default_language()
{
get_os_conf "/etc/default/locale" "LANG"
os_conf=$conf
default_language="${os_conf:-en_US.UTF-8}"
}

# Resolve the LANG setting: compute the OS-level default first, then let
# define_setting pick a configured value or fall back to that default.
# The outcome is whatever define_setting leaves in "setting".
set_language() {
	set_default_language
	define_setting "LANG" "$default_language"
}

# Switch the optional log pipe on or off.
#   $1 - "on" or "off" (case-insensitive); any other value changes nothing
# Sets the global "pipe_to_log" to a " | tee -a <file>" suffix or to "".
# The target is $logs_dir/$log_file; log_file is not assigned anywhere in
# this file, so $main_log is used when it is unset or empty.
logging()
{
local sw="${1,,}"
case "$sw" in
	on)
		pipe_to_log=" | tee -a $logs_dir/${log_file:-$main_log}"
		;;
	off)
		pipe_to_log=""
		;;
esac
return 0
}

# Check that a file is an executable bash/sh script.
#   $@ - path of the file (all arguments are joined with spaces)
# Returns:
#   0   first line is exactly "#!/bin/bash" or "#!/bin/sh"
#   100 no path given
#   101 not a regular file
#   102 execute permission missing
#   103 the first 12 bytes do not hold one of the accepted shebang lines
bash_or_sh()
{
local pathfile="$*"
local header
[ -z "$pathfile" ] && return 100
[ -f "$pathfile" ] || return 101
[ -x "$pathfile" ] || return 102
# 12 bytes are enough for "#!/bin/bash" plus its newline. The path is
# quoted so that names containing spaces reach dd as one operand, and the
# pattern is anchored because a shebang only counts at the very start.
header=$(dd if="$pathfile" bs=12 count=1 2>/dev/null | grep -E '^#!/bin/(bash|sh)$')
if [ -z "$header" ]; then
	return 103
fi
return 0
}

# Read one "Key=Value" setting from a configuration file.
#   $1 - path of the configuration file
#   $2 - setting name; compared case-insensitively with the text left of
#        the first "=" on each line
# The value (text right of the first "=", surrounding whitespace trimmed)
# is left in the global "response"; it is empty when the key is absent.
# When the key occurs more than once the last occurrence wins.
# Returns 1 if the file does not exist or no name was given, otherwise 0.
get_conf()
{
local path_file="$1"
local conf_par="$2"
local comment_re='^[[:space:]]*#'
local line key value
response=""
if [ ! -f "$path_file" ] || [ -z "$conf_par" ]; then
	return 1
fi
while IFS= read -r line || [ -n "$line" ]; do
	# Comment lines and lines without an assignment carry no setting.
	[[ "$line" =~ $comment_re ]] && continue
	[[ "$line" == *=* ]] || continue
	# Split at the FIRST "=" so values may themselves contain "=".
	key="${line%%=*}"
	value="${line#*=}"
	# Trim leading and trailing whitespace from the key.
	key="${key#"${key%%[![:space:]]*}"}"
	key="${key%"${key##*[![:space:]]}"}"
	# Only the key side is compared, so the name appearing inside some
	# other setting's value does not count as a match.
	[ "${key,,}" = "${conf_par,,}" ] || continue
	value="${value#"${value%%[![:space:]]*}"}"
	value="${value%"${value##*[![:space:]]}"}"
	response="$value"
done < "$path_file"
return 0
}

# Choose the default download directory under the user's home:
# "$user_path/Videos" when that directory exists, otherwise
# "$user_path/Videot". The result goes to the global
# "default_download_path"; nothing is created here.
set_default_download_path()
{
local videos_dir="$user_path/Videos"
if [ ! -d "$videos_dir" ]; then
	videos_dir="$user_path/Videot"
fi
default_download_path="$videos_dir"
}

# Look up a setting in the user's own configuration file.
#   $1 - setting name
# The file is <home>/.auto_dl/autodl.conf, where <home> is the global
# "user_path" (the home directory handed to the script); this keeps the
# lookup working for homes that are not /home/<name>. Only when user_path
# is not set yet does it fall back to /home/$username.
# The path used is left in the global "user_conf_path".
# Stores the value in the global "conf" and returns 0 when found; returns 1
# with "conf" emptied when nothing usable was found, and 1 without touching
# "conf" when no name was given.
get_user_conf()
{
if [ -n "$user_path" ]; then
	user_conf_path="$user_path/.auto_dl/autodl.conf"
else
	user_conf_path="/home/$username/.auto_dl/autodl.conf"
fi
if [ -z "$1" ]; then
	return 1
fi
if get_conf "$user_conf_path" "$1" && [ -n "$response" ]; then
	conf=$response
else
	conf=""
	return 1
fi
}

# Look up a setting in the system-wide configuration file named by the
# global "system_conf_path".
#   $1 - setting name
# Stores the value in the global "conf" and returns 0 when found; returns 1
# with "conf" emptied when get_conf fails or the value is empty, and 1
# without touching "conf" when no name was given.
get_system_conf()
{
[ -n "$1" ] || return 1
get_conf "$system_conf_path" "$1"
if [ $? != "0" ] || [ -z "$response" ]; then
	conf=""
	return 1
fi
conf=$response
}

# Resolve a setting with the precedence user configuration, then system
# configuration, then the supplied default.
#   $1 - setting name
#   $2 - default value
# The chosen value is stored in the global "setting". One line naming the
# source is printed and appended to the main log. When the default is used,
# "show_setting" holds the logged text ("no" for an empty default).
define_setting()
{
var_name="$1"
default="$2"
local origin shown
if get_user_conf "$var_name"; then
	setting=$conf
	origin="User defined"
	shown=$setting
elif get_system_conf "$var_name"; then
	setting=$conf
	origin="System defined"
	shown=$setting
else
	setting="$default"
	origin="Default"
	show_setting=${setting:-no}
	shown=$show_setting
fi
echo "$(date "+%F %T %z") $origin $var_name=$shown " | tee -a $logs_dir/$main_log
}

# Count the whitespace-separated words of a text that contain a given
# substring.
#   $1  - substring to look for (taken literally)
#   $2… - text to search; all remaining arguments are joined
# The count is returned in the global "matches". Always returns 0.
check_match()
{
matches=0
local needle="$1"
shift
local -a words=()
local candidate
# An empty search string never matches.
[ -n "$needle" ] || return 0
# Split on whitespace with read instead of an unquoted expansion, so that
# "*" or "?" in the text is not expanded against the current directory.
read -r -d '' -a words <<< "$*"
for candidate in "${words[@]}"; do
	if [[ "$candidate" == *"$needle"* ]]; then
		matches=$(( matches + 1 ))
	fi
done
return 0
}

# Inspect a downloaded file that failed RSS validation and decide whether
# it looks like a maintenance notice.
#   $1 - path of the downloaded file
# STYLE blocks and markup tags are stripped, the text is lower-cased and
# searched for the maintenance keywords with check_match. With at least one
# hit the break is logged to the failure log and the message is printed;
# with none the file is logged as invalid and close_all 68 is called.
check_maintenance()
{
maint_words=(huolto katko huolletaan häiriö)
matching_words=0
info=$(sed '/\<STYLE/,/\/STYLE\>/d; s/<[^>]*>//g' < "$1" | tr -s '\n')
# The lower-cased copy is the same for every keyword.
text=${info,,}
for word in ${maint_words[*]}
do
	check_match "$word" "$text"
	(( matching_words += matches ))
done
if [ "$matching_words" = "0" ]; then
	echo "$(date "+%F %T %z") Invalid feed file: $f_link" | tee -a $logs_dir/$fail_log
	close_all 68
	return
fi
echo "$(date "+%F %T %z") Maintenance break detected: $f_link" | tee -a $logs_dir/$fail_log 	
echo "$(date "+%F %T %z") ${info:0:80}" | tee -a $logs_dir/$fail_log
echo -e "Message:\n"$info
}

# Create the download-attempt counter for the current link ($link_id) with
# the value 0, unless a counter file already exists.
init_dl_count()
{
local counter_file="$data_root/dl_count/$link_id"
if [[ ! -e "$counter_file" ]]; then
	echo 0 >"$counter_file"
fi
}

# Add one to the download-attempt counter of the current link ($link_id).
# A missing counter file counts as 0, so the first call always leaves 1 in
# the file. Content that is not a plain non-negative integer is also
# treated as 0.
inc_dl_count()
{
local counter_file="$data_root/dl_count/$link_id"
local cnt=0
if [ -e "$counter_file" ]; then
	cnt=$(cat "$counter_file")
fi
[[ "$cnt" =~ ^[0-9]+$ ]] || cnt=0
# 10# forces base 10 so a value such as "08" is not read as octal.
cnt=$(( 10#$cnt + 1 ))
echo "$cnt" >"$counter_file"
}

# Download one programme with yle-dl and record the outcome.
# Works on globals set by the caller: link, link_id, long_title,
# download_path, par0, user_path, record_exit_task.
# Steps:
#   - mark the job as done up front (the marker is removed if the download
#     fails)
#   - log start and result to recorded.log
#   - count the attempt with inc_dl_count
#   - hand files owned by root under the download path to the user
#   - optionally start the user's exit task, detached, as that user
record()
{
touch "$data_root/jobs_done/$link_id" &&
echo "Record:"
echo "Title= $long_title"
echo "Link= $link"
echo
# The user name is the last component of the home directory path.
username="${user_path##*/}"
echo "$(date "+%F %T %z")  Start: $long_title $link --> $download_path" | tee -a "$logs_dir/recorded.log"
[[ -z "$LANG" ]] && export LANG="fi_FI.UTF-8"
# $par0 stays unquoted on purpose: it is either empty or " --vfat " and
# must disappear or split into a bare option. Path and link are quoted so
# a download directory containing spaces is passed as one argument.
if yle-dl $par0 --destdir "$download_path" "$link" &>/dev/null; then
	echo "$(date "+%F %T %z")  Done: $long_title $link" >> "$logs_dir/recorded.log"
else
	echo "$(date "+%F %T %z")  FAILURE: $long_title $link" >> "$logs_dir/recorded.log"
	# Drop the done marker so the main loop can try this link again.
	rm -f -- "$data_root/jobs_done/$link_id"
fi
inc_dl_count
find "$download_path" -user root -exec chown "$username:$username" {} \;
if [ -n "$record_exit_task" ]; then
	# Run the exit task fully detached, with its standard streams closed.
	( su "$username" -c "$record_exit_task" 2>&- 1>&- 0>&- & ) &
	sleep 1
	disown -h
	sleep 1
fi
}

# Extract the feed category.
#   $1 - path of the feed file
# Takes the first line containing "category" (case-insensitive), removes
# markup tags together with any spaces directly in front of them, and
# stores the rest in the global "category" (empty when no line matches).
get_category()
{
# grep -m 1 stops at the first matching line; this replaces
# "cut -d <newline> -f1", whose behaviour is non-portable.
category=$(grep -i -m 1 "category" "$1" | sed 's/ *<[^>]*>//g')
}

# Derive the "medium" title used to match an item against the user's
# recording list.
#   $1 - long title as found in the feed
# Takes the text after the last "|", cuts it at the first ":", collapses
# runs of whitespace to single spaces, removes parenthesised parts and
# trailing blanks, and stores the result in the global "medium_title".
get_medium_title()
{
local par="$1"
local pipe_tail="${par##*|}"
local head="${pipe_tail%%:*}"
local -a words=()
# Split with read rather than an unquoted echo: whitespace is still
# normalised, but "?" or "*" in a title is no longer expanded against the
# files in the current directory.
read -r -d '' -a words <<< "$head"
medium_title="$(printf '%s\n' "${words[*]}" | sed 's/(.*)//g;s/[ \t]*$//')"
}

# Count the entries of a converted feed whose link field does not contain
# "http://" and report them in the failure log.
#   $@ - feed file name without the ".tsv" suffix
# The count is left in the global "non_link_count"; nothing is logged when
# it is zero.
check_tsv_links()
{
non_link_count=$(cut -f2 "$@.tsv" | cut -d '=' -f2 | grep -vc "http://")
if (( non_link_count > 0 )); then
	echo "$(date "+%F %T %z") Invalid data detected $non_link_count times in tsv link field." | tee -a $logs_dir/$fail_log
fi
}

# Rebuild the set of "pipelink" marker files from a converted feed.
#   $@ - feed file name without the ".tsv" suffix
# Old markers in $work_dir/pipelinks are removed. For every link field that
# contains "http://" an empty file is created there, named after the link
# with each "/" replaced by "|". Entries without "http://" are counted,
# reported in the failure log and skipped, as the log message states.
# The number of skipped entries is left in the global "non_link_count".
get_latest_feed_pipelinks()
{
local filename="$*"
local feed_link
rm "$work_dir"/pipelinks/* 2>/dev/null
non_link_count=$(cut -f2 "$filename.tsv" | cut -d '=' -f2 | grep -vc "http://")
if [ "$non_link_count" -gt 0 ]; then
	echo "$(date "+%F %T %z") Skipping over invalid data which was detected $non_link_count times in tsv link field." | tee -a $logs_dir/$fail_log
fi
# A read loop is used instead of xargs so that quotes or backslashes in the
# feed data cannot abort the processing of the remaining links.
cut -f2 "$filename.tsv" | grep -i "link=" | cut -d '=' -f2 \
| grep "http://" \
| while read -r feed_link; do
	[ -n "$feed_link" ] || continue
	touch -- "$work_dir/pipelinks/${feed_link//\//|}"
done
}
# Load the helper script whose name starts with "xml2tsv-" from the install
# directory. The xml2tsv command used by the main loop is not defined in
# this file, so it presumably comes from here.
. $inst_dir/xml2tsv-*

# Validate the feed link before any download is attempted.
#   $@ - feed URL
# Ends the program through close_all with:
#   69 when no link was given
#   70 when the link does not start with "http://"
#   71 when the host lookup yields no IPv4 address
# On success the globals "domain" (host part as written in the URL,
# including any port), "host_resp" and "ip_feed" are set.
validate_link()
{
local f_link="$*"
local short_link
local lookup_host
if [ -z "$f_link" ] ; then
	echo
	echo "$(date "+%F %T %z") RSS feed reader error: Parameter missing." | tee -a $logs_dir/$fail_log
	close_all 69
else
	# Only the part before the first "&" is examined.
	short_link="${f_link%%&*}"
	# Quoted comparison: an empty or odd prefix is simply "not http" and
	# no longer triggers a shell error from the test command.
	if [[ "${short_link:0:7}" == "http://" ]] ; then
		domain="${short_link:7}"
		domain="${domain%%/*}"
	else
		echo
		echo "$(date "+%F %T %z") Not http link: $f_link" | tee -a $logs_dir/$fail_log
		close_all 70
	fi
fi
# DNS lookup needs the bare host name: drop "user@" and ":port" if present.
lookup_host="${domain##*@}"
lookup_host="${lookup_host%%:*}"
host_resp=$(host "$lookup_host")
ip_feed=$(echo "$host_resp" | grep -o -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')
if [ -z "$ip_feed" ] ; then
	echo
	# host_resp is left unquoted so a multi-line answer is logged on one line.
	echo "$(date "+%F %T %z") Domain" $domain":" $host_resp | tee -a $logs_dir/$fail_log
	close_all 71
fi
}

# Check that a downloaded file looks like an RSS document.
#   $1 - path of the downloaded file
# Every "<rss" and "/rss>" occurrence is collected into the global
# "rss_tags". Returns 0 when at least one was found; otherwise the problem
# is logged to the failure log and 73 is returned.
validate_rss()
{
rss_tags=$(grep -o -e "<rss" -e "/rss>" < "$1" | tr -d '\r\n')
[ -n "$rss_tags" ] && return 0
echo
echo "$(date "+%F %T %z") Not RSS destination: $f_link" | tee -a $logs_dir/$fail_log
return 73
}

# Shut the feed reader down: log the closing, leave and delete this run's
# work directory, then exit.
#   $* - exit status to use; when empty (for example when run from the
#        signal trap) the shell's plain "exit" rule applies
# The work directory is only deleted when its path is longer than 15
# characters and it exists, as a guard against removing a wrong directory.
close_all()
{
pass_error="$*"
echo
echo "$(date "+%F %T %z") Feed reader ("$$") closing" | tee -a $logs_dir/$main_log
cd $data_root
if (( ${#work_dir} > 15 )) && [[ -d "$work_dir" ]]; then
	rm -rf $work_dir 2>/dev/null
fi
exit $pass_error
}

# Prepare the run-time environment: switch logging on, create the data,
# log and per-process work directories, change into the work directory and
# install the signal handler.
# Sets the global "work_dir" to $data_root/.live_data/<pid of this shell>.
# Every missing prerequisite is reported to the failure log and ends the
# program through close_all with a distinct code (64-69).
init()
{
logging on
mkdir -p "$data_root"
if [ ! -d "$data_root" ]; then
	echo "Failed to find data root." | tee -a "$logs_dir/$fail_log"
	close_all 64
fi
mkdir -p "$logs_dir"
touch "$logs_dir/$main_log"
if [ ! -f "$logs_dir/$main_log" ] ; then
	echo
	# Report to the failure log; the previous "$failure.log" expanded an
	# unset variable and wrote to a file named ".log" instead.
	echo "$(date "+%F %T %z") Failed to find main log file." | tee -a "$logs_dir/$fail_log"
	close_all 65
fi
mkdir -p "$data_root/.live_data/$BASHPID"
if [ ! -d "$data_root/.live_data/$BASHPID" ] ; then
	echo
	echo "$(date "+%F %T %z") Failed to find work directory." | tee -a "$logs_dir/$fail_log"
	close_all 66
fi
work_dir="$data_root/.live_data/$BASHPID"
mkdir -p "$data_root/jobs_done"
if [ ! -d "$data_root/jobs_done" ]; then
	echo "$(date "+%F %T %z") Failed to find $data_root/jobs_done" | tee -a "$logs_dir/$fail_log"
	close_all 67
fi
mkdir -p "$work_dir/pipelinks"
if [ ! -d "$work_dir/pipelinks" ]; then
	echo "$(date "+%F %T %z") Failed to find $work_dir/pipelinks" | tee -a "$logs_dir/$fail_log"
	close_all 68
fi
mkdir -p "$data_root/dl_count"
if [ ! -d "$data_root/dl_count" ]; then
	echo "$(date "+%F %T %z") Failed to find $data_root/dl_count" | tee -a "$logs_dir/$fail_log"
	close_all 69
fi
cd "$work_dir"
# Clean up the work directory when the process is told to stop.
trap close_all HUP INT USR1 USR2 TERM
}

# Wait for $ttl minutes, in steps of ten seconds, while showing the
# remaining time on one console line that is rewritten at every step.
# Uses the global counter "i", which is 0 afterwards.
idle()
{
i=$(( $ttl*6 ))
while (( i > 0 )); do
	echo -ne "\rIdle...$(( i * 10 )) s   "
	sleep 10
	(( i-- ))
done
}

# Download the feed at $f_link into the file $feed_file_name with wget.
# wget's combined output is captured in the global "resp"; the function's
# exit status is wget's exit status.
get_feed_file()
{
printf '%s\n' "Receiving RSS feed..."
resp=$(wget -nv -O$feed_file_name "$f_link" 2>&1)
}

# React to a failed feed download: log the cause taken from wget's output
# in "resp", announce the retry and wait one feed cycle with idle.
# A three-digit status code found in the output is kept in the global
# "filt_resp"; 404, 500 and 503 get their own message, everything else is
# logged with the raw wget output. Every case ends in the same retry.
error_activity()
{
local reason
filt_resp=$(echo $(echo $resp | grep -o '\s[0-9]\{3\}[^0-9]'))
case $filt_resp in
	404:)
		reason="Http error, not found 404: $f_link" ;;
	500:)
		reason="Http error, feed server internal error 500: $f_link" ;;
	503:)
		reason="Service not available 503: $f_link" ;;
	*)
		reason="Http error: $resp" ;;
esac
echo; echo "$(date "+%F %T %z") $reason" | tee -a $logs_dir/$fail_log
echo; echo "$(date "+%F %T %z") Scheduling retry after $ttl minutes." | tee -a $logs_dir/$fail_log
idle
}

# Find the long title that belongs to a pipelink.
#   $1 - pipelink (a link with every "/" replaced by "|")
# Sets the globals "p_link", "link" (the pipelink turned back into a URL)
# and "long_title". The title is taken from the first row of
# "$feed_file_name.tsv" whose link value equals the URL exactly, so a link
# that is merely a prefix of another link (".../1234567" versus
# ".../12345678") no longer pulls in several titles. long_title is empty
# when no row matches.
get_title()
{
p_link="$1"
link="${p_link//|//}"		#http://...
# The URL is handed to awk through the environment so that none of its
# characters is interpreted as a pattern or an escape sequence.
long_title=$(wanted_link="$link" awk -F '\t' '
	{
		# The link value is the text between the first and second "=" of
		# field 2, which is how the pipelink names were produced.
		n = split($2, link_kv, "=")
		if (n < 2 || link_kv[2] != ENVIRON["wanted_link"]) next
		if (split($1, title_kv, "=") >= 2) print title_kv[2]; else print $1
		exit
	}' "$feed_file_name.tsv")
}

# ---------------------------------------------------------------------------
# Start-up: read the arguments, resolve the settings and validate them once.
#   $1 - RSS feed link
#   $2 - home directory of the user the recordings belong to
# ---------------------------------------------------------------------------
echo "$(date "+%F %T %z") RSS feed reader ("$$") started" | tee -a $logs_dir/$main_log
init
# NOTE: neither user_path nor username is set yet at this point, so the
# per-user configuration file is not consulted for LANG.
set_language
f_link="$1"
user_path="$2"
# The user name is the last component of the home directory path.
username="${user_path##*/}"
jobs_dir=$user_path/.auto_dl/adl_jobs
if [ -z "$f_link" ]; then
	echo "$(date "+%F %T %z") RSS link not specified." | tee -a $logs_dir/$fail_log
	close_all 73
fi
if [ -z "$user_path" ]; then
	echo "$(date "+%F %T %z") Homedir not specified." | tee -a $logs_dir/$fail_log
	close_all 74
fi
if [ ! -d "$user_path" ]; then
	echo "$(date "+%F %T %z") Invalid homedir $user_path" | tee -a $logs_dir/$fail_log
	close_all 75
fi
set_default_download_path
define_setting "DownloadPath" "$default_download_path"
download_path=$setting
if [ ! -d "$download_path" ]; then
	echo "$(date "+%F %T %z") Invalid download path $download_path" | tee -a $logs_dir/$fail_log
	close_all 76
fi
# Quoted so that a configured download path containing spaces still works.
chmod g+s "$download_path"
chown "$username:$username" "$download_path"
validate_link "$f_link"
echo "Feed link ok."
# Default polling cycle in minutes; replaced later by the feed's own ttl.
ttl=10
# The local feed file is named after the last path component of the link.
feed_file_name=$(echo "$f_link" | sed 's/.*\///')
echo "$(date "+%F %T %z") user=$username, home=$user_path, link=$f_link" | tee -a $logs_dir/$main_log
define_setting "UseWinFilenames" "No"
answer=${setting,,}
# Comparisons are quoted: a value with spaces must be reported as invalid
# rather than break the test command.
if [ "$answer" != "yes" ] && [ "$answer" != "no" ]; then
	echo "$(date "+%F %T %z") Invalid UseWinFilenames=$answer" | tee -a $logs_dir/$fail_log
	echo "$(date "+%F %T %z") Not using UseWinFilenames=$answer, using default" | tee -a $logs_dir/$main_log
fi
if [ "$answer" = "yes" ]; then
	echo "$(date "+%F %T %z") Accepted UseWinFilenames=$answer" | tee -a $logs_dir/$main_log
	par0=" --vfat "
else
	par0=""
fi
define_setting "RecordExitTask" ""
file_to_run=$setting
if [ -n "$file_to_run" ]; then
	bash_or_sh "$file_to_run"
	result="$?"
	# record_exit_task is only set when the file passed every check.
	case $result in
	0)		echo "$(date "+%F %T %z") Will run RecordExitTask=$file_to_run as user \"$username\"." | tee -a $logs_dir/$main_log
			record_exit_task="$file_to_run" ;;
	101)	echo "$(date "+%F %T %z") File not found: RecordExitTask=$file_to_run" | tee -a $logs_dir/$main_log ;;
	102)	echo "$(date "+%F %T %z") Execute permission not set for RecordExitTask=$file_to_run" | tee -a $logs_dir/$fail_log ;;
	103)	echo "$(date "+%F %T %z") Not bash/sh, will not run RecordExitTask=$file_to_run" | tee -a $logs_dir/$main_log ;;
	esac
fi
define_setting "MaxDownloadCount" 5
max_dl_count=$setting
# The limit is compared numerically in the main loop. A non-numeric value
# would make that comparison fail and mark every job as done without a
# single download attempt, so fall back to the default instead.
if ! [[ "$max_dl_count" =~ ^[0-9]+$ ]]; then
	echo "$(date "+%F %T %z") Invalid MaxDownloadCount=$max_dl_count, using default 5" | tee -a $logs_dir/$fail_log
	max_dl_count=5
fi
# ---------------------------------------------------------------------------
# Main loop, runs until the process is stopped: fetch the feed, validate it,
# convert it, start a recording for every item on the user's recording list,
# then wait one feed cycle ($ttl minutes) and start over.
# ---------------------------------------------------------------------------
for ((;;))
do
	# Keep fetching until wget succeeds; error_activity logs the failure
	# and waits one cycle before the next attempt.
	data_ok="false"
	until [ "$data_ok" = "true" ]
	do
		get_feed_file
		es="$?"
		if [ "$es" != "0" ] ; then 
			error_activity  
		else
			data_ok="true"	
		fi
	done
	echo "Feed received."
	validate_rss "$feed_file_name"
	if [ "$?" = "0" ] ; then
		echo "RSS feed contents validation passed."
		echo -n "Filtering file..."  
		# Strip carriage returns, then put the cleaned copy in place of
		# the downloaded file.
		cat "$feed_file_name" | tr -d '\r' > "$feed_file_name.fixed" 
		rm "$feed_file_name"
		mv "$feed_file_name.fixed" "$feed_file_name"
		echo " done."
		# Lines containing "ttl", with tags and spaces removed, give the
		# polling cycle in minutes.
		# NOTE(review): the value is used unchecked; several matching lines
		# or non-numeric text would reach the arithmetic in idle.
		ttl_received=$(grep -i "ttl" $feed_file_name | sed 's/<[^>]*>//g; s/ *//g')
		if [ "$ttl_received" != '' ] ; then
			ttl=$ttl_received
			echo "Using received RSS cycle $ttl minutes."
			else
			echo "Using default RSS cycle $ttl minutes."
		fi
		get_category "$feed_file_name" 
		echo "Category= $category"
			echo -n "Converting to tsv..."
			# xml2tsv is not defined in this file; presumably it comes from
			# the sourced xml2tsv-* helper and writes "$feed_file_name.tsv",
			# which the functions called below read.
			xml2tsv $feed_file_name
			echo " done."
			echo "Getting feed links..."
			get_latest_feed_pipelinks "$feed_file_name"
			echo "Done."
			# One marker file per feed link; its name is the link with "/"
			# replaced by "|".
			# NOTE(review): with no marker files the glob presumably stays
			# literal and the loop body runs once for "*" - verify.
			for pathfile in $work_dir/pipelinks/*
			do
				pipe_link="$(echo ${pathfile##*/})"
				get_title $pipe_link		#Long title
				get_medium_title "$long_title" 
				# The per-feed job directory is named after the feed link,
				# again with "/" replaced by "|".
				jobname=$(echo $f_link | sed 's/\//|/g')  
				# An entry named like the medium title in that directory
				# means the programme is on the recording list.
				if [ -e "$jobs_dir/$jobname/$medium_title" ]; then
				echo "Matching title "\"$medium_title\" "found on recording list of: "\"$username\"
				link=$(echo $pipe_link | sed 's/|/\//g')		#http://...
				# The id is the run of 3 to 8 digits at the very end of the link.
				link_id=$(echo $link | grep -oE [[:digit:]]\{3,8\}$ )
						if [ ! -e "$data_root/jobs_done/$link_id" ] ; then
						init_dl_count
						# Record only while the attempt counter is below the limit.
						if [ "$(cat $data_root/dl_count/$link_id)" -lt "$max_dl_count" ]; then
						# Start the recording in a detached background
						# subshell so the feed loop is not blocked.
						( record "$link" & ) & 
						sleep 1
						disown -h
						sleep 1
						else
						# Limit reached: log it and mark the job as done so
						# it is not tried again.
						echo "$(date "+%F %T %z")  $max_dl_count times FAILURE: $long_title" $link | tee -a $logs_dir/recorded.log
						echo "$(date "+%F %T %z")  No more trying $long_title" $link | tee -a $logs_dir/recorded.log 
						touch "$data_root/jobs_done/$link_id"
						fi
					else
					echo "This was already done: $medium_title, id: $link_id"
					fi
				fi
			done	
	else  
	# Not an RSS document: check whether it is a maintenance notice.
	check_maintenance "$feed_file_name"
    fi	 
	idle
	echo -e "\rTime for new feed...                    "
done
