#!/usr/bin/env perl
#
# EMG WATCHDOG
#
# This watchdog will monitor EMG process, check disk space and more.
# If a problem arises it will try to remedy the problem and send an e-mail notification.
# It also features an integrated web server which provides a json api for EMG server restarts etc.
#
# You can create a file "emg_watchdog.cfg" in the same directory and override conf variables in it:
# @notify_recipients = ('my_name@my_domain.com');
#
# Available api calls:
# /api/ping - Check that watchdog responds
# /api/emg_start - Start emgd
# /api/emg_stop - Stop emgd (it may be restarted by the watchdog on the next check)
# /api/emg_status - Get status for emgd process
# /api/get_log_file_list - Get list of EMG log files
# /api/get_log_file - Get contents (tail) of an EMG log file (params: file, maxrows, search_string)
#

use strict;

use constant LOG_CRIT => 'CRIT';
use constant LOG_ERR => 'ERR';
use constant LOG_WARN => 'WARN';
use constant LOG_INFO => 'INFO';

use constant URI_EMG_START => '/api/emg_start';
use constant URI_EMG_STOP => '/api/emg_stop';
use constant URI_EMG_STATUS => '/api/emg_status';
use constant URI_GET_LOG_FILE_LIST => '/api/get_log_file_list';
use constant URI_GET_LOG_FILE => '/api/get_log_file';
use constant URI_PING => '/api/ping';

use builtin qw( true false );

# You need to install these cpan modules if they are not already present in your system.
# For example, run "cpan Email::Simple" to install that module (and its dependencies).
use Email::Sender::Simple qw(sendmail);
use Email::Sender::Transport::SMTP;
use Email::Simple;
use Email::Simple::Creator;
use File::Basename;
use File::Copy;
use Filesys::Df;
use HTTP::Daemon;
use HTTP::Status;
use IPC::Shareable (':lock');
use JSON;
use Net::Subnet;
use POSIX qw(strftime);
use URI::QueryParam;

# Configuration variables - modify to match your environment, or override them
# in "emg_watchdog.cfg" (loaded further down if it exists next to this script).

# EMG config dir (also exported to child processes as the EMGDIR environment variable)
our $emg_dir = '/home/emg/etc';
# Watchdog log file
# NOTE: the path is built from $emg_dir right here, so overriding only $emg_dir
# in the cfg file does not move the log file - override $logfile as well.
our $logfile = "$emg_dir/log/emg_watchdog.log";
# Stop emgd quickly (signal emgd to stop, wait a few seconds and then kill it)
our $emg_quickstop = 1;
# Specifies how often (in seconds) checks should be run
our $check_interval = 60;
# Specifies how often (in seconds) notifications should be sent for persistent error (21600 secs = 6 hours)
our $notify_interval = 21600;
# File system to check for used space
our $fs_to_check = '/';
# File system usage limit (in %), notification will be sent when limit exceeded
our $fs_limit = 80;
# Port on which integrated web server listens for incoming requests
our $listen_port = 3000;
# Array with recipients for e-mail notifications
# (the default is a single empty string, which is skipped when notifications are sent)
our @notify_recipients = ('');
# Example with two recipients
#our @notify_recipients = ('john1@example.com', 'john2@example.com');
# E-mail notification "From" address
our $mail_from = 'changeme_from@example.com';
# E-mail notification subject prefix (consider adding hostname for easier identification)
our $mail_subject_prefix = 'EMG watchdog';
# IP address of SMTP server to use for sending e-mails
our $smtp_server = '127.0.0.1';
# Port of SMTP server to use for sending e-mails
our $smtp_port = '25';
# Username for SMTP server authentication (undef = no auth)
our $smtp_username = undef;
# Password for SMTP server authentication (undef = no auth)
our $smtp_password = undef;
# Allowed client ip addresses / subnets for web server
# (requests from any other address are answered with "Client not allowed")
our $allowed_client_ips = subnet_matcher qw(
	127.0.0.1/32
	192.168.0.0/24
);
# When true, the watchdog runs "emgd" itself to start EMG.
# When false (default), the watchdog never runs "emgd" to start it; it only
# creates/removes the block file "$emg_dir/block_emgd".
# NOTE(review): presumably an external supervisor starts emgd while the block
# file is absent - confirm against the deployment.
our $start_emgd_manually = false;

# You shouldn't need to modify anything below...

# Directory in which this script is located
my $dirname = dirname(__FILE__);
# Configuration file where we can override above "our" variables
my $cfg_file = "$dirname/emg_watchdog.cfg";

# Load the overrides if the file exists. The file is executed as Perl code;
# "require" dies if it does not compile or does not return a true value.
if(-f $cfg_file) {
	require $cfg_file;
}

# Auto-flush output
$| = 1;

# SMTP transport used for notifications (set by init_smtp_client)
my $mail_transport = undef;
# Tie object of the shared lock around EMG start/stop/check (set by init_lock)
my $emglock;
# Time of the last disk space notification (epoch seconds, 0 = no problem pending)
my $last_notify_diskspace = 0;
# Time of the last EMG startup failure notification (epoch seconds, 0 = no problem pending)
my $last_notify_emg_startup = 0;
# Block file: while it exists check_emg does nothing. It is built after the
# cfg file has been loaded, so it follows an overridden $emg_dir.
my $block_name = "$emg_dir/block_emgd";

# Set up environment variables
$ENV{EMGDIR} = $emg_dir;

# Start EMG (or allow it to be started).
#
# When $start_emgd_manually is true, "emgd" is run and its output captured.
# Otherwise the block file is removed and nothing is executed.
#
# Returns undef on success. On failure returns a defined string: the output of
# "emgd", or an error text if "emgd" could not be executed at all.
sub start_emgd
{
	if ($start_emgd_manually) {
		my $output = `emgd`;
		if ($? != 0) {
			# Backticks yield undef when the command could not be started.
			# Returning that undef would look like success to the callers,
			# which test the result with defined().
			return defined($output) ? $output : "Could not run emgd: $!";
		}
	} else {
		unlink $block_name;
	}
	# Explicit undef = success (callers use the result in scalar context)
	return undef;
}

# Handle an explicit EMG start request (used by the /api/emg_start call).
#
# Holds the EMG lock while it runs. Sends a notification for the request and
# a second one with the outcome. When start_emgd reports a failure, the
# notification contains the emgd output and the last 100 lines of the general log.
sub emg_start
{
	$emglock->shlock;
	notify(LOG_WARN, "EMG startup requested");

	my $emgd_output = start_emgd();
	if (defined $emgd_output) {
		# Wait a moment before collecting the log tail
		sleep 1;
		my $general_log_tail = `tail -100 $emg_dir/log/general`;
		my $details = "Output from 'emgd':\n$emgd_output\n--\nGeneral log file (last 100 lines):\n" . $general_log_tail;
		notify(LOG_CRIT, "EMG startup failed", $details);
	}
	else {
		notify(LOG_WARN, "EMG started");
	}

	$emglock->shunlock;
}

# Handle an explicit EMG stop request (used by the /api/emg_stop call).
#
# Holds the EMG lock while it runs and sends notifications before and after.
# Unless $start_emgd_manually is set, the block file is created first so that
# check_emg leaves EMG alone afterwards.
# With $emg_quickstop, emgd is asked to stop in the background and any
# remaining emgd process is killed a few seconds later; otherwise
# "emgd --stop" is run synchronously.
sub emg_stop
{
	$emglock->shlock;
	notify(LOG_WARN, "EMG stop requested");
	if (!$start_emgd_manually) {
		# Create (or truncate) the block file
		if (open(my $fh, '>', $block_name)) {
			close($fh);
		} else {
			do_log(LOG_ERR, "Could not create block file $block_name: $!");
		}
	}
	if ($emg_quickstop) {
		if (system('emgd --stop >/dev/null 2>&1 &') == 0) {
			sleep 3;
		}
		# List form with a single-quoted pattern: inside backticks "\b" is
		# interpolated as a backspace character, so the word boundary never
		# reached pkill and no process was ever matched.
		system('pkill', '-9', '-f', 'emgd\b');
		sleep 1;
	} else {
		`emgd --stop`;
	}
	notify(LOG_WARN, "EMG stopped");
	$emglock->shunlock;
}

# List the regular files in the EMG log directory ("$emg_dir/log").
#
# Returns a list of hash refs, sorted by file name, each with the keys
# filename, size (bytes) and mtime (local time, "YYYY-MM-DD HH:MM:SS").
# Entries whose name starts with "." and entries that are not plain files
# are left out.
sub get_log_file_list
{
	my $logdir = "$emg_dir/log";

	opendir(my $dh, $logdir);
	my @entries = readdir($dh);
	closedir($dh);

	my @fileinfos;
	for my $name (sort @entries) {
		# Skip file names that start with "."
		next if $name =~ /^\./;

		my $path = "$logdir/$name";
		next unless -f $path;

		# stat fields: 7 = size in bytes, 9 = last modification time
		my ($size, $mtime) = (stat($path))[7, 9];
		push(@fileinfos, {
			filename => $name,
			size     => $size,
			mtime    => strftime("%Y-%m-%d %H:%M:%S", localtime($mtime)),
		});
	}
	return @fileinfos;
}

# Return the last rows of an EMG log file, optionally filtered.
#
# Parameters:
#   $file          - name of a file in "$emg_dir/log" (plain name, no path)
#   $maxrows       - maximum number of rows to return (default 100)
#   $search_string - optional whitespace separated search terms; only rows
#                    containing ALL terms are returned. Terms are matched as
#                    literal, case-insensitive substrings.
#
# Returns the matching rows (list context) or the rows joined into one string
# (scalar context). Returns nothing if the request is rejected or the file
# cannot be read.
#
# All three parameters come straight from the HTTP query string, so nothing
# is ever passed through a shell: the file name is validated, "tail" is run
# in list form and the filtering is done in Perl.
sub get_log_file
{
	my ($file, $maxrows, $search_string) = @_;
	my $logdir = "$emg_dir/log";

	# Accept only a plain, visible file name: no directory separators (path
	# traversal), no control characters, no leading "." (same rule as
	# get_log_file_list).
	unless (defined($file) && $file =~ /\A[^\/\x00-\x1f]+\z/ && $file !~ /\A\./) {
		do_log(LOG_WARN, "Rejected log file request, invalid file name");
		return;
	}
	my $path = "$logdir/$file";
	unless (-f $path) {
		do_log(LOG_WARN, "Rejected log file request, no such log file: $file");
		return;
	}

	# Fall back to the default for a missing, zero or non-numeric row count
	unless (defined($maxrows) && $maxrows =~ /\A[0-9]+\z/ && $maxrows > 0) {
		$maxrows = 100;
	}
	$maxrows += 0;

	my @terms = ();
	if (defined($search_string)) {
		@terms = grep { $_ ne '' } split(/\s+/, lc($search_string));
	}

	do_log(LOG_INFO, "Get log file $file, rows $maxrows, search_string " . join(' ', @terms));

	my @rows = ();
	if (@terms) {
		my $fh;
		unless (open($fh, '<', $path)) {
			do_log(LOG_ERR, "Could not open log file $path: $!");
			return;
		}
		ROW: while (my $row = <$fh>) {
			my $haystack = lc($row);
			foreach my $term (@terms) {
				next ROW if index($haystack, $term) < 0;
			}
			# Keep only the last $maxrows matching rows in memory
			push(@rows, $row);
			shift(@rows) if @rows > $maxrows;
		}
		close($fh);
	} else {
		# List form open: arguments go to tail as-is, no shell is involved
		my $tail;
		unless (open($tail, '-|', 'tail', '-n', $maxrows, '--', $path)) {
			do_log(LOG_ERR, "Could not run tail on $path: $!");
			return;
		}
		@rows = <$tail>;
		close($tail);
	}

	# The original backtick based version returned a string in scalar context
	return wantarray ? @rows : join('', @rows);
}

# Check a client ip address against the configured $allowed_client_ips matcher.
# Returns whatever the matcher returns for the address (true if it is allowed).
sub is_allowed_client_ip($)
{
	my ($client_ip) = @_;
	return $allowed_client_ips->($client_ip);
}

# Build the JSON response for an api call.
#
# Parameters:
#   $status - HTTP status code (default 200)
#   $data   - optional payload, stored under the "data" key when defined
#
# Returns an HTTP::Response whose body is a JSON object with "status" set to
# "ok" for status 200 and "error" for anything else.
sub create_response
{
	my ($status, $data) = @_;
	$status ||= 200;

	my %payload = (status => ($status == 200 ? 'ok' : 'error'));
	$payload{data} = $data if defined($data);

	my $response = HTTP::Response->new($status);
	$response->header('Content-Type' => 'application/json');
	$response->content(encode_json(\%payload));

	return $response;
}

# Dispatch one api request and send the response on the connection.
#
# Parameters:
#   $c - client connection (used for send_response)
#   $r - request object (method and uri are inspected)
#
# Returns 1 if the request was handled and a response has been sent, 0 if the
# method is not GET or the path is not a known api call (the caller then
# sends the error response).
sub process_request($$)
{
	my($c, $r) = @_;

	if ($r->method ne 'GET') {
		return 0;
	}

	my $path = $r->uri->path;

	if ($path eq URI_PING) {
		$c->send_response(create_response());
		return 1;
	} elsif ($path eq URI_EMG_START) {
		emg_start();
		$c->send_response(create_response());
		return 1;
	} elsif ($path eq URI_EMG_STOP) {
		emg_stop();
		$c->send_response(create_response());
		return 1;
	} elsif ($path eq URI_GET_LOG_FILE_LIST) {
		my @fileinfos = get_log_file_list();
		$c->send_response(create_response(RC_OK, \@fileinfos));
		return 1;
	} elsif ($path eq URI_GET_LOG_FILE) {
		# Exact comparison like every other call: a regex match here would
		# accept any path that merely contains "/api/get_log_file".
		my %query = $r->uri->query_form;

		my $file = $query{file};
		my $maxrows = $query{maxrows};
		my $search_string = $query{search_string};

		my @rows = get_log_file($file, $maxrows, $search_string);

		$c->send_response(create_response(RC_OK, \@rows));
		return 1;
	} elsif ($path eq URI_EMG_STATUS) {
		# "running" if emgstat exits with 0, otherwise "unknown"
		my $data = 'unknown';
		if(system('emgstat >/dev/null') == 0) {
			$data = 'running';
		}
		$c->send_response(create_response(RC_OK, $data));
		return 1;
	}
	return 0;
}

# Start the integrated web server in a child process.
#
# The listening socket is created first (dies if that fails), then the
# process forks. The parent returns to the caller right away; the child
# serves api requests forever and never returns.
# Requests from clients that are not in $allowed_client_ips, and requests
# that process_request does not handle, are answered with a 403 response.
sub start_http_server
{
	my $d = HTTP::Daemon->new(
			LocalPort => $listen_port
			) || die "Could not start http server on port $listen_port: $!";

	my $pid = fork();
	# fork returns undef on failure. Without this check the parent would fall
	# through into the server loop below and monitoring would never start.
	die "Could not fork http server process: $!" unless defined($pid);
	if ($pid != 0) {
		# Parent: carry on with monitoring
		return;
	}

	# Child: serve requests, one connection at a time
	print "Server started at ", $d->url, "\n";

	while (my $c = $d->accept) {
		my $client_ip = $c->peerhost();
		while (my $r = $c->get_request) {
			if(!is_allowed_client_ip($client_ip)) {
				do_log(LOG_WARN, "Rejected request from ip $client_ip, uri=" . $r->uri->path);
				$c->send_response(create_response(RC_FORBIDDEN, 'Client not allowed'));
				next;
			}
			do_log(LOG_INFO, "Request from ip $client_ip, uri=" . $r->uri->path);
			unless(process_request($c, $r)) {
				$c->send_response(create_response(RC_FORBIDDEN, 'Invalid request'));
			}
		}
		$c->close;
		undef($c);
	}

	exit;
}

# Create the SMTP transport used for e-mail notifications and store it in
# $mail_transport. Host and port come from $smtp_server / $smtp_port; the
# SASL credentials are passed only when they are configured.
sub init_smtp_client
{
	# This has to be a list in parentheses. Assigning a { ... } hash reference
	# to %params produces a hash with one bogus key, so none of the settings
	# would reach the transport.
	my %params = (
		host => $smtp_server,
		port => $smtp_port,
	);

	# undef = no auth: leave the attributes out instead of passing undef
	$params{sasl_username} = $smtp_username if defined($smtp_username);
	$params{sasl_password} = $smtp_password if defined($smtp_password);

	$mail_transport = Email::Sender::Transport::SMTP->new( %params );
}

# Create (or attach to) the IPC::Shareable segment with key 'emgl' and keep
# the tie object in $emglock; its shlock/shunlock methods are used as the
# lock around EMG start, stop and check.
sub init_lock
{
	my %options = (
		key    => 'emgl',
		create => 1,
		mode   => 0600,
	);
	$emglock = tie(my $_emglock, 'IPC::Shareable', \%options);
}

# One-time initialisation: set up the SMTP transport, then the shared lock.
sub init_all
{
	init_smtp_client();
	init_lock();
}

# Send one notification e-mail through $mail_transport.
#
# Parameters: $to, $from, $subject, $message (plain text body).
#
# Returns the result of sendmail on success. A delivery failure is written to
# the watchdog log and nothing is returned: sendmail reports failures by
# throwing, and an unreachable mail server must not take the watchdog down.
sub notify_send
{
	my ($to, $from, $subject, $message) = @_;

	my $email = Email::Simple->create(
			header => [
			To      => $to,
			From    => $from,
			Subject => $subject,
			],
			body => $message,
			);

	my $result;
	my $sent = eval {
		$result = sendmail($email, { transport => $mail_transport });
		1;
	};
	unless ($sent) {
		my $error = $@;
		# The error may be an exception object with a message method, or a plain string
		my $reason = eval { $error->message };
		$reason = "$error" unless defined($reason);
		$reason = 'unknown error' if $reason eq '';
		# Keep the log entry on a single line
		$reason =~ s/\s+/ /g;
		do_log(LOG_ERR, "Could not send notification to $to: $reason");
		return;
	}

	return $result;
}

# Append one timestamped line to the watchdog log file ($logfile).
#
# Parameters:
#   $loglevel - one of the LOG_* constants
#   $subject  - text to log
#
# If the log file cannot be opened the line is written to STDERR instead, so
# that the entry is not lost silently.
sub do_log($$)
{
	my($loglevel, $subject) = @_;

	my $ts = strftime("%Y-%m-%d %H:%M:%S", localtime);
	my $line = "$ts $loglevel: $subject\n";

	# Three-argument open: the file name can never be taken as part of the mode
	if (open(my $fh, '>>', $logfile)) {
		print {$fh} $line;
		close($fh);
	} else {
		print STDERR "Could not open log file $logfile ($!): $line";
	}
}

# Log an event and e-mail it to every configured recipient.
#
# Parameters:
#   $loglevel - one of the LOG_* constants
#   $subject  - short description (logged, and used in the mail subject)
#   $message  - optional mail body; the subject is used when it is missing
#
# Empty entries in @notify_recipients are skipped.
sub notify($$;$)
{
	my ($loglevel, $subject, $message) = @_;
	$message ||= $subject;

	do_log($loglevel, $subject);

	my $mail_subject = "$mail_subject_prefix - $loglevel: $subject";
	for my $recipient (grep { $_ ne '' } @notify_recipients) {
		notify_send($recipient, $mail_from, $mail_subject, $message);
	}
}

# Verify that EMG commands can be run by executing "emgd -v".
# Prints a hint about EMGDIR and dies if the command does not exit with 0.
sub check_env
{
	my $exit_status = system('emgd -v >/dev/null');
	return if $exit_status == 0;

	print "Could not run \"emgd -v\", environment does not seem to be set up correctly.\n";
	print "EMGDIR is set to $ENV{EMGDIR}, does that seem correct?\n";
	die;
}

# Periodic EMG health check, called from the monitor loop.
#
# Does nothing while the block file exists. Otherwise, under the EMG lock,
# "emgstat" is run; a non-zero exit status means EMG is not running and
# start_emgd is called. Failures to get EMG up are reported at most once per
# $notify_interval seconds; once EMG is (or should be) up again the timer is
# reset so that the next failure is reported immediately.
sub check_emg
{
	# A block file means EMG was stopped on purpose - leave it alone
	return if (-e $block_name);

	$emglock->shlock;

	my $startup_failed = 0;
	# True when enough time has passed since the last failure notification
	my $notification_due = sub { time > ($last_notify_emg_startup + $notify_interval) };

	if (system('emgstat >/dev/null') != 0) {
		my $emgd_output = start_emgd();
		if (defined $emgd_output) {
			# start_emgd reported a failure
			$startup_failed = 1;
			if ($notification_due->()) {
				my $general_log_tail = `tail -100 $emg_dir/log/general`;
				my $details = "Output from 'emgd':\n$emgd_output\n--\nGeneral log file (last 100 lines):\n" . $general_log_tail;
				notify(LOG_CRIT, "EMG not running and startup failed", $details);
				$last_notify_emg_startup = time;
			}
		}
		elsif ($start_emgd_manually) {
			# emgd was run without an error, verify that EMG really came up
			sleep 2;
			if (system('emgstat >/dev/null') != 0) {
				$startup_failed = 1;
				if ($notification_due->()) {
					notify(LOG_CRIT, "EMG not running and startup seems to have failed");
					$last_notify_emg_startup = time;
				}
			}
		}
		notify(LOG_WARN, "EMG was not running but should now be up") unless $startup_failed;
	}

	if ($startup_failed) {
		$emglock->shunlock;
		return;
	}

	$last_notify_emg_startup = 0;
	$emglock->shunlock;
}

# Check the usage of file system $fs_to_check against $fs_limit (in %).
#
# A notification is sent when the limit is exceeded, at most once per
# $notify_interval seconds. When usage is back within the limit the timer is
# reset so that the next excess is reported immediately.
# If the usage cannot be determined this is logged and the check is skipped.
sub check_diskspace
{
	my $ref = df($fs_to_check);

	# Without this check a failed df() call would look like "usage OK"
	unless (defined($ref) && defined($ref->{per})) {
		do_log(LOG_ERR, "Could not get disk usage for file system $fs_to_check");
		return;
	}

	my $pc = $ref->{per};

	if($pc > $fs_limit) {
		if(time > ($last_notify_diskspace + $notify_interval)) {
			notify(LOG_WARN, "File system $fs_to_check, $pc% full (limit $fs_limit%)");
			$last_notify_diskspace = time;
		}
	} else {
		$last_notify_diskspace = 0;
	}
}

# Signal handler (installed for INT and TERM): send a "Stopped" notification,
# then terminate the process.
sub caught_signal {
	notify(LOG_INFO, "Stopped");
	exit 0;
}

# Main program

# Verify that we can run emg commands (dies if "emgd -v" fails)
check_env;

# Init stuff (SMTP transport and shared lock)
init_all;

# Spawn http server for serving actions.
# The server runs in a forked child process; only the parent returns here.
start_http_server;

notify(LOG_INFO, "Started");
sleep 1;

# Set up signal handling.
# The handlers are installed after the fork above, so they apply to this
# (monitoring) process only, not to the http server child.
$SIG{INT}  = \&caught_signal;
$SIG{TERM} = \&caught_signal;

# Monitor loop: run all checks, then wait $check_interval seconds
while(1) {
	check_diskspace;
	check_emg;

	print "Sleeping for $check_interval seconds ... \n";
	sleep $check_interval;
}
