#!/usr/bin/env perl
use strict;
# vim:ts=3:sw=3:noet
# Walter Doekes (C) 20060412 for Studenten.net
# License: GPLv3
#
# Call us as "/cgi-bin/$0?some@email.address".
# We'll reply with an HTTP status code.
# When you get 5xx, you can safely assume the email address is false.
#
#
# Changelog:
# * 20060503:
#	- Using the second/other priority MX's as well, but try the highest
#	  first.
#	- Also trying 3 times to connect, because some overloaded servers
#	  respond with an RST.
#	- Dies with invalid address when IP 127.0.0.1 is encountered, like it
#	  should (because that's an invalid MX).
#
# * 20060505:
#	- Fixed that a failing dig does not exit, but returns void like it
#	  should. Redirects stderr to /dev/null and returns if hostname
#	  starts with a minus (option) sign.
#
# * 20060509:
#	- Fixed that we try the A record if no MX is available.
#
# * 20060517:
#	- Added a timeout for connect. Some of hotmails servers time out
#	  a lot. Try a different one after 5 seconds (there are plenty of
#	  others).
#
# * 20060614:
#	- Added CONNECT_TIMEOUT variable. Fixed so that alarm doesn't fire
#	  after connect has succeeded but script is waiting (e.g. on a server
#	  doing a blacklist check).
#
# * 20061213:
#	- Added an IO_TIMEOUT.
# 
# * 20070122:
#	- Added RSET before QUIT. This should be more server-friendly.
#	- Removed the /usr/bin/dig parsing, using Net::DNS now instead.
#	- Changed around a lot with status codes. Now we should be able
#	  track down mailserver oddities more swiftly.
#
# * 20070823:
#	- Added mysql cache. See MYSQL_* settings and CACHE_TIME.
#	  CREATE TABLE smtpvrfy_cache (email VARCHAR(128) NOT NULL PRIMARY KEY,
#		 INDEX(email), checked DATETIME NOT NULL, status INT NOT NULL,
#		 message VARCHAR(255) NOT NULL, extra VARCHAR(255) NOT NULL);
#	- Shuffled around a bit in the code to accomodate for the cache.
#
# * 20080109:
#	- Don't use the cache in debug mode.
#	- Added write timeout.
#	- Added MY_SOURCEIP to use if you want to bind to a particular IP.
#
# * 20080221:
#	- Changed TRY_AGAIN http status code to 401. Firefox interprets 408
#	  nowadays, which causes a browser error to appear. All scripts should
#	  only use the first digit, so this change should not matter.
#
# * 20080225:
#	- Replaced <> (diamond operator) reading to recv-reading. We were
#	  using select and "[one] should not attempt to mix buffered I/O
#	  (like read or <FH>) with select, except as permitted by POSIX, 
#	  and even then only on POSIX systems. You have to use sysread
#	  [or recv] instead."
#
# * 20080319:
#	- Changed <test@somehostname> to <noreply@somehostname>, an
#	  address which must exist. Some servers do source address
#	  verification...
#
# * 20080403:
#	- Fixed recv-reading. Multiline talk from peer failed miserably.
#	- Added 421 handling on welcome.
#
# * 20080806:
#	- Fixed recv-reading when peer sends less than usual data.
#	- Minor cleanup before publishing.
#


########################################
# S E T T I N G S
########################################

# Some servers explicitly check the source address and accept only
# the reverse hostname for that IP. It helps if you set the following
# values:
my $MY_SOURCEIP = undef; # e.g. '1.2.3.4'
my $MY_HOSTNAME = undef; # e.g. 'host01020304.your.ptr';
# The FROM test needs a valid(ish) e-mail address. YOU MUST SET THIS
my $MY_EMAIL_ADDRESS = 'email-address-that-does-exist@yourown.domain';
# Mysql caching?
my $MYSQL_DB = ''; # doesn't use caching if unset, otherwise, set db name
my $MYSQL_USER = 'myusername';
my $MYSQL_PASS = 'mypassword';
my $CACHE_TIME = 600;
# Print all kinds of debug messages. Set to 0 to use it as a cgi script.
my $DEBUG = 0;
# Allow the connect (syn, syn+ack, ack) to take at most N seconds.
my $CONNECT_TIMEOUT = 5;
# Allow the connected session to take at most N seconds. (Make it large..!)
my $IO_TIMEOUT = 300;
# Try every smtp server N times.
my $RETRY_COUNT = 3;


########################################
# D E F I N E S
########################################

# Success
my @RCPTTO_SUCCESS = (250);
# Servfail, Mailbox busy, No disk space, No disk space
my @RCPTTO_EAGAIN = (421, 450, 452, 552);
# General error, Non-local user, Ambiguous user / Disallowed, Transaction failed
my @RCPTTO_EINVAL = (550, 551, 553, 554);
# Own errors
my %STATUS = (
	NO_EMAIL_PARSE => [5, '501 Email does not look like a valid e-mail address.'],
	BAD_DOMAIN => [5, '502 Domain does not exist or unparseable IP.'],
	CONNECT_FAILED => [5, '503 Connect timed out or rejected.'],
	IO_TIMEOUT => [5, '504 Read/write timeout.'],
	PROTO_ERROR => [5, '505 Protocol error.'],
	INTERNAL_ERROR => [4, '401 Internal error, try again later.'],
	TRY_AGAIN => [4, '400 Temporary error, address might be valid.'],
	IO_TIMEOUT2 => [4, '400 Temporary error, read/write timeout.'],
	INVALID => [5, '500 User does not exist.'],
	UNKNOWN => [4, '402 Unknown situation. Mail the developer.'],
	SUCCESS => [2, '200 Address validated.'],
);


########################################
# G L O B A L S
########################################

# Load modules
use Socket;
eval 'use DBI;' if $MYSQL_DB; # (libdbi-perl) for sql cache
use Net::DNS; # (libnet-dns-perl)

# Create objects
my $RESOLVER = Net::DNS::Resolver->new();
my $DBHANDLE;
$DBHANDLE = DBI->connect("DBI:mysql:$MYSQL_DB", $MYSQL_USER, $MYSQL_PASS) if $MYSQL_DB;


########################################
# F U N C T I O N S
########################################

# Reads a single line from the SMTP socket
# @return aLineOrEmptyOnError
{
	my $buffer = ''; # global to smtp_gets only
	sub smtp_gets {
		# Add to buffer unless we have a line already
		while ($buffer !~ /\n/s) {
			print STDERR "|| (filling read buffer)\n" if $DEBUG;
			my ($rin, $win, $ein);
			$rin = $win = $ein = '';
			vec($rin, fileno(SMTP), 1) = 1;
			$ein = $rin;
			my $nfound = select($rin, $win, $ein, $IO_TIMEOUT);
			if (int($nfound) == 0) {
				print STDERR "|| (read timeout)\n" if $DEBUG;
				return '';
			}
			my $buf;
			my $ret = recv(SMTP, $buf, 8192, 0);
			if (!defined($ret)) {
				print STDERR "|| (EOF or Error) ($!)\n" if $DEBUG;
				return '';
			}
			$buffer .= $buf;
			print STDERR "buf=\"\"\"$buffer\"\"\"\n" if $DEBUG > 1;
		}
		$buffer =~ s/^(.*?)\r?\n//s;
		print STDERR "<< $1\n" if $DEBUG;
		return $1;
	}
}

# Puts data on the SMTP socket
# @param data The string to send.
# @return successBoolean 
sub smtp_puts {
	my $line = shift;
	my ($rin, $win, $ein);
	$rin = $win = $ein = '';
	vec($win, fileno(SMTP), 1) = 1;
	$ein = $win;
	my $nfound = select($rin, $win, $ein, $IO_TIMEOUT);
	return 0 if int($nfound) == 0; # timeout..
	# Go, go, go
	print STDERR ">> $line\n" if $DEBUG;
	# Assume we'll send all in a single send.. these are tiny messages..
	my $ret = send(SMTP, $line . "\r\n", 0);
	return 0 if $ret == 0;
	return 1; # "Everything" is sent, we hope ;)
}


# Reads protocol status code from the SMTP socket,
# takes void,
# returns (intCode, strMessage).
sub get_smtp_status {
	my $line = smtp_gets();
	return (int(400), "400 Read timeout.") if $line eq ''; # timeout or EOF..
	$line =~ /^(\d{3})([ -])(.*)$/;
	my @ret = (int($1), "$1 $3");
	# Flush input buffer that starts with "nnn-..."
	while ($2 eq '-') {
		$line = smtp_gets();
		return (int(400), "400 Read timeout.") if $line eq ''; # timeout or EOF..
		$line =~ /^($1)([ -])(.*)$/;
	}	
	return @ret;
}


# Query smtp server
# takes strUserPart, strDomainPart
# returns (intStatus[2..5], strMessage, strExtraInfo).
sub query_user_through_smtp {
	my ($user, $domain) = @_;
	my @mxs_with_pref = mx($RESOLVER, $domain);
	my @mxs;
	foreach my $mx (@mxs_with_pref) {
		push @mxs, $mx->exchange;
	}
	# RFC says we have to try the domain if no MX record is listed.
	push @mxs, $domain if @mxs == 0;
			
	print STDERR "|| Data: user = $user, domain = $domain\n" if $DEBUG;

	my ($i, $mx, $iaddr, $paddr, $haddr);
	TRYING: for($i = 0; $i < $RETRY_COUNT; ++$i) {
		foreach $mx (@mxs) {
			socket(SMTP, PF_INET, SOCK_STREAM, getprotobyname('tcp'))
					or return @{$STATUS{INTERNAL_ERROR}}, 'Can\'t create socket.';
			# We may want to use a particular source IP. If it fails, ignore it.
			bind SMTP, sockaddr_in(0, inet_aton($MY_SOURCEIP)) if $MY_SOURCEIP;

			print STDERR "|| Connect: " . $mx . "\n" if $DEBUG; 
			$iaddr = inet_aton($mx)
					or return @{$STATUS{BAD_DOMAIN}}, 'Failed inet_aton.';
			$haddr = inet_ntoa($iaddr);
			print STDERR "|| Ip: $haddr\n" if $DEBUG;
			return @{$STATUS{BAD_DOMAIN}}, 'Hostname resolves to localhost.'
					if $haddr eq '127.0.0.1';
			$paddr = sockaddr_in(25, $iaddr);
			eval {
				local $SIG{ALRM} = sub { return; };
				alarm($CONNECT_TIMEOUT);
				print STDERR "|| (alarm $CONNECT_TIMEOUT seconds)\n" if $DEBUG;
				last TRYING if connect(SMTP, $paddr);
				print STDERR "|| (alarm didn't fire!)\n" if $DEBUG;
				alarm(0);
			}
		}
	}
	alarm(0);
	return @{$STATUS{CONNECT_FAILED}}, 'All MX\'s timed out or rejected us.'
			if $i == $RETRY_COUNT; # failure after N tries
	print STDERR "|| (connected)\n" if $DEBUG;

	$iaddr = getsockname(SMTP);
	my ($port, $saddr) = sockaddr_in($iaddr);
	$saddr = $MY_HOSTNAME || inet_ntoa($saddr);

	my (@ret, @status);
	@ret = get_smtp_status();
	if ($ret[0] != 220) {
		shutdown(SMTP, 2);
		return @{$STATUS{TRY_AGAIN}}, 'Got 421 (try again later) on welcome.'
				if $ret[0] == 421;
		return @{$STATUS{PROTO_ERROR}}, 'No 220 in welcome.';
	}
	return @{$STATUS{IO_TIMEOUT2}}, 'Write timeout on HELO.'
		unless smtp_puts("HELO $saddr");
	@ret = get_smtp_status();
	if ($ret[0] != 250) {
		smtp_puts("QUIT");
		shutdown(SMTP, 2);
		return @{$STATUS{PROTO_ERROR}}, 'No 250 on HELO.';
	}
	return @{$STATUS{IO_TIMEOUT2}}, 'Write timeout on MAIL FROM.'
		unless smtp_puts("MAIL FROM: <$MY_EMAIL_ADDRESS>");
	@ret = get_smtp_status();
	if ($ret[0] != 250) {
		smtp_puts("QUIT");
		shutdown(SMTP, 2);
		return @{$STATUS{PROTO_ERROR}}, 'No 250 on MAIL FROM.';
	}
	return @{$STATUS{IO_TIMEOUT2}}, 'Write timeout on RCPT TO.'
		unless smtp_puts("RCPT TO: <$user\@$domain>");
	@ret = get_smtp_status();
	smtp_puts("RSET"); # Microsoft sends 5xx instead of 2xx.
	get_smtp_status();
	smtp_puts("QUIT");
	shutdown(SMTP, 2); # skip shutdown if timeout is reached..
							 # don't want fins taking too long

	if (grep(/$ret[0]/, @RCPTTO_SUCCESS)) {
		return @{$STATUS{SUCCESS}}, 'Peer said: ' . $ret[1];
	} elsif (grep(/$ret[0]/, @RCPTTO_EAGAIN)) {
		return @{$STATUS{TRY_AGAIN}}, 'Peer said: ' . $ret[1];
	} elsif (grep(/$ret[0]/, @RCPTTO_EINVAL)) {
		return @{$STATUS{INVALID}}, 'Peer said: ' . $ret[1];
	} else {
		return @{$STATUS{UNKNOWN}}, 'Peer said: ' . $ret[1];
	}
}

# Get the query from sql cache if available,
# takes strUser, strDomain,
# returns (intStatus[2..5], strMessage, strExtraInfo).
sub query_user_from_cache {
	return unless $DBHANDLE and $DEBUG == 0;
	my @ARGS = @_; # pass-by-value semantics
	foreach (@ARGS) { s/'/\\'/g; }
	my ($user, $domain) = @ARGS;
	my ($r, @status);
	# Flush old stuff
	$r = $DBHANDLE->prepare("DELETE FROM smtpvrfy_cache WHERE TIME_TO_SEC(TIMEDIFF(NOW(), checked)) > $CACHE_TIME");
	$r->execute;
	$r->finish;
	# Get current email
	$r = $DBHANDLE->prepare("SELECT status, message, extra FROM smtpvrfy_cache WHERE email = '$user\@$domain'");
	$r->execute;
	@status = $r->fetchrow_array if $r;
	@status[2] .= " (cached)" if @status;
	$r->finish;
	
	return @status;
}

# Insert the status message into the cache,
# takes strUser, strDomain, intStatus, strMessage, strExtraInfo
# returns void.
sub insert_user_into_cache {
	return unless $DBHANDLE and $DEBUG == 0;
	my @ARGS = @_; # pass-by-value semantics
	foreach (@ARGS) { s/'/\\'/g; }
	my ($user, $domain, $status, $message, $extra) = @ARGS;
	my $r;
	# Insert
	$r = $DBHANDLE->prepare("INSERT INTO smtpvrfy_cache (email, checked, status, message, extra) "
			. "VALUES ('$user\@$domain', NOW(), $status, '$message', '$extra')");
	$r->execute;
	$r->finish;
}

# Check the email address for validity,
# takes strEmail,
# returns (intStatus[2..5], strMessage, strExtraInfo).
sub get_email_account_status {
	my $email = shift;
	$email =~ tr/A-Z/a-z/;

	return @{$STATUS{NO_EMAIL_PARSE}}, 'Failed regex "^(.+)@([^\.].*\.[a-z]{2,})$".'
		unless $email =~ /^(.+)@([^\.].*\.[a-z]{2,})$/s;
	my ($user, $domain) = ($1, $2);
	my @status;
	if ((@status = query_user_from_cache($user, $domain)) != undef) {
		print STDERR "|| Got data from cache\n" if $DEBUG;
	} elsif ((@status = query_user_through_smtp($user, $domain)) != undef) {
		print STDERR "|| Got data from smtp query\n" if $DEBUG;
		insert_user_into_cache($user, $domain, @status);
	} else {
		print STDERR "|| Got nothing" if $DEBUG;
		@status = (4, 'Internal program error', 'query_user_through_smtp returned void');
	}
	return @status;
}


########################################
# M A I N
########################################

my $request_uri = $ARGV[0];
my @status = get_email_account_status($request_uri);
if (1) {
	print "Status: 200\r\n" if $status[0] == 2;
	print "Status: 401\r\n" if $status[0] == 4; # Firefox interprets 408 suddenly, using 401.
	print "Status: 502\r\n" if $status[0] == 5;
	print "Content-type: text/plain\r\n";
	print "\r\n";
	print "$status[1] ($status[2])\r\n";
} else {
	print "Iets anders @status\n";
}

