//////////////////////////////////////////////////////////
//
//  Exchange IMF Whitelist/Keyword Script
//  11/9/2005 - vorbau [at] smurf [dot] to
//
//  This script works with the IMF plugin for Exchange, 
//  reading out of the archive bucket that IMF flags as
//  spam.
//
//  There are two components (a sender whitelist and a
//  spam keyword list), but the script could easily be
//  expanded to more through the use of pattern arrays
//  and regular expressions.
//
//  The filter loads a pattern file into memory then
//  searches each email message with a regular expression
//  search.  This is very slow, but it beats the hell
//  out of reading each message by hand.
//
//
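//  Usage from the cmd line:  cscript spam_check.js
//  Or double-click the script icon.
//
//  Passing any command-line argument selects the local
//  setup paths configured below instead of the remote ones.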
//
//////////////////////////////////////////////////////////


// User configuration: adjust the paths below as needed.
//
// Path separators must be written as a doubled backslash (\\).
//
// Two setups are available:
//   - remote: used when the script is started without arguments
//   - local:  used when at least one command-line argument is passed

var root, wlpath, kwpath, logpath;

if (WScript.Arguments.length == 0)
{
	// Remote setup: root of Exchange's buckets
	root = "Q:";

	// The script paths (whitelist, keyword list, log)
	wlpath = "S:\\Spam\\whitelist.txt";
	kwpath = "S:\\Spam\\spam_keywords.txt";
	logpath = "S:\\Spam\\spam.log";
}
else
{
	// Local setup: root of Exchange's buckets
	root = "D:\\Exchange\\mailroot\\vsi 1";

	// The script paths (whitelist, keyword list, log)
	wlpath = "D:\\Spam\\whitelist.txt";
	kwpath = "D:\\Spam\\spam_keywords.txt";
	logpath = "D:\\Spam\\spam.log";
}

// Exchange's buckets, all derived from the root selected above:
//   exchpath - folder whose messages are scanned
//   pickup   - destination for whitelisted messages
//   dump     - destination for messages that hit a keyword
var exchpath = root + "\\UceArchive\\";
var pickup = root + "\\PickUp\\";
var dump = root + "\\Spam\\";


////////////////////////////////
////////////////////////////////
//// END USER CONFIGURATION ////
////////////////////////////////
////////////////////////////////

// Progress bar variables: spinner frames and the index of the next frame
var twiddleArr = new Array("|","/","-","\\");
var index = 0;


// FileSystemObject.OpenTextFile constants (I/O mode and format)
var ForReading = 1, ForWriting = 2, ForAppending = 8, TristateUseDefault = -2;
// Column indices of a word-list entry: [pattern text, rank, compiled RegExp]
var pattern = 0, rank = 1, regex = 2; 
// Number of characters checkKeywords() reads from each message
// NOTE(review): 4196 may be a typo for 4096 -- confirm before changing
var blocksize = 4196;

// Setup our environment
var fso = new ActiveXObject("Scripting.FileSystemObject");
var ifldr, ifc;

try
{
	ifldr = fso.GetFolder(exchpath);
	ifc =  new Enumerator(ifldr.Files);
}
catch (e)
{
	WScript.StdErr.WriteLine("Error opening mailroot: "+e.description);
	WScript.Quit();
}

WScript.StdOut.WriteLine("Total messages to search: " + ifldr.Files.Count);


// Load the whitelist (and keep a backup copy, since it is rewritten at the end)
var wl = loadWordList(wlpath);

if (wl.length != 0)
{
	WScript.StdOut.WriteLine("Whitelist addresses loaded: " + wl.length);
	fso.CopyFile(wlpath, wlpath + ".bak", true);
}
else 
{
	WScript.StdOut.WriteLine("No whitelist addresses");
}

// Load the keywords (and keep a backup copy, since they are rewritten at the end)
var kw = loadWordList(kwpath);

if (kw.length != 0)
{
	WScript.StdOut.WriteLine("Keywords loaded: " + kw.length);
	fso.CopyFile(kwpath, kwpath + ".bak", true);
}
else
{
	WScript.StdOut.WriteLine("No spam keywords");
}

WScript.StdOut.WriteLine();
WScript.StdOut.WriteLine();


//
// Start the filtering
//

var prev, curr;
var count = 0, ham = 0, spam = 0;


// Open the log for appending
try {
	var log = fso.OpenTextFile(logpath, ForAppending, true);

} catch (e) {
	WScript.StdErr.WriteLine("Error opening log file: "+e.description);
	WScript.StdErr.WriteLine("Is the script running somewhere else?");
	WScript.Quit();
}

sort(kw);


// Whitelist RegExps used by checkWhiteList().  These patterns never change,
// so they are built once here instead of once per message.
var re = new RegExp("^x-sender: (.*)$", "i");
var are = new RegExp("^(.*@.*)$", "i");

var eml, str;

for (; !ifc.atEnd(); ifc.moveNext())
{
	// null means "no stream currently open for this message"
	eml = null;

	try {

		eml = fso.OpenTextFile(ifc.item(), ForReading, false, TristateUseDefault);

		if (!eml.AtEndOfStream )
		{
			// get the first line (we hope x-sender)
			str = eml.ReadLine();

			if ((curr = checkWhiteList(str)) != null)
			{
				WScript.StdOut.WriteLine("\r");
				WScript.StdOut.WriteLine(ifc.item() + ":\nWhitelist:\t" + curr + "\n");
				writeLog(ifc.item() + " - Whitelist: " + curr);

				// The file must be closed before it can be moved
				eml.Close();
				eml = null;
				fso.MoveFile(ifc.item(), pickup);

				ham++;
			}
			else if ((curr = checkKeywords(eml)) != null)
			{
				// Re-sort periodically so frequently hit keywords are tried first
				if (count % 10 == 0 && count > 0) {
					sort(kw);
				}

				WScript.StdOut.WriteLine("\r");
				WScript.StdOut.WriteLine(ifc.item() + ":\nKeyword:\t" + curr + "\n");

				// The file must be closed before it can be moved
				eml.Close();
				eml = null;
				fso.MoveFile(ifc.item(), dump);

				spam++;
			}
		}

		// Both whitelist and keyword hits have been moved out of the folder,
		// so add both back to the live file count to get the real total.
		doProgressBar(count, (ifldr.Files.Count + ham + spam));
		count++;

	} catch (e) {
		WScript.StdErr.WriteLine(e.description);
	} finally {
		// Release the stream for messages that were not moved (no hit, empty
		// file, or an error part-way through) so no handle is leaked.
		if (eml != null) {
			try {
				eml.Close();
			} catch (closeErr) {
				// Best effort: nothing useful can be done if closing fails
			}
			eml = null;
		}
	}
}

WScript.StdOut.WriteLine("\nMessages Scanned:\t" + count +
						 "\nWhitelist Hits:\t\t" + ham +
						 "\nKeyword Hits:\t\t" + spam);
writeLog("- Complete: T/" + count + " W/" + ham + " S/" + spam);

log.Close();

// Persist both lists; the keywords are written in their final rank order
sort(kw);

saveWordList(kw, kwpath);
saveWordList(wl, wlpath);


WScript.Quit();




// Match the first line of a message against the whitelist.
//
// addr - the first line read from the message (hopefully the x-sender header)
//
// Returns the text that matched a whitelist entry, or null when addr is not
// an x-sender line or no entry matches.  A hit also raises that entry's rank.
//
// Uses the script-level globals re and are (header patterns defined in the
// main script), wl (the whitelist) and the rank / regex column indices.
function checkWhiteList(addr) 
{
	// Only x-sender header lines are candidates
	var sender = re.exec(addr);
	if (sender == null)
		return null;

	// The text under test is the same for every whitelist entry, so work it
	// out once: the capture of "are" when the line contains an "@", otherwise
	// the capture of "re".
	// NOTE(review): "are" matches the entire line, so for a normal address the
	// value tested (and returned) still includes the "x-sender: " prefix --
	// confirm whether the bare address was intended.
	var whole = are.exec(addr);
	var addr2 = (whole != null ? whole[1] : sender[1]) + "";

	for (var i = 0; i < wl.length; i++)
	{
		if (wl[i][regex].test(addr2))
		{
			// Raise this entry's rank by ceil(1.1 * list length)
			wl[i][rank] += Math.ceil(wl.length * 1.1);
			return addr2;
		}
	}

	return null;
}


// Match the keyword list against the next chunk of an email.
//
// eml - an open text stream positioned where the search should start
//       (may be null)
//
// Reads up to blocksize characters from the stream and tests every keyword
// pattern against that text, in list order.  On the first hit the entry's rank
// is raised by 10 and a description of the form
//     "<pattern> [<list index>]:\n\t<matched text>"
// is returned.  Returns null when the stream is null, exhausted or empty, or
// when no keyword matches.
//
// Uses the script-level globals kw (keyword list), blocksize and the
// pattern / rank / regex column indices.
function checkKeywords(eml) 
{
	if (eml == null || eml.AtEndOfStream)
		return null;

	var str = eml.Read(blocksize);
	if (str == "")
		return null;

	for (var i = 0; i < kw.length; i++)
	{
		var hit = kw[i][regex].exec(str);

		if (hit != null)
		{
			kw[i][rank] += 10;

			// Group 1 is the text that matched; fall back to "" if the
			// pattern has no capture group.
			var matched = (hit.length > 1 && hit[1] != null) ? hit[1] : "";

			return kw[i][pattern] + " [" + i + "]:\n\t" + matched;
		}
	}

	return null;
}


// Load a word list (one regular-expression pattern per line).
//
// path - file to read
//
// Returns an array of entries [pattern text, rank, compiled RegExp].  Each
// RegExp is case-insensitive and wraps the pattern in a capture group so the
// matched text is available as group 1.  Ranks are assigned in file order:
// the first entry gets the highest rank (the number of entries), the last
// entry gets 1.
//
// Blank lines are skipped.
// NOTE(review): lines starting with # are NOT skipped -- they are loaded as
// ordinary patterns.  Confirm whether comment support was intended.
//
// An invalid pattern makes the RegExp constructor throw; the file is closed
// before the error propagates.
function loadWordList(path)
{
	var wlarray = new Array();
	var fin = fso.OpenTextFile(path);

	try
	{
		while (!fin.AtEndOfStream)
		{
			// Strip any line terminator left on the text (e.g. a stray CR
			// from mixed line endings); strings are immutable, so the result
			// of replace() has to be kept.
			var line = fin.ReadLine().replace(/[\r\n]+$/, "");

			if (line != "")
				wlarray.push(new Array(line, 0, new RegExp("(" + line + ")", "i")));
		}
	}
	finally
	{
		fin.Close();
	}

	// Earlier lines rank higher
	var total = wlarray.length;
	for (var i = 0; i < total; i++)
		wlarray[i][rank] = total - i;

	return wlarray;
}


// Write a word list back to disk, one pattern per line, in array order.
//
// wl   - array of word-list entries; only the pattern text column is written
// path - file to overwrite (opened for writing without the create flag, so it
//        is expected to exist already)
//
// Prints a one-line summary to standard output when done.  The file is closed
// even if a write fails.
function saveWordList(wl, path)
{
	var fout = fso.OpenTextFile(path, ForWriting);

	try
	{
		for (var i = 0; i < wl.length; i++)
			fout.WriteLine(wl[i][pattern]);
	}
	finally
	{
		fout.Close();
	}

	WScript.StdOut.WriteLine("Wrote " + wl.length + " patterns to " + path);
}

// Count the lines in a text file.
//
// path - file to read
//
// Returns the number of lines read (0 for an empty file).  The file is closed
// even if reading fails.
function countLines(path)
{
	var fin = fso.OpenTextFile(path);
	// Must start at 0: incrementing an uninitialised variable yields NaN
	var lines = 0;

	try
	{
		while (!fin.AtEndOfStream)
		{
			fin.ReadLine();
			lines++;
		}
	}
	finally
	{
		fin.Close();
	}

	return lines;
}
	




// Sorting
//
//

// Sort a word list in place.
//
// a - array of word-list entries
//
// Single entry point for ordering the lists; the work is delegated to
// bubbleSort().
function sort(a)
{
	bubbleSort(a);
}

// Bubble-sort an array of entries in place, in descending order of the value
// stored at index 1 of each entry.
//
// array - array whose elements are themselves arrays
//
// Adjacent entries are swapped only when the left one is strictly smaller, so
// entries with equal values keep their relative order.  Sorting stops as soon
// as a full pass makes no swap.
function bubbleSort(array)
{
	var unsortedEnd = array.length - 1;	// last index still out of place
	var swapped = true;

	while (swapped && unsortedEnd > 0)
	{
		swapped = false;

		for (var k = 0; k < unsortedEnd; k++)
		{
			var left = array[k];
			var right = array[k + 1];

			if (left[1] < right[1])
			{
				array[k] = right;
				array[k + 1] = left;
				swapped = true;
			}
		}

		// The smallest remaining value has sunk to unsortedEnd
		unsortedEnd--;
	}
}


// Progress Bar stuff
//
//

// Redraw the one-line progress display on standard output.
//
// cur - number of items processed so far
// max - total number of items; nothing is drawn unless max > 0
//
// Output is a carriage return (to overwrite the previous bar), one spinner
// frame from twiddle(), the percentage, then a bar of "=" ending in "> ".
function doProgressBar(cur, max)
{
	// Nothing to draw (and no division by zero) without a positive total
	if (!(max > 0))
		return;

	var progress = Math.ceil((cur * 100) / max);
	// NOTE(review): the divisor 90 makes the bar 55 characters wide at 100% --
	// confirm whether 100 was intended.
	var width = Math.floor(progress*50/90);

	// Build the bar first so it goes out in a single write
	var bar = "";
	for (var k = 0; k < width; k++)
		bar += "=";

	WScript.StdOut.Write("\r");
	twiddle();
	WScript.StdOut.Write(" [ " + progress + "% ] " + bar + "> ");
}

// Print the current spinner frame and step to the next one, wrapping back to
// the first frame after the last.  Uses the script-level twiddleArr and index.
function twiddle() {
	var frame = twiddleArr[index];
	index += 1;

	WScript.StdOut.Write(frame);

	if (index >= twiddleArr.length) {
		index = 0;
	}
}

// Append one entry to the open log stream, formatted as
// "<current date/time>:<TAB><message><CR><LF>".
//
// s - message text
function writeLog(s) {
	var stamp = new Date().toString();
	var entry = stamp + ":\t" + s + "\r\n";

	log.Write(entry);
}

