using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Xml;

// The general idea of this program is to look through a folder with XML-files
// and delete nodes that match certain content.
// A configuration file will tell what nodes to delete if any of the
// content specified in the file is found either as element or attribute in
// the node or one of its children.

namespace RemoveSpam
{
    /// <summary>
    /// Console tool that removes Tracking elements from dasBlog DayExtra XML
    /// files when their PermaLink matches one of the wildcard patterns listed
    /// in a configuration file.
    /// </summary>
    class Program
    {
        // Second command-line argument that requests a sample config file
        // instead of processing a folder.
        const string ConfigSwitch = "/config";

        /// <summary>
        /// Entry point. Expects exactly two arguments: the config file path and
        /// either the target folder or the literal switch "/config".
        /// Prints usage and waits for a key press when the count is wrong.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("\r\n" + "\r\n" + "RemoveSpam by ReSQueL" + "\r\n");
            Console.WriteLine("Intended to remove *back spam from dasBlog." + "\r\n");

            if (args.Length != 2)
            {
                Console.WriteLine("This program takes two arguments.");
                Console.WriteLine(" 1st the location of the config file.");
                Console.WriteLine(" 2nd the location of the target folder.");
                Console.WriteLine("Use double quotes (\") to deal with spaces in the paths.");
                Console.WriteLine("");
                Console.WriteLine("example:");
                Console.WriteLine("RemoveSpam \"C:\\My Config Folder\\RemoveSpam.xml\" \\\\MyServer\\MyShare\\TargetFolder");
                Console.WriteLine("");
                Console.WriteLine("To create a sample config file (includes documentation), use /config;");
                Console.WriteLine("RemoveSpam \"C:\\My Config Folder\\RemoveSpam.xml\" /config");
                Console.ReadKey();
                return;
            }

            string configFile = args[0];
            string target = args[1];

            if (string.Equals(target, ConfigSwitch, StringComparison.Ordinal))
            {
                MakeConfig(configFile);
            }
            else
            {
                ProcessFolder(configFile, target);
            }
        }

        /// <summary>
        /// Writes a sample configuration file (with embedded documentation) to
        /// <paramref name="ConfigFile"/>. Any failure is reported on the console.
        /// </summary>
        /// <param name="ConfigFile">Path of the config file to create.</param>
        static void MakeConfig(string ConfigFile)
        {
            // NOTE(review): in the reviewed source the XML markup of this literal
            // was missing (only the text content survived), so the document could
            // not be parsed. The structure below is reconstructed. The names
            // RemoveSpam / Configs / Config / @Pattern are the ones ProcessFolder
            // actually reads; the Documentation and Author element names and the
            // two sample patterns are assumptions - confirm against the original
            // sample file before release.
            string sampleXml =
                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n" +
                "<RemoveSpam>\r\n" +
                "  <Documentation>\r\n" +
                "    RemoveSpam was built after a massive attack of trackback and pingback spam\r\n" +
                "    on a blog running dasBlog.\r\n" +
                "    While this is not a durable solution against\r\n" +
                "    *back-spam, it may help you clean up the *backs.\r\n" +
                "    The spam is found based on a simple pattern-match against all XML documents\r\n" +
                "    in a given folder (the content folder of dasBlog). It matches the PermaLink\r\n" +
                "    elements within DayExtra, Trackings, Tracking. If a match is found, the\r\n" +
                "    entire Tracking element is deleted.\r\n" +
                "\r\n" +
                "    To see some examples of patterns, see Configs element. Each pattern has a Config\r\n" +
                "    element, with the Pattern attribute that shows what pattern is a spamvertised\r\n" +
                "    PermaLink. Pattern is build with the characters from the URL and takes wildcards\r\n" +
                "    \"*\" (star) and \"^\" (caret).\r\n" +
                "    * = 0 or more characters.\r\n" +
                "    ^ = 1 character.\r\n" +
                "    For those wondering, the question mark can not be used as a single poition wildcard,\r\n" +
                "    as it is meaningful and common in URLs.\r\n" +
                "\r\n" +
                "    Edit or add Config elements to suit your needs.\r\n" +
                "\r\n" +
                "  </Documentation>\r\n" +
                "  <Configs>\r\n" +
                "    <Config Pattern=\"http://*.example.com/*\" />\r\n" +
                "    <Config Pattern=\"http://spam^^.example.net/*\" />\r\n" +
                "  </Configs>\r\n" +
                "  <Author>\r\n" +
                "    <Name>Stan Segers</Name>\r\n" +
                "    <Date>2007-06-07</Date>\r\n" +
                "    <Website>http://www.resquel.com</Website>\r\n" +
                "    <Email>mailto://query@resquel.com</Email>\r\n" +
                "  </Author>\r\n" +
                "</RemoveSpam>\r\n";

            try
            {
                // Parsing first guarantees that only well-formed XML is written.
                XmlDocument sample = new XmlDocument();
                sample.LoadXml(sampleXml);
                sample.Save(ConfigFile);
                Console.WriteLine("Config file succesfully created.\r\n");
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }

        /// <summary>
        /// Converts a config wildcard pattern into a regular-expression string.
        /// Every character is matched literally except "^" (exactly one
        /// character) and "*" (zero or more characters).
        /// </summary>
        /// <param name="pattern">Wildcard pattern as written in the config file.</param>
        /// <returns>Equivalent regular-expression pattern (not anchored).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="pattern"/> is null.</exception>
        internal static string PatternToRegex(string pattern)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }

            // Regex.Escape neutralises every metacharacter, including the
            // backslash itself, so there is no escaping-order problem. It also
            // escapes our two wildcards as "\^" and "\*"; those are turned back
            // into their wildcard meaning below. Literal backslashes come out as
            // "\\" pairs, so the backslash directly in front of '^' or '*' is
            // always the escape that Regex.Escape added.
            string escaped = Regex.Escape(pattern);
            escaped = escaped.Replace("\\^", ".");
            escaped = escaped.Replace("\\*", ".*");
            return escaped;
        }

        /// <summary>
        /// Reads all /RemoveSpam/Configs/Config/@Pattern values from the config
        /// file and converts them to regular-expression strings.
        /// </summary>
        /// <param name="configFile">Path of the config file.</param>
        /// <returns>
        /// List of regular-expression strings (possibly empty), or null when the
        /// config file could not be read; the error is printed to the console.
        /// </returns>
        static ArrayList LoadPatterns(string configFile)
        {
            ArrayList patterns = new ArrayList();

            try
            {
                XmlDocument config = new XmlDocument();
                config.Load(configFile);

                foreach (XmlNode configNode in config.SelectNodes("/RemoveSpam/Configs/Config"))
                {
                    XmlAttribute patternAttribute = configNode.Attributes["Pattern"];
                    if (patternAttribute == null)
                    {
                        Console.WriteLine("Skipping Config element without Pattern attribute.");
                        continue;
                    }

                    string wildcard = patternAttribute.Value;
                    if (wildcard.Trim().Length == 0)
                    {
                        // An empty pattern would match every PermaLink and wipe
                        // out all trackings, so it is never accepted.
                        Console.WriteLine("Skipping Config element with empty Pattern attribute.");
                        continue;
                    }

                    Console.WriteLine("PermaLink pattern: " + wildcard); // Debug line
                    patterns.Add(PatternToRegex(wildcard));
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
                return null;
            }

            return patterns;
        }

        /// <summary>
        /// Loads the patterns from <paramref name="ConfigFile"/> and applies them
        /// to every *.xml file directly inside <paramref name="TargetFolder"/>.
        /// Errors are reported on the console; nothing is thrown to the caller.
        /// </summary>
        /// <param name="ConfigFile">Path of the config file.</param>
        /// <param name="TargetFolder">Folder holding the XML files to clean.</param>
        static void ProcessFolder(string ConfigFile, string TargetFolder)
        {
            ArrayList patterns = LoadPatterns(ConfigFile);
            if (patterns == null)
            {
                // The config could not be read; the reason was already printed.
                return;
            }

            if (patterns.Count == 0)
            {
                Console.WriteLine("No usable patterns found in " + ConfigFile + "; nothing to do.");
                return;
            }

            try
            {
                // Kept inside the try block: a missing or inaccessible folder
                // makes GetFiles throw.
                DirectoryInfo folder = new DirectoryInfo(TargetFolder);
                FileInfo[] files = folder.GetFiles("*.xml");

                if (files.Length == 0)
                {
                    Console.WriteLine("No .xml files found in " + TargetFolder + "!");
                    return;
                }

                foreach (FileInfo file in files)
                {
                    ProcessFile(patterns, file.FullName);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }

        /// <summary>
        /// Removes every Tracking element whose PermaLink matches one of the
        /// given regular expressions (case-insensitive, not anchored) and saves
        /// the file when something was removed.
        /// </summary>
        /// <param name="Pattern">Regular-expression pattern strings.</param>
        /// <param name="FileName">Full path of the XML file to clean.</param>
        static void ProcessFile(ArrayList Pattern, string FileName)
        {
            const string dasBlogNamespace = "urn:newtelligence-com:dasblog:runtime:data";
            const string prefix = "dasBlog";
            const string permaLinkXPath =
                "/dasBlog:DayExtra/dasBlog:Trackings/dasBlog:Tracking/dasBlog:PermaLink";

            try
            {
                Console.WriteLine("Processing file: " + FileName); // Debug line

                // Build each Regex once per file rather than once per node.
                List<Regex> matchers = new List<Regex>(Pattern.Count);
                foreach (object item in Pattern)
                {
                    matchers.Add(new Regex(item.ToString(), RegexOptions.IgnoreCase));
                }

                XmlDocument content = new XmlDocument();
                content.Load(FileName);

                // The dasBlog files use a default namespace, so the XPath needs
                // a prefix mapped to it.
                XmlNamespaceManager namespaces = new XmlNamespaceManager(content.NameTable);
                namespaces.AddNamespace(prefix, dasBlogNamespace);

                // Collect first, remove afterwards: changing the document while
                // walking the SelectNodes result gives unreliable results.
                List<XmlNode> spamTrackings = new List<XmlNode>();
                foreach (XmlNode permaLink in content.SelectNodes(permaLinkXPath, namespaces))
                {
                    string url = permaLink.InnerText;
                    if (!MatchesAny(matchers, url))
                    {
                        continue;
                    }

                    Console.WriteLine("Match found: " + url);

                    XmlNode tracking = permaLink.ParentNode;
                    if (tracking != null && !spamTrackings.Contains(tracking))
                    {
                        spamTrackings.Add(tracking);
                    }
                }

                // The whole Tracking element is removed; an emptied Tracking
                // element was suspected to break the dasBlog trackbackList
                // macro (changed 2007-06-18).
                foreach (XmlNode tracking in spamTrackings)
                {
                    if (tracking.ParentNode != null)
                    {
                        tracking.ParentNode.RemoveChild(tracking);
                    }
                }

                // Write back the XML after cleaning.
                if (spamTrackings.Count > 0)
                {
                    content.Save(FileName);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Error processing " + FileName + ": " + e.Message);
            }
        }

        /// <summary>
        /// Tells whether <paramref name="text"/> matches at least one of the
        /// given regular expressions; stops at the first match.
        /// </summary>
        static bool MatchesAny(List<Regex> matchers, string text)
        {
            foreach (Regex matcher in matchers)
            {
                if (matcher.IsMatch(text))
                {
                    return true;
                }
            }

            return false;
        }
    }
}