Difference between revisions of "SpamFerret.php"
(→Code: 2/25/09 version - includes fix for MW 1.14) |
(2009-03-26 version) |
||
Line 3: | Line 3: | ||
==Code== | ==Code== | ||
− | <php><?php/* | + | <php><?php |
+ | /* | ||
HISTORY: | HISTORY: | ||
2007-08-19 (Wzl) fixed line 155 call to clsDatabase::Query() (method deprecated and removed) | 2007-08-19 (Wzl) fixed line 155 call to clsDatabase::Query() (method deprecated and removed) | ||
Line 20: | Line 21: | ||
2008-10-21 (Wzl) Fixed minor syntax error in "defines" | 2008-10-21 (Wzl) Fixed minor syntax error in "defines" | ||
2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?) | 2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?) | ||
+ | 2009-03-10 (Wzl) "require" -> "require_once" so other extensions can use data.php without conflict | ||
+ | also optional $kfpWzlLibs so data.php can be somewhere not on the path | ||
+ | 2009-03-18 (Wzl) Got rid of shared.php requirement; now using kfpLib to locate data.php | ||
+ | 2009-03-26 (Wzl) Rewrote data library calls to use new classes (no longer using deprecated/removed classes) | ||
+ | Also modified to use newer function hooks | ||
TO DO: | TO DO: | ||
* Log matching text for regex filters | * Log matching text for regex filters | ||
Line 25: | Line 31: | ||
Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option? | Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option? | ||
* Figure out how to display a different error message than "the following text is what triggered our filter:" | * Figure out how to display a different error message than "the following text is what triggered our filter:" | ||
+ | OPTIONAL SETTINGS: | ||
+ | kfpLib - path to data.php folder (no final slash) | ||
+ | kfsLib_Data - filespec of data.php | ||
*/ | */ | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
# Loader for spam blacklist feature | # Loader for spam blacklist feature | ||
Line 39: | Line 40: | ||
if ( defined( 'MEDIAWIKI' ) ) { | if ( defined( 'MEDIAWIKI' ) ) { | ||
− | |||
− | |||
+ | $wgExtensionCredits['other'][] = array( | ||
+ | 'name' => 'SpamFerret', | ||
+ | 'author' => 'Woozle Staddon', | ||
+ | 'url' => 'http://htyp.org/SpamFerret', | ||
+ | 'version' => '2009-03-26', | ||
+ | 'description' => 'database-driven wikispam blocker', | ||
+ | ); | ||
+ | |||
+ | if (!defined('kfsLib_Data')) { | ||
+ | if (defined('kfpLib')) { | ||
+ | define('kfsLib_Data', kfpLib.'/data.php'); | ||
+ | } else { | ||
+ | define('kfsLib_Data','data.php'); // assume it's on the path | ||
+ | } | ||
+ | } | ||
+ | require_once kfsLib_Data; | ||
+ | |||
+ | /* ============== | ||
+ | SET UP CALLBACKS | ||
+ | */ | ||
global $wgFilterCallback, $wgPreSpamFilterCallback; | global $wgFilterCallback, $wgPreSpamFilterCallback; | ||
− | if ( | + | $wgPreSpamFilterCallback = false; |
− | + | if ( defined( 'MW_SUPPORTS_EDITFILTERMERGED' ) ) { | |
+ | $wgHooks['EditFilterMerged'][] = 'wfSpamFerretMerged'; | ||
} else { | } else { | ||
− | + | if ( $wgFilterCallback ) { | |
+ | $wgPreSpamFilterCallback = $wgFilterCallback; | ||
+ | } | ||
+ | $wgFilterCallback = 'wfSpamFerretFilter'; | ||
} | } | ||
− | $ | + | /* |
− | $ | + | $wgHooks['EditFilter'][] = 'wfSpamFerretValidate'; |
− | + | $wgHooks['ArticleSaveComplete'][] = 'wfSpamFerretArticleSave'; | |
− | + | $wgHooks['APIEditBeforeSave'][] = 'wfSpamFerretAPIEditBeforeSave'; | |
− | + | */ | |
− | + | ||
− | + | /* ================ | |
− | ); | + | SET GLOBAL OBJECTS |
+ | */ | ||
+ | function GetSpamFerret() { | ||
+ | static $objFerret; | ||
+ | |||
+ | if (!isset($objFerret)) { | ||
+ | $objFerret = new SpamFerret(); | ||
+ | } | ||
+ | return $objFerret; | ||
+ | } | ||
+ | /* ================ | ||
+ | CALLBACK FUNCTIONS | ||
+ | */ | ||
+ | /** | ||
+ | * Hook function for $wgFilterCallback | ||
+ | */ | ||
+ | function wfSpamFerretFilter( &$title, $text, $section, &$hookErr, $editSummary ) { | ||
+ | global $wgOut; | ||
+ | |||
+ | $spamObj = GetSpamFerret(); | ||
+ | $wgOut->addWikiText( "Intercepted by SpamFerretFilter" ); | ||
+ | return $spamObj->filter( $title, $text, $section ); | ||
+ | } | ||
+ | |||
+ | /** | ||
+ | * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter | ||
+ | */ | ||
+ | function wfSpamFerretMerged( &$editPage, $text, &$hookErr, $editSummary ) { | ||
+ | global $wgTitle,$wgOut; | ||
+ | |||
+ | if( is_null( $wgTitle ) ) { | ||
+ | # API mode | ||
+ | # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist | ||
+ | return true; | ||
+ | } | ||
− | + | $spamObj = GetSpamFerret(); | |
− | + | $title = $editPage->mArticle->getTitle(); | |
− | + | $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage ); | |
+ | if ( $ret !== false ) $editPage->spamPage( $ret ); | ||
− | + | // additional text can be added here: | |
− | + | // $wgOut->addWikiText( "Intercepted by SpamFerretMerged" ); | |
− | |||
− | + | // Return convention for hooks is the inverse of $wgFilterCallback | |
+ | return ( $ret === false ); | ||
} | } | ||
+ | /** | ||
+ | * Hook function for APIEditBeforeSave | ||
+ | */ | ||
+ | function wfSpamFerretAPIEditBeforeSave( &$editPage, $text, &$resultArr ) { | ||
+ | } | ||
+ | |||
class SpamFerret { | class SpamFerret { | ||
− | |||
− | |||
− | |||
var $previousFilter = false; | var $previousFilter = false; | ||
// internal data | // internal data | ||
Line 82: | Line 143: | ||
var $doClearThrottle; | var $doClearThrottle; | ||
− | function | + | // function SpamFerret() { |
− | + | // } | |
+ | |||
+ | function Setting($iName) { | ||
+ | global $wgSpamFerretSettings; | ||
− | + | return $wgSpamFerretSettings[$iName]; | |
− | |||
− | |||
} | } | ||
− | function filter( &$title, $text, $section ) { | + | function filter( &$title, $text, $section, $editSummary, $editPage ) { |
− | |||
global $wgArticle, $wgDBname, $wgMemc, $messageMemc, $wgVersion, $wgOut; | global $wgArticle, $wgDBname, $wgMemc, $messageMemc, $wgVersion, $wgOut; | ||
global $wgTitle, $wgServer; | global $wgTitle, $wgServer; | ||
Line 97: | Line 158: | ||
global $debug; | global $debug; | ||
global $errNum, $errStr; | global $errNum, $errStr; | ||
+ | // debugging: | ||
+ | global $sql; | ||
$fname = 'wfSpamFerretFilter'; | $fname = 'wfSpamFerretFilter'; | ||
Line 109: | Line 172: | ||
} | } | ||
} | } | ||
− | $retVal = | + | // initialize variables |
+ | $retVal = FALSE; // default = assume edit is ok | ||
+ | $msgEmail = FALSE; | ||
// get the IP address of the http client making the edit attempt: | // get the IP address of the http client making the edit attempt: | ||
$this->strIPAddr = wfGetIP(); | $this->strIPAddr = wfGetIP(); | ||
// Open the database | // Open the database | ||
− | $this->dbSpam = new clsDatabase($this->dbspec); | + | $this->dbSpam = new clsDatabase($this->Setting('dbspec')); |
+ | $this->dbSpam->Open(); | ||
// open clients table (extended Throttle version) for reference: | // open clients table (extended Throttle version) for reference: | ||
//return TRUE; | //return TRUE; | ||
− | $objTblClients = new | + | $objTblClients = new clsTable($this->dbSpam,'ClientThrottle','ID'); |
// Look up to see if this IP is known; it may already be throttled: | // Look up to see if this IP is known; it may already be throttled: | ||
$this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"'); | $this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"'); | ||
if (is_object($this->objDataClients)) { | if (is_object($this->objDataClients)) { | ||
− | if ($this->objDataClients-> | + | if ($this->objDataClients->hasRows()) { |
$isClientKnown = true; | $isClientKnown = true; | ||
} | } | ||
} | } | ||
if ($isClientKnown) { | if ($isClientKnown) { | ||
− | $this->idClient = $this->objDataClients-> | + | $this->idClient = $this->objDataClients->ID; |
− | $doBlock = $this->objDataClients-> | + | $doBlock = $this->objDataClients->doBlock; |
if ($doBlock) { | if ($doBlock) { | ||
$strThrType = 'BLK'; | $strThrType = 'BLK'; | ||
} else { | } else { | ||
− | $intRetries = $this->objDataClients-> | + | $intRetries = $this->objDataClients->Retries; |
− | $intThrottle = $this->throttle_retries; | + | $intThrottle = $this->Setting('throttle_retries'); |
$doBlock = $intRetries > $intThrottle; | $doBlock = $intRetries > $intThrottle; | ||
$strThrType = 'THR-'.$intRetries; | $strThrType = 'THR-'.$intRetries; | ||
Line 138: | Line 204: | ||
if ($doBlock) { | if ($doBlock) { | ||
// retry limit exceeded; check timeout limit | // retry limit exceeded; check timeout limit | ||
− | if ($this->objDataClients-> | + | if ($this->objDataClients->ThrottleTime < $this->Setting('throttle_timeout')) { |
EditPage::spamPage('Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.'); | EditPage::spamPage('Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.'); | ||
$this->RecordAttempt($strThrType); // record post attempt by throttled client | $this->RecordAttempt($strThrType); // record post attempt by throttled client | ||
Line 207: | Line 273: | ||
// AMPERSANDBOT DETECTED; refuse to save the edit | // AMPERSANDBOT DETECTED; refuse to save the edit | ||
$this->RecordAttempt('AMP'); // record spam attempt (AMP = ampersandbot) | $this->RecordAttempt('AMP'); // record spam attempt (AMP = ampersandbot) | ||
− | // The string | + | // The string returned will be shown after "The following text is what triggered our spam filter:" |
− | + | $retVal = 'The removal of everything after the first ampersand. You are an ampersandbot; please go away.'; | |
− | // LATER: it would be nice to | + | // LATER: it would be nice to display the error in a different format from the usual spam page |
− | |||
} | } | ||
} | } | ||
Line 221: | Line 286: | ||
if (!$retVal) { | if (!$retVal) { | ||
− | + | // set_error_handler ('ErrorHandler',E_WARNING); | |
− | $objTblPatterns = new | + | $objTblPatterns = new clsTable($this->dbSpam,'patterns','ID'); |
$objDataPatterns = $objTblPatterns->GetData('isActive'); | $objDataPatterns = $objTblPatterns->GetData('isActive'); | ||
− | + | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
$strTextCk = strtolower($text); | $strTextCk = strtolower($text); | ||
− | + | $isMatch = FALSE; | |
− | + | while($objDataPatterns->NextRow() && !$isMatch) { | |
− | while | + | $strPattern = $objDataPatterns->Pattern; |
− | $strPattern = $objDataPatterns-> | + | $isRegex = $objDataPatterns->isRegex; |
− | $isRegex = $objDataPatterns-> | + | $this->idPattern = $objDataPatterns->ID; |
− | $this->idPattern = $objDataPatterns-> | ||
− | |||
if ($isRegex) { | if ($isRegex) { | ||
− | |||
− | |||
$strPattCk = $strPattern; | $strPattCk = $strPattern; | ||
// Attempt at using Perl-compatible regex, but it doesn't seem to work (or maybe just too many patterns have issues): | // Attempt at using Perl-compatible regex, but it doesn't seem to work (or maybe just too many patterns have issues): | ||
Line 264: | Line 319: | ||
} | } | ||
// $debug .= 'ROW: '.DumpArray($objDataPatterns->Row); | // $debug .= 'ROW: '.DumpArray($objDataPatterns->Row); | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } | ||
Line 277: | Line 326: | ||
// The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:" | // The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:" | ||
− | + | $retVal = '(pattern #'.$this->idPattern.') '.$strMatch; | |
// Log the spam attempt: | // Log the spam attempt: | ||
// $sql = 'SELECT * FROM clients WHERE Address="'.$this->strIPAddr.'"'; | // $sql = 'SELECT * FROM clients WHERE Address="'.$this->strIPAddr.'"'; | ||
Line 283: | Line 332: | ||
// update or create client record: | // update or create client record: | ||
$this->RecordAttempt('-'); | $this->RecordAttempt('-'); | ||
− | |||
} else { | } else { | ||
// no spam cues found; allow the edit to be saved, if nothing else has tripped the filter | // no spam cues found; allow the edit to be saved, if nothing else has tripped the filter | ||
− | if ($ | + | if ($this->Setting('log_ok_edits')) { |
$this->RecordOkEdit(); | $this->RecordOkEdit(); | ||
} | } | ||
Line 295: | Line 343: | ||
/**/ | /**/ | ||
} | } | ||
+ | $objDataPatterns->NextRow(); | ||
} | } | ||
Revision as of 00:28, 5 April 2009
{{#lst:SpamFerret|navbar}}: SpamFerret.php
Code
<php><?php /*
HISTORY: 2007-08-19 (Wzl) fixed line 155 call to clsDatabase::Query() (method deprecated and removed) 2007-09-30 (Wzl) fixing regex processing 2007-10-11 (Wzl) logging ampersandbot attempts; spam throttle (automatic temporary blacklist) 2007-10-13 (Wzl) fixed some issues which were preventing throttling from working - mainly changes to SQL 2007-10-15 (Wzl) "Code" wasn't being recorded. Decided that normal filtering should use code '-' so NULL means something is wrong. 2007-10-28 (Wzl) Events with THR and AMP codes *still* weren't being recorded because method call was improperly formatted. 2007-12-23 (Wzl) Emails wikimaster if eregi() returns an error (due to improperly formatted regex) 2007-12-26 (Wzl) Spam turd rejection / logging 2007-12-27 (Wzl) Fixed spam turd detection to work for new pages too (will probably need refinement) 2007-12-27 (Wzl) Ooops. Replaced missing if-block from regex results when inserted text is found. 2008-08-29 (Wzl) Added permanent IP blocking 2008-09-04 (Wzl) Added (optional) logging of successful edits 2008-09-19 (Wzl) Actually *set* the "didEdit" flag for successful edits <facepalms> 2008-10-21 (Wzl) Fixed minor syntax error in "defines" 2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?) 2009-03-10 (Wzl) "require" -> "require_once" so other extensions can use data.php without conflict also optional $kfpWzlLibs so data.php can be somewhere not on the path 2009-03-18 (Wzl) Got rid of shared.php requirement; now using kfpLib to locate data.php 2009-03-26 (Wzl) Rewrote data library calls to use new classes (no longer using deprecated/removed classes) Also modified to use newer function hooks TO DO: * Log matching text for regex filters * Throttled save attempts should check for spam, just for data-gathering purposes. Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option? 
* Figure out how to display a different error message than "the following text is what triggered our filter:" OPTIONAL SETTINGS: kfpLib - path to data.php folder (no final slash) kfsLib_Data - filespec of data.php
*/
# Loader for spam blacklist feature
# Include this from LocalSettings.php
if ( defined( 'MEDIAWIKI' ) ) {
// Register the extension so that it is listed on Special:Version.
$wgExtensionCredits['other'][] = array(
    'name'        => 'SpamFerret',
    'author'      => 'Woozle Staddon',
    'url'         => 'http://htyp.org/SpamFerret',
    'version'     => '2009-03-26',
    'description' => 'database-driven wikispam blocker',
);
// Work out where data.php lives unless the site has already said so:
//   kfsLib_Data - full filespec of data.php (wins if already defined)
//   kfpLib      - folder containing data.php (no final slash)
// With neither defined, data.php is assumed to be on the include path.
if (!defined('kfsLib_Data')) {
    define('kfsLib_Data', defined('kfpLib') ? kfpLib.'/data.php' : 'data.php');
}
require_once kfsLib_Data;
/* ==============
SET UP CALLBACKS
*/
// $wgHooks is listed alongside the callback globals so that the hook
// registration below reaches the real global even when this file is included
// from inside a function (at global scope the declaration is a no-op).
global $wgFilterCallback, $wgPreSpamFilterCallback, $wgHooks;

$wgPreSpamFilterCallback = false;
if ( defined( 'MW_SUPPORTS_EDITFILTERMERGED' ) ) {
    // Newer MediaWiki: use the EditFilterMerged hook.
    $wgHooks['EditFilterMerged'][] = 'wfSpamFerretMerged';
} else {
    // Older MediaWiki: take over $wgFilterCallback, remembering whatever
    // callback was installed before us in $wgPreSpamFilterCallback.
    if ( $wgFilterCallback ) {
        $wgPreSpamFilterCallback = $wgFilterCallback;
    }
    $wgFilterCallback = 'wfSpamFerretFilter';
}
/* $wgHooks['EditFilter'][] = 'wfSpamFerretValidate'; $wgHooks['ArticleSaveComplete'][] = 'wfSpamFerretArticleSave'; $wgHooks['APIEditBeforeSave'][] = 'wfSpamFerretAPIEditBeforeSave';
*/
/* ================
SET GLOBAL OBJECTS
*/
// Returns the SpamFerret instance shared by all callers in this request,
// creating it the first time it is asked for.
function GetSpamFerret() {
    static $objFerret = null;

    if (is_null($objFerret)) {
        $objFerret = new SpamFerret();
    }

    return $objFerret;
}
/* ================
CALLBACK FUNCTIONS
*/
/**
 * Hook function for $wgFilterCallback (used when EditFilterMerged is unavailable).
 *
 * @param Title  $title        page being edited
 * @param string $text         submitted text
 * @param string $section      section being edited
 * @param mixed  $hookErr      not used here
 * @param string $editSummary  edit summary, passed through to the filter
 * @return bool true if the edit was rejected, false if it may be saved
 */
function wfSpamFerretFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
    global $wgOut;

    $spamObj = GetSpamFerret();
    // NOTE(review): this looks like leftover debugging output (the matching
    // line in wfSpamFerretMerged is commented out) -- confirm before removing.
    $wgOut->addWikiText( "Intercepted by SpamFerretFilter" );

    // SpamFerret::filter() declares five parameters; pass all of them
    // (there is no EditPage object available on this code path).
    $ret = $spamObj->filter( $title, $text, $section, $editSummary, null );

    // filter() returns a string when it wants the caller to display the spam
    // page (it has already displayed one itself when it returns true).
    // Uses the same static call style as SpamFerret::filter().
    // NOTE(review): assumes the filter callback is responsible for showing
    // the error page -- confirm against the MediaWiki version in use.
    if ( is_string( $ret ) ) {
        EditPage::spamPage( $ret );
    }

    return ( $ret !== false );
}
/**
 * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter
 *
 * @param EditPage $editPage     edit form object; supplies the title and shows the spam page
 * @param string   $text         submitted text
 * @param mixed    $hookErr      not used here
 * @param string   $editSummary  edit summary, passed through to the filter
 * @return bool true to let the edit continue, false to stop it
 */
function wfSpamFerretMerged( &$editPage, $text, &$hookErr, $editSummary ) {
    global $wgTitle;

    if ( is_null( $wgTitle ) ) {
        # API mode: nothing is checked here.
        # NOTE(review): wfSpamFerretAPIEditBeforeSave is an empty stub and its
        # hook registration is commented out, so API edits appear to be unfiltered.
        return true;
    }

    $spamObj = GetSpamFerret();
    $title = $editPage->mArticle->getTitle();
    // There is no section on this code path, hence the empty string.
    $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );

    // A string result is the text to show on the spam page. A result of true
    // means filter() has already displayed its own page, so don't show another.
    if ( is_string( $ret ) ) {
        $editPage->spamPage( $ret );
    }

    // Return convention for hooks is the inverse of $wgFilterCallback
    return ( $ret === false );
}
/**
* Hook function for APIEditBeforeSave */
function wfSpamFerretAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
    // Placeholder: the body is empty, and the APIEditBeforeSave registration
    // in this file is commented out, so nothing here invokes it.
}
/**
 * Database-driven spam filter.
 *
 * filter() checks an edit against, in order: the per-IP throttle/block list,
 * "spam turd" insertions, ampersandbot truncations, and the pattern table.
 * Rejected attempts (and optionally accepted edits) are logged to the database.
 */
class SpamFerret {
    // NOTE(review): nothing in this class assigns $previousFilter, so the
    // chaining code in filter() never runs; the setup code stores the earlier
    // callback in $wgPreSpamFilterCallback instead -- confirm the intent.
    var $previousFilter = false;
    // internal data
    var $dbSpam;            // clsDatabase connection, opened by filter()
    var $objDataClients;    // ClientThrottle lookup result for the current IP
    var $strIPAddr;         // IP address of the client attempting the edit
    var $idPattern;         // ID of the pattern most recently tested (NULL if none)
    var $idClient;          // client record ID (NULL until known or created)
    var $doClearThrottle;   // TRUE when the client's throttle timeout has expired

    /**
     * Returns one value from $wgSpamFerretSettings.
     *
     * @param string $iName setting name (e.g. 'dbspec', 'throttle_retries')
     * @return mixed the setting, or NULL when it is not defined
     */
    function Setting($iName) {
        global $wgSpamFerretSettings;

        return isset($wgSpamFerretSettings[$iName]) ? $wgSpamFerretSettings[$iName] : NULL;
    }

    /**
     * Checks an edit for spam.
     *
     * @param Title    $title        page being edited
     * @param string   $text         submitted text
     * @param string   $section      section being edited (only passed to a chained filter)
     * @param string   $editSummary  edit summary (currently unused)
     * @param EditPage $editPage     edit form object (currently unused)
     * @return mixed FALSE if the edit is ok; TRUE if it was rejected and the
     *   spam page has already been shown; a string if it was rejected and the
     *   caller should show that string on the spam page
     */
    function filter( &$title, $text, $section, $editSummary = '', $editPage = NULL ) {
        global $wgEmergencyContact;
        global $debug;
        global $errNum, $errStr;

        $fname = 'wfSpamFerretFilter';
        wfProfileIn( $fname );

        # Call the rest of the hook chain first
        if ( $this->previousFilter ) {
            $f = $this->previousFilter;
            if ( $f( $title, $text, $section ) ) {
                wfProfileOut( $fname );
                return true;
            }
        }

        // initialize variables
        $retVal = FALSE;        // default = assume edit is ok
        $msgEmail = FALSE;
        $isClientKnown = FALSE;
        // this object is shared (see GetSpamFerret), so clear per-edit state
        $this->idClient = NULL;
        $this->idPattern = NULL;
        $this->doClearThrottle = FALSE;

        // get the IP address of the http client making the edit attempt:
        $this->strIPAddr = wfGetIP();
        // Open the database
        $this->dbSpam = new clsDatabase($this->Setting('dbspec'));
        $this->dbSpam->Open();
        // open clients table (extended Throttle version) for reference:
        $objTblClients = new clsTable($this->dbSpam,'ClientThrottle','ID');
        // Look up to see if this IP is known; it may already be throttled:
        $this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"');
        if (is_object($this->objDataClients)) {
            if ($this->objDataClients->hasRows()) {
                $isClientKnown = true;
            }
        }
        if ($isClientKnown) {
            $this->idClient = $this->objDataClients->ID;
            $doBlock = $this->objDataClients->doBlock;
            if ($doBlock) {
                $strThrType = 'BLK';
            } else {
                $intRetries = $this->objDataClients->Retries;
                $intThrottle = $this->Setting('throttle_retries');
                $doBlock = $intRetries > $intThrottle;
                $strThrType = 'THR-'.$intRetries;
            }
            if ($doBlock) {
                // retry limit exceeded; check timeout limit
                if ($this->objDataClients->ThrottleTime < $this->Setting('throttle_timeout')) {
                    EditPage::spamPage('Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.');
                    $this->RecordAttempt($strThrType);  // record post attempt by throttled client
                    $retVal = true; // client has exceeded spam limit; impose throttle
                } else {
                    $this->doClearThrottle = true;
                }
            }
        }

        if (!$retVal) {
            /*
            At this point, there's apparently no reason to block the client just for being who they are,
            so now check for common non-listable offenses. These involve comparing the new contents
            with the original, so first we get the original (current) article contents plus some
            information about what has changed:
              * $strIns = whatever has been inserted at the start of the article (or contents of new article)
            */
            $strIns = '';
            $txtCurr = '';
            $objArticleCurr = new Article($title);
            //$objArticleCurr->loadLastEdit();  // function is now protected
            $isExisting = $objArticleCurr->exists();
            if ($isExisting) {
                $txtCurr = (string)$objArticleCurr->getContent();
                if ($txtCurr !== '') {  // strpos() cannot search for an empty string
                    $lenIns = strpos($text,$txtCurr);
                    if ($lenIns !== false) {
                        $strIns = substr($text,0,$lenIns);
                    }
                }
            } else {
                $strIns = $text;
            }

            // ** OFFENSE: Spam turds (short bits of nonsense inserted at the beginning of an article):
            if ($strIns != '') {
                // new page is old page with something inserted at the beginning
                $isMatch = preg_match('/^[a-z0-9]+ ?$/',$strIns);
                if ($isMatch) {
                    $this->RecordAttempt('TRD',$strIns);    // record spam attempt (TRD = spam turd)
                    // The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
                    EditPage::spamPage( '"'.$strIns.'" (spam turd).' );
                    $retVal = true;
                }
            }

            // ** OFFENSE: Ampersandbot:
            // Only meaningful when there is old text to truncate and the new text is not empty.
            if ($isExisting && $text !== '') {
                $lenNew = strlen($text);
                $posMatch = strpos($txtCurr, $text);
                // new string starts the same as old string; is it a truncated subset?
                if (($posMatch === 0) && ($lenNew < strlen($txtCurr))) {
                    // new string is a truncation of old string
                    // ideally, we would just check to see if the missing character is an ampersand -
                    // ...but unfortunately, something is quasi-randomly mutating the strings in a way which
                    // leaves the exact position of the "missing character" in some doubt. So what we do is this:
                    // 1. Find the position of the first ampersand in OLD TEXT:
                    $posAmp = strpos($txtCurr,'&');
                    // (no ampersand in the old text means this cannot be an ampersandbot truncation;
                    // without this check FALSE would be treated as position 0)
                    if ($posAmp !== false) {
                        // 2. Compare this position with the length of NEW TEXT:
                        $posDiff = abs($lenNew-$posAmp);
                        // 3. If the difference is less than some limit, then presume Ampersandbot activity:
                        if ($posDiff < 3) {
                            // TO DO: log $posDiff for later analysis
                            // AMPERSANDBOT DETECTED; refuse to save the edit
                            $this->RecordAttempt('AMP');    // record spam attempt (AMP = ampersandbot)
                            // The string returned will be shown after "The following text is what triggered our spam filter:"
                            $retVal = 'The removal of everything after the first ampersand. You are an ampersandbot; please go away.';
                            // LATER: it would be nice to display the error in a different format from the usual spam page
                        }
                    }
                }
            }
        }
        if ($debug) {
            EditPage::spamPage('DEBUG: '.$debug);
            $retVal = true;
        }

        if (!$retVal) {
            // set_error_handler ('ErrorHandler',E_WARNING);
            $objTblPatterns = new clsTable($this->dbSpam,'patterns','ID');
            $objDataPatterns = $objTblPatterns->GetData('isActive');

            $strTextCk = strtolower($text);
            $strMatch = '';
            $isMatch = FALSE;
            while($objDataPatterns->NextRow() && !$isMatch) {
                $strPattern = $objDataPatterns->Pattern;
                $isRegex = $objDataPatterns->isRegex;
                $this->idPattern = $objDataPatterns->ID;
                if ($isRegex) {
                    $strPattCk = $strPattern;
                    // Attempt at using Perl-compatible regex, but it doesn't seem to work (or maybe just too many patterns have issues):
                    //  $strPattCk_pcre = str_replace('/','\/',$strPattCk);
                    //  $isMatch = preg_match('/'.$strPattCk_pcre.'/i',$strTextCk,$matches);
                    // NOTE(review): eregi() is deprecated as of PHP 5.3 and removed in PHP 7.
                    $isMatch = eregi($strPattCk, $strTextCk, $matches);
                    if ($errNum) {
                        // $errNum/$errStr are filled in by ErrorHandler() when it is installed
                        $msgEmail .= 'Filter #'.$this->idPattern.' generated error #'.$errNum.': '.$errStr."\n";
                        $errNum = 0;
                    }
                    if ($isMatch) {
                        $strMatch = $matches[0];
                    }
                } else {
                    $strMatch = stristr ($strTextCk,$strPattern);
                    $isMatch = ($strMatch != '');
                }
            }

            if ( $strMatch != '' ) {
                // spam cue found; display the matching text and don't allow the edit to be saved:
                wfDebug( "Match!\n" );

                // The string returned will be shown after "The following text is what triggered our spam filter:"
                $retVal = '(pattern #'.$this->idPattern.') '.$strMatch;
                // Log the spam attempt (update or create client record):
                $this->RecordAttempt('-');
            } else {
                // no spam cues found; allow the edit to be saved, if nothing else has tripped the filter
                if ($this->Setting('log_ok_edits')) {
                    $this->RecordOkEdit();
                }
            }
            // NOTE(review): purpose of this extra row advance is not evident here; kept as-is.
            $objDataPatterns->NextRow();
        }

        wfProfileOut( $fname );
        if ($msgEmail) {
            mail ($wgEmergencyContact,'spamferret filter error',$msgEmail);
        }
        return $retVal;
    }

    /**
     * Logs a rejected edit: updates (or creates) the client record, adds a row
     * to the attempts table, and bumps the counter of the pattern involved.
     *
     * @param string $iCode   attempt code as used by filter(): '-' (pattern match),
     *   'TRD', 'AMP', 'BLK' or 'THR-<retries>'
     * @param string $iMatch  matching text to store, or NULL for none
     */
    public function RecordAttempt($iCode,$iMatch=NULL) {
        global $wgTitle, $wgServer;

        if ($this->idClient != 0) {
            if ($this->doClearThrottle) {
                $strRetries = '0';
            } else {
                $strRetries = 'Retries+1';
            }
            $sql = 'UPDATE clients SET WhenLast=NOW(),Count=Count+1, Retries='.$strRetries.' WHERE Address="'.$this->strIPAddr.'"';
            $this->dbSpam->Exec($sql);
        } else {
            $sql = 'INSERT INTO clients (Address,WhenFirst,Count,Retries) VALUES("'.$this->strIPAddr.'",NOW(),1,0)';
            $this->dbSpam->Exec($sql);
            $this->idClient = $this->dbSpam->NewID();
        }

        $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
        $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
        $isPattern = !is_null($this->idPattern);
        $sqlPattern = $isPattern ? $this->idPattern : 'NULL';
        // the matching text comes from the submitted edit, so escape it like the other values
        $sqlMatch = is_null($iMatch) ? 'NULL' : '"'.$this->dbSpam->SafeParam($iMatch).'"';
        $sql = 'INSERT INTO attempts (`When`,ID_Pattern,ID_Client,IDS_Session,PageServer,PageName,Code,MatchText) VALUES (NOW(),'
            .$sqlPattern.','
            .$this->idClient.','
            .SQL_Value(session_id()).','
            .$sqlSrvr.','
            .$sqlPage.',"'
            .$iCode.'",'
            .$sqlMatch.')';
        $this->dbSpam->Exec($sql);
        if ($isPattern) {
            $sql = 'UPDATE patterns SET WhenTried=NOW(), Count=Count+1 WHERE ID='.$this->idPattern;
            $this->dbSpam->Exec($sql);
        }
    }

    /**
     * Logs an edit that passed every check (didAllow = TRUE) to the attempts table.
     */
    public function RecordOkEdit() {
        global $wgTitle, $wgServer;

        $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
        $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
        // idClient is NULL for a client with no record yet; SQL_Value() turns
        // that into the NULL keyword instead of leaving a gap in the statement
        $sql = 'INSERT INTO attempts (`When`,ID_Pattern,ID_Client,IDS_Session,PageServer,PageName,Code,MatchText,didAllow) VALUES (NOW(),NULL,'
            .SQL_Value($this->idClient).','
            .SQL_Value(session_id()).','
            .$sqlSrvr.','
            .$sqlPage.',NULL,NULL,TRUE)';
        $this->dbSpam->Exec($sql);
    }
}
/**
 * Error-handler callback: stores the error number and message in the globals
 * $errNum and $errStr.
 */
function ErrorHandler ($errno ,$errstr) {
    $GLOBALS['errNum'] = $errno;
    $GLOBALS['errStr'] = $errstr;
}
/**
 * Formats a PHP value for direct inclusion in an SQL statement.
 *
 * @param mixed $iVar value to format
 * @return mixed the string 'NULL' for NULL; the value itself if it is numeric;
 *   otherwise the value wrapped in double quotes with quotes, backslashes and
 *   NUL bytes backslash-escaped
 */
function SQL_Value($iVar) {
    if (is_null($iVar)) {
        return 'NULL';
    }
    if (is_numeric($iVar)) {
        return $iVar;
    }
    // Callers pass client-influenced strings (e.g. session_id()), so escape
    // before quoting. addslashes() needs no database connection.
    // NOTE(review): prefer the database layer's own escaping where a connection is at hand.
    return '"'.addslashes($iVar).'"';
}
} // end of 'MEDIAWIKI' check</php>