Difference between revisions of "SpamFerret.php"

from HTYP, the free directory anyone can edit if they can prove to me that they're not a spambot
Jump to navigation Jump to search
(→‎Code: 10/12 version supports throttling by IP)
(hand-merge of /dev and previous wiki version, with minor fixes)
 
(18 intermediate revisions by 2 users not shown)
Line 1: Line 1:
 
==Navigation==
 
==Navigation==
[[computing]]: [[software]]: [[MediaWiki]]: [[fighting spam posts in MediaWiki|fighting spam]]: [[SpamFerret]]: [[SpamFerret.php]]
+
{{#lst:SpamFerret|navbar}}: [[SpamFerret.php]]
 +
 
 
==Code==
 
==Code==
 
<php><?php
 
<php><?php
Line 8: Line 9:
 
   2007-09-30 (Wzl) fixing regex processing
 
   2007-09-30 (Wzl) fixing regex processing
 
   2007-10-11 (Wzl) logging ampersandbot attempts; spam throttle (automatic temporary blacklist)
 
   2007-10-11 (Wzl) logging ampersandbot attempts; spam throttle (automatic temporary blacklist)
 +
  2007-10-13 (Wzl) fixed some issues which were preventing throttling from working - mainly changes to SQL
 +
  2007-10-15 (Wzl) "Code" wasn't being recorded. Decided that normal filtering should use code '-' so NULL means something is wrong.
 +
  2007-10-28 (Wzl) Events with THR and AMP codes *still* weren't being recorded because method call was improperly formatted.
 +
  2007-12-23 (Wzl) Emails wikimaster if eregi() returns an error (due to improperly formatted regex)
 +
  2007-12-26 (Wzl) Spam turd rejection / logging
 +
  2007-12-27 (Wzl) Fixed spam turd detection to work for new pages too (will probably need refinement)
 +
  2007-12-27 (Wzl) Ooops. Replaced missing if-block from regex results when inserted text is found.
 +
  2008-08-29 (Wzl) Added permanent IP blocking
 +
  2008-09-04 (Wzl) Added (optional) logging of successful edits
 +
  2008-09-19 (Wzl) Actually *set* the "didEdit" flag for successful edits <facepalms>
 +
  2008-10-21 (Wzl) Fixed minor syntax error in "defines"
 +
  2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?)
 +
  2009-03-10 (Wzl) "require" -> "require_once" so other extensions can use data.php without conflict
 +
    also optional $kfpWzlLibs so data.php can be somewhere not on the path
 +
  2009-03-18 (Wzl) Got rid of shared.php requirement; now using kfpLib to locate data.php
 +
  2009-03-26 (Wzl) Rewrote data library calls to use new classes (no longer using deprecated/removed classes)
 +
    Also modified to use newer function hooks
 +
  2009-04-24 (Wzl) fixed "strict" bug referencing unset $txtCurr when creating new page
 +
  2009-07-05 (Wzl) Using LibMgr
 +
  2009-07-14 (Wzl) Added attempts.Diff field, patterns.isDIff
 +
  2009-07-15 (Wzl)
 +
    On advice from FreeNode##php, changed from eregi() to preg_match()
 +
    Added option to match diff results instead of submitted edit only
 +
    Saves diff of each change, approved or not
 +
    BUG: approved edits are not being logged properly; using "OK" code and logging as failed
 +
  2009-07-26 (Wzl) fixed minor warning error on line 252
 +
  2009-08-07 (Wzl) email notification working; removed TRD and AMP hard-coded offenses, to be redone as isDiff filters if needed
 +
  2009-08-08 (Wzl) create client record immediately if client is not recognized; don't depend on spam filter being triggered
 +
  2009-08-09 (Wzl) restructured "client" and "attempt" tables (was "clients", "attempts"); not backwards-compatible
 +
  2010-02-24 (Wzl) some code-tidying; trying to restrict passing of data between methods to single array var in args/return
 +
  2010-08-17 (Wzl) added some debug code to CheckRegex(); fixed problem with escaped chars in filter
 +
  2011-04-26 (Wzl) minor bug fixes; hand-merge with version on htyp.org
 +
TO DO:
 +
  * Throttled save attempts should check for spam, just for data-gathering purposes.
 +
    Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option?
 +
  * Figure out how to display a different error message than "the following text is what triggered our filter:"
 +
OPTIONAL SETTINGS:
 +
  kfpLib - path to data.php folder (no final slash)
 +
  kfsLib_Data - filespec of data.php
 
*/
 
*/
 
// debugging activation
 
define(KDO_DEBUG,0);
 
define(KDO_DEBUG_STACK,0);
 
// debugging options
 
define(KDO_DEBUG_HTML,1);
 
define(KDO_DEBUG_IMMED,1);
 
define(KDO_DEBUG_DARK,0);
 
  
 
# Loader for spam blacklist feature
 
# Loader for spam blacklist feature
Line 22: Line 54:
  
 
if ( defined( 'MEDIAWIKI' ) ) {
 
if ( defined( 'MEDIAWIKI' ) ) {
require('shared.php');
 
  
 +
$wgExtensionCredits['other'][] = array(
 +
'name' => 'SpamFerret',
 +
'author' => 'Woozle Staddon',
 +
'url' => 'http://htyp.org/SpamFerret',
 +
'version' => '2011-04-26',
 +
'description' => 'database-driven wikispam blocker',
 +
);
 +
if (!defined('kfsLib_Data')) {
 +
    if (defined('kfpLib')) {
 +
define('kfsLib_Data', kfpLib.'/data.php');
 +
    } else {
 +
define('kfsLib_Data','data.php'); // assume it's on the path
 +
    }
 +
}
 +
if (!defined('LIBMGR')) {
 +
    require('libmgr.php');
 +
}
 +
clsLibMgr::Add('data', kfsLib_Data,__FILE__,__LINE__);
 +
clsLibMgr::Load('data',__FILE__,__LINE__);
 +
 +
/* ==============
 +
SET UP CALLBACKS
 +
*/
 
global $wgFilterCallback, $wgPreSpamFilterCallback;
 
global $wgFilterCallback, $wgPreSpamFilterCallback;
  
if ( $wgFilterCallback ) {
+
$wgPreSpamFilterCallback = false;
$wgPreSpamFilterCallback = $wgFilterCallback;
+
if ( defined( 'MW_SUPPORTS_EDITFILTERMERGED' ) ) {
 +
    $wgHooks['EditFilterMerged'][] = 'wfSpamFerretMerged';
 
} else {
 
} else {
$wgPreSpamFilterCallback = false;
+
    if ( $wgFilterCallback ) {
 +
        $wgPreSpamFilterCallback = $wgFilterCallback;
 +
    }
 +
    $wgFilterCallback = 'wfSpamFerretFilter';
 +
}
 +
 
 +
/*
 +
$wgHooks['EditFilter'][] = 'wfSpamFerretValidate';
 +
$wgHooks['ArticleSaveComplete'][] = 'wfSpamFerretArticleSave';
 +
$wgHooks['APIEditBeforeSave'][] = 'wfSpamFerretAPIEditBeforeSave';
 +
*/
 +
 
 +
/* ================
 +
SET GLOBAL OBJECTS
 +
*/
 +
function GetSpamFerret() {
 +
    static $objFerret;
 +
 
 +
    if (!isset($objFerret)) {
 +
$objFerret = new SpamFerret();
 +
    }
 +
    return $objFerret;
 +
}
 +
/* ================
 +
CALLBACK FUNCTIONS
 +
*/
 +
/**
 +
* Hook function for $wgFilterCallback
 +
*/
 +
//function wfSpamFerretFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
 +
function wfSpamFerretFilter( &$title, $text, $section ) {
 +
    global $wgOut;
 +
 
 +
    $spamObj = GetSpamFerret();
 +
    $wgOut->addWikiText( "Intercepted by SpamFerretFilter" );
 +
//    $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
 +
    return $spamObj->filter( $title, $text, $section );
 
}
 
}
  
$wgFilterCallback = 'wfSpamFerretLoader';
+
/**
$wgExtensionCredits['other'][] = array(
+
* Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter
'name' => 'SpamFerret',
+
*/
         'author' => 'Woozle Staddon',
+
function wfSpamFerretMerged( $editPage, $text, &$hookErr, $editSummary ) {
         'url' => 'http://htyp.org/SpamFerret',
+
    global $wgTitle,$wgOut;
'version' => '2007-10-11',
+
 
         'description' => 'database-driven wikispam content blocker',
+
    if( is_null( $wgTitle ) ) {
);
+
         # API mode
 +
         # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
 +
         return true;
 +
    }
  
function wfSpamFerretLoader( &$title, $text, $section ) {
+
    $spamObj = GetSpamFerret();
static $spamObj = false;
+
    $title = $editPage->mArticle->getTitle();
global $wgSpamFerretSettings, $wgPreSpamFilterCallback;
+
//    $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
 +
    $ret = $spamObj->filter( $title, $text, '', $editPage );
 +
    if ( $ret !== false ) $editPage->spamPage( $ret );
  
if ( $spamObj === false ) {
+
// additional text can be added here:
$spamObj = new SpamFerret( $wgSpamFerretSettings );
+
//    $wgOut->addWikiText( "Intercepted by SpamFerretMerged" );
}
 
  
return $spamObj->filter( $title, $text, $section );
+
    // Return convention for hooks is the inverse of $wgFilterCallback
 +
    return ( $ret === false );
 +
}
 +
/**
 +
* Hook function for APIEditBeforeSave
 +
*/
 +
function wfSpamFerretAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
 
}
 
}
 +
 
class SpamFerret {
 
class SpamFerret {
var $dbspec;
+
  var $previousFilter = false;
var $throttle_retries;
 
var $throttle_timeout;
 
var $previousFilter = false;
 
 
// internal data
 
// internal data
var $dbSpam;
+
  var $dbSpam;
var $objDataClients;
+
  var $objDataClients;
var $strIPAddr;
+
  var $strIPAddr;
var $idPattern;
+
  var $idPattern;
var $idClient;
+
  var $doClearThrottle;
var $isClientKnown;
+
 
var $doClearThrottle;
+
  function Setting($iName) {
+
    global $wgSpamFerretSettings;
function SpamFerret( $settings = array() ) {
+
 
global $IP;
+
    return $wgSpamFerretSettings[$iName];
 +
  }
  
foreach ( $settings as $name => $value ) {
+
  /*-----
$this->$name = $value;
+
    PROPERTIES USED: $this->idPattern (out)
}
+
  */
}
+
  function filter( &$title, $text, $section, $editPage = FALSE ) {
 +
    global $wgArticle, $wgDBname, $wgMemc, $messageMemc, $wgVersion, $wgOut;
 +
    global $wgTitle, $wgServer;
 +
    global $debug;
 +
    global $errNum, $errStr;
 +
    global $gErrorText;
 +
    // debugging:
 +
    global $sql;
  
function filter( &$title, $text, $section ) {
+
    $fname = 'wfSpamFerretFilter';
global $wgSpamFerretSettings;
+
    wfProfileIn( $fname );
global $wgArticle, $wgDBname, $wgMemc, $messageMemc, $wgVersion, $wgOut;
 
global $wgTitle, $wgServer;
 
global $debug;
 
  
$fname = 'wfSpamFerretFilter';
+
    ini_set('track_errors', 1);
wfProfileIn( $fname );
 
  
# Call the rest of the hook chain first
+
    # Call the rest of the hook chain first
if ( $this->previousFilter ) {
+
    if ( $this->previousFilter ) {
$f = $this->previousFilter;
+
    $f = $this->previousFilter;
if ( $f( $title, $text, $section ) ) {
+
    if ( $f( $title, $text, $section ) ) {
wfProfileOut( $fname );
+
    wfProfileOut( $fname );
return true;
+
    return true;
}
+
    }
}
+
    }
 +
// initialize variables
 +
    $retVal = FALSE; // default = assume edit is ok
 +
    $gErrorText = FALSE;
 +
    $isClientKnown = FALSE;
  
$retVal = false; // default = assume edit is ok
+
    $this->txtEditRaw = $text; // DEPRECATED
 +
    $arArgs['edit-raw'] = $text;
  
 
// get the IP address of the http client making the edit attempt:
 
// get the IP address of the http client making the edit attempt:
$this->strIPAddr = wfGetIP();
+
    $this->strIPAddr = wfGetIP();
 
// Open the database
 
// Open the database
$this->dbSpam = new clsDatabase($this->dbspec);
+
    $this->OpenDatabase();
 
// open clients table (extended Throttle version) for reference:
 
// open clients table (extended Throttle version) for reference:
$objTblClients = new clsDataTable($this->dbSpam,'ClientThrottle');
+
//return TRUE;
 +
    $objTblClients = new clsTable($this->dbSpam);
 +
      $objTblClients->Name('ClientThrottle2');
 +
      $objTblClients->KeyName('Address');
 
// Look up to see if this IP is known; it may already be throttled:
 
// Look up to see if this IP is known; it may already be throttled:
$this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"');
+
      $this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"');
if (is_object($this->objDataClients)) {
+
      if (is_object($this->objDataClients)) {
if ($this->objDataClients->RowCount() > 0) {
+
if ($this->objDataClients->hasRows()) {
$this->isClientKnown = true;
+
    $isClientKnown = TRUE;
}
+
}
}
+
      }
if ($this->isClientKnown) {
+
      if ($isClientKnown) {
$this->idClient = $this->objDataClients->GetValue('ID');
+
$this->objDataClients->FirstRow();
if ($this->objDataClients->GetValue('Retries') > $this->throttle_retries) {
+
$doBlock = $this->objDataClients->doBlock;
 +
if ($doBlock) {
 +
$strThrType = 'BLK';
 +
} else {
 +
$intRetries = $this->objDataClients->Retries;
 +
$intThrottle = $this->Setting('throttle_retries');
 +
$doBlock = $intRetries > $intThrottle;
 +
$strThrType = 'THR-'.$intRetries;
 +
}
 +
if ($doBlock) {
 +
  $arArgs['diff'] = NULL; // not applicable
 
// retry limit exceeded; check timeout limit
 
// retry limit exceeded; check timeout limit
if ($this->objDataClients->GetValue('ThrottleTime') < $this->throttle_timeout) {
+
  if ($this->objDataClients->ThrottleTime < $this->Setting('throttle_timeout')) {
$retval = true; // client has exceeded spam limit; impose throttle
+
    $txtMsg = 'Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.';
RecordAttempt('THR'); // record post attempt by throttled client
+
    if (is_object($editPage)) {
EditPage::spamPage('Too many spam attempts from this IP address. Please come back later.');
+
$editPage->spamPage($txtMsg);
} else {
+
    } else {
$this->doClearThrottle = true;
+
EditPage::spamPage($txtMsg); // older MW code doesn't supply $editPage
}
+
    }
}
+
    $arArgs['code'] = $strThrType;
}
+
    $this->RecordAttempt($arArgs); // record post attempt by throttled client
 
+
    $retVal = true; // client has exceeded spam limit; impose throttle
// Check for ampersandbot
+
  } else {
$objArticleCurr = new Article($title);
+
    $this->doClearThrottle = true;
$objArticleCurr->loadLastEdit();
+
  }
$txtCurr = $objArticleCurr->getContent();
+
}
$lenNew = strlen($text);
+
      } else {
$posMatch = strpos($txtCurr, $text);
+
$this->CreateClient();
//$debug = 'OLD=['.substr($txtCurr,0,5).'] NEW=['.substr($text,0,5).'] STRPOS='.$posMatch.' ===0?:'.($posMatch===0);
+
      }
//$debug .= '...GOT TO HERE ... ';
+
//$this->txtDiff = 'N/A';
if ($posMatch===0) {
+
$arArgs['diff'] = 'N/A';
//$debug .= '1';
 
// new string starts the same as old string; is it a truncated subset?
 
if ($lenNew < strlen($txtCurr)) {
 
//$debug .= '2';
 
// new string is a truncation of old string
 
// ideally, we would just check to see if the missing character is an ampersand -
 
// ...but unfortunately, something is quasi-randomly mutating the strings in a way which
 
// leaves the exact position of the "missing character" in some doubt. So what we do is this:
 
// 1. Find the position of the first ampersand in OLD TEXT:
 
$posAmp = strpos($txtCurr,'&');
 
// 2. Compare this position with the length of NEW TEXT:
 
$posDiff = abs(strlen($text)-$posAmp);
 
// 3. If the difference is less than some limit, then presume Ampersandbot activity:
 
if ($posDiff < 3) {
 
// TO DO: log $posDiff for later analysis
 
// AMPERSANDBOT DETECTED; refuse to save the edit
 
RecordAttempt('AMP'); // record spam attempt (AMP = ampersandbot)
 
// The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
 
EditPage::spamPage( 'The removal of everything after the first ampersand. You are an ampersandbot; please go away.' );
 
// LATER: it would be nice to have a special MediaWiki:Ampersandbot page to display when this happens
 
$retVal = true;
 
}
 
}
 
}
 
if ($debug) {
 
EditPage::spamPage('DEBUG: '.$debug);
 
$retVal = true;
 
}
 
  
if (!$retval) {
+
if (!$retVal) {
$objTblPatterns = new clsDataTable($this->dbSpam,'patterns');
+
  $arRtn = $this->GetDiff($title); // get the diff between edit and current contents
$objDataPatterns = $objTblPatterns->GetData('isActive');
+
    $arArgs['diff'] = $arRtn['diff'];
/*
+
    $arArgs['doAll'] = FALSE;
$debug .= ' objDataPatterns is object:'.is_object($objDataPatterns);
+
    $arRtn = $this->CheckFilters($arArgs);
$debug .= ' objDataPatterns.Res is object:'.is_object($objDataPatterns->Res);
+
    $arArgs['edit-to-check'] = $arRtn['edit-to-check'];
$debug .= ' objDataPatterns.Row is array:'.is_array($objDataPatterns->Row);
+
  if ( $this->isMatch ) {
$debug .= ' objDataPatterns.Res is class '.get_class($objDataPatterns->Res);
 
$debug .= ' objDataPatterns.Res has '.$objDataPatterns->Res->num_rows.' row(s)';
 
*/
 
$strTextCk = strtolower($text);
 
//$cr = "\n";
 
//echo 'DEBUGGING spam filter - please excuse the mess!'.$cr;
 
while(is_array($objDataPatterns->Row)) {
 
$strPattern = $objDataPatterns->GetValue('Pattern');
 
$isRegex = $objDataPatterns->GetValue('isRegex');
 
$this->idPattern = $objDataPatterns->GetValue('ID');
 
if ($isRegex) {
 
//echo 'PATTERN: '.$strPattern.$cr;
 
$strPattCk = $strPattern;
 
// $isMatch = preg_match('/'.$strPattCk.'/',$strTextCk,$matches);
 
$isMatch = eregi($strPattCk, $strTextCk, $matches);
 
if ($isMatch) {
 
$strMatch = $matches[0];
 
}
 
} else {
 
$strMatch = stristr ($strTextCk,$strPattern);
 
$isMatch = ($strMatch != '');
 
}
 
// $debug .= 'ROW: '.DumpArray($objDataPatterns->Row);
 
 
if ($isMatch) {
 
$objDataPatterns->Row = NULL; // stop the search
 
} else {
 
$objDataPatterns->NextRow(); // keep looking
 
}
 
}
 
 
if ( $strMatch != '' ) {
 
 
// spam cue found; display the matching text and don't allow the edit to be saved:
 
// spam cue found; display the matching text and don't allow the edit to be saved:
wfDebug( "Match!\n" );
+
    wfDebug( "Match!\n" );
 
 
 
// The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
 
// The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
EditPage::spamPage( $strMatch );
+
    $retVal = '(pattern #'.$this->idPattern.') ['.htmlspecialchars($this->strMatch).']';
 
// Log the spam attempt:
 
// Log the spam attempt:
// $sql = 'SELECT * FROM clients WHERE Address="'.$this->strIPAddr.'"';
+
    $arArgs['code'] = '-';
// $this->objDataClients = $this->dbSpam->Query($sql);
+
    $this->RecordAttempt($arArgs);
// update or create client record:
+
  } else {
$this->RecordAttempt(NULL);
 
$retVal = true;
 
} else {
 
 
// no spam cues found; allow the edit to be saved, if nothing else has tripped the filter
 
// no spam cues found; allow the edit to be saved, if nothing else has tripped the filter
/*
+
    //if ($this->Setting('log_ok_edits')) {
EditPage::spamPage( 'DEBUGGING: '.$debug );
+
      $this->idPattern = NULL;
$retVal = true;
+
      $this->RecordOkEdit($arArgs);
/*/
+
    //$this->RecordAttempt('OK');
/**/
+
    //}
}
+
  }
}
+
}
  
wfProfileOut( $fname );
+
wfProfileOut( $fname );
return $retVal;
+
$this->ReportErrors();
 +
//$wgOut->addHTML($out);
 +
return $retVal;
 
/**/
 
/**/
 +
  }
 +
  public function OpenDatabase() {
 +
    $this->dbSpam = new clsDatabase($this->Setting('dbspec'));
 +
    $this->dbSpam->Open();
 +
  }
 +
  public function FiltTbl() {
 +
    $doLoad = TRUE;
 +
    $doLoad = empty($this->objFilts);
 +
    if ($doLoad) {
 +
      $objTbl = new clsTable($this->dbSpam);
 +
$objTbl->Name('patterns');
 +
$objTbl->KeyName('ID');
 +
      $this->objFilts = $objTbl;
 +
    }
 +
    return $this->objFilts;
 +
  }
 +
  /*-----
 +
    INPUT:
 +
      $iTitle - page for comparing proposed edit
 +
      $this->txtEditRaw (DEPRECATED; use $iarArgs['text-to-check']
 +
    OUTPUT:
 +
      return ['diff']
 +
  */
 +
  public function GetDiff($iTitle) {
 +
    $objArticleCurr = new Article($iTitle);
 +
    if ($objArticleCurr->exists()) {
 +
      $txtCurr = $objArticleCurr->getContent();
 +
      $txtDiff = FigureDiff($txtCurr,$this->txtEditRaw);
 +
    } else {
 +
      $txtDiff = '!!NEW: '.$this->txtEditRaw;
 +
    }
 +
    //$this->txtDiff = $txtDiff;
 +
    $arOut['diff'] = $txtDiff;
 +
    return $arOut;
 +
}
 +
    /*-----
 +
      INPUT:
 +
$iarArgs['doAll']
 +
$iarArgs['diff']
 +
$this->txtEditRaw - DEPRECATED; use $iarArgs
 +
      OUTPUT:
 +
$this->idPattern
 +
    */
 +
    public function CheckFilters(array $iarArgs) {
 +
    global $gRegexMatches,$gFilterMatches,$gFilterRows,$gFilterCount;
 +
    global $debug;
 +
 +
    assert('is_object($this->dbSpam)');
 +
 +
    $doCheckAll = $iarArgs['doAll'];
 +
    $strChkDiff = strtolower(nz($iarArgs['diff']));
 +
    $objFiltTbl = $this->FiltTbl();
 +
 +
    if ($doCheckAll) {
 +
$sqlFilt = NULL;
 +
    } else {
 +
$sqlFilt = 'isActive';
 +
    }
 +
 +
    $objFiltRows = $objFiltTbl->GetData($sqlFilt);
 +
    $objRow = $objFiltRows; // for shorthand
 +
 +
    $strTextEdit = strtolower($this->txtEditRaw);
 +
    //$this->txtEditChk = $strTextEdit; // text after being massaged for checking
 +
    $arOut['edit-to-check'] = $strTextEdit; // text after being massaged for checking
 +
    $this->isMatch = FALSE;
 +
    $gFilterCount = 0;
 +
    $gFilterRows = $objRow->RowCount();
 +
 +
    while($objRow->NextRow() && (!$this->isMatch || $doCheckAll)) {
 +
      $isMatch = FALSE;
 +
 +
      if ($objRow->isDiff) {
 +
$strTextCk = $strChkDiff;
 +
      } else {
 +
$strTextCk = $strTextEdit;
 +
      }
 +
 +
      if (!is_null($strTextCk)) {
 +
$gFilterCount++;
 +
$strPattern = strtolower($objRow->Pattern);
 +
$isRegex = $objRow->isRegex;
 +
$this->idPattern = $objRow->ID;
 +
if ($isRegex) {
 +
  $isMatch = $this->CheckRegex($strPattern,$strTextCk);
 +
 +
  if (isset($php_errormsg)) {
 +
  $this->AddErrorLine('Filter #'.$this->idPattern.' generated error "'.$php_errormsg);
 +
  }
 +
 +
  if ($isMatch) {
 +
  $this->strMatch = $gRegexMatches[0];
 +
  }
 +
} else {
 +
  if (empty($strPattern)) {
 +
      $isMatch = FALSE;
 +
  } else {
 +
    $this->strMatch = stristr($strTextCk,$strPattern);
 +
    $isMatch = ($this->strMatch != '');
 +
  }
 +
}
 +
if ($isMatch) {
 +
    $this->isMatch = TRUE;
 +
    if ($doCheckAll) {
 +
$gFilterMatches[$this->idPattern] = $this->strMatch;
 +
    }
 
}
 
}
public function RecordAttempt($iCode) {
+
      }
global $wgTitle, $wgServer;
+
    }
 +
  }
 +
  /*----
 +
    TO DO:
 +
      replace $gRegexMatches with return array
 +
      make this function static
 +
  */
 +
  public function CheckRegex($iPattern,$iText) {
 +
    global $gRegexMatches,$strDbg;
 +
 
 +
    $chDelim = '/';
 +
    $strPattCk = $iPattern;
 +
    // (2010-08-17) this next line causes incorrect handling of escaped characters in the filter
 +
    //$strPattCk = str_replace('\\','\\\\',$strPattCk); // make sure filter backslashes are prefixed to be literal
 +
    $strPattCk = str_replace($chDelim,'\\'.$chDelim,$strPattCk);
 +
    unset($php_errormsg); // TO DO: explain this
 +
    $strFinal = $chDelim.$strPattCk.$chDelim;
 +
    $strDbg .= "'''@preg_match'''(\"$strFinal\",\"$iText\",...)";
 +
    $isMatch = @preg_match($strFinal,$iText,$gRegexMatches);
 +
 
 +
    return $isMatch;
 +
  }
 +
  public function AddErrorLine($iText) {
 +
    global $gErrorText;
 +
 
 +
    $gErrorText .= $iText."\n";
 +
  }
 +
  public function ReportErrors() {
 +
    global $wgUser;
 +
    global $wgEmergencyContact;
 +
    global $gErrorText;
 +
 
 +
    if ($gErrorText) {
 +
$msgEmail = 'Filter error report for user '.$wgUser->getName().":\n\n";
 +
$msgEmail .= $gErrorText;
 +
mail($wgEmergencyContact,'spamferret filter error',$msgEmail);
 +
    }
 +
  }
 +
  /*----
 +
    ACTION: Create a new record for the current client
 +
  */
 +
  public function CreateClient() {
 +
    $sql = 'INSERT INTO client (Address,WhenFirst,Count,Retries) VALUES("'.$this->strIPAddr.'",NOW(),1,0)';
 +
    $this->dbSpam->Exec($sql);
 +
  }
 +
  /*----
 +
    ACTION: Update a client's record to reflect a new spam attempt
 +
  */
 +
  public function RecordClientSpam() {
 +
    if ($this->doClearThrottle) {
 +
      $strRetries = '0';
 +
    } else {
 +
      $strRetries = 'Retries+1';
 +
    }
 +
    $sql = 'UPDATE client SET WhenLast=NOW(),Count=Count+1, Retries='.$strRetries.' WHERE Address="'.$this->strIPAddr.'"';
 +
    $this->dbSpam->Exec($sql);
 +
    if ($this->dbSpam->RowsAffected() < 1) {
 +
      $this->CreateClient();
 +
      $this->AddErrorLine('Record not found for client '.$this->strIPAddr);
 +
    }
 +
  }
 +
  /*-----
 +
    INPUT:
 +
      $this->idPattern
 +
      $iarArgs['edit-raw']
 +
  */
 +
  public function RecordAttempt(array $iarArgs) {
 +
    global $wgTitle, $wgServer;
 +
 
 +
    $iCode = $iarArgs['code'];
 +
    $txtDiff = $iarArgs['diff']; // was $this->txtDiff
 +
    $txtEdit = $iarArgs['edit-raw']; // was $this->txtEditChk
 +
    $this->RecordClientSpam();
 +
 
 +
    $sqlCode = '"'.$iCode.'"';
 +
    $sqlURL = '"'.$this->dbSpam->SafeParam($wgTitle->getFullURL()).'"';
 +
    $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
 +
    $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
 +
    $sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
 +
    $sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
 +
    $sqlPattern = $this->idPattern;
 +
    if (is_null($sqlPattern)) {
 +
      $sqlPattern = 'NULL';
 +
      $isPattern = FALSE;
 +
    } else {
 +
      $isPattern = TRUE;
 +
    }
 +
    $sqlAddr = '"'.$this->strIPAddr.'"';
 +
    //$sqlMatch = is_null($iMatch)?'NULL':'"'.$iMatch.'"';
 +
    $sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES'
 +
      .'(NOW(),'
 +
      .$sqlPattern.','
 +
      .$sqlAddr.','
 +
      .SQL_Value(session_id()).','
 +
      .$sqlSrvr.','
 +
      .$sqlPage.','
 +
      .$sqlCode.','
 +
      .'FALSE,'
 +
      .$sqlEdit.','
 +
      .$sqlDiff.')';
 +
    $ok = $this->dbSpam->Exec($sql);
 +
    if ($ok !== TRUE) {
 +
      $this->AddErrorLine('SQL ['.$sql.'] in RecordAttempt() generated this error: '.$ok);
 +
    }
 +
    if ($isPattern) {
 +
      $sql = 'UPDATE patterns SET WhenTried=NOW(), Count=Count+1 WHERE ID='.$this->idPattern;
 +
      $ok = $this->dbSpam->Exec($sql);
 +
      if ($ok !== TRUE) {
 +
  $this->AddErrorLine('SQL ['.$sql.'] generated this error: '.$ok);
 +
      }
 +
    }
 +
  }
 +
  /*
 +
    INPUT:
 +
      $this->txtEditChk
 +
  */
 +
  public function RecordOkEdit(array $iarArgs) {
 +
    global $wgTitle, $wgServer;
 +
 
 +
    $txtDiff = $iarArgs['diff']; // was $this->txtDiff
 +
    $txtEdit = $iarArgs['edit-to-check']; // was $this->txtEditChk
 +
 
 +
    $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
 +
    $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
 +
    $sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
 +
    $sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
 +
    $sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES '
 +
      .'(NOW(),NULL,'
 +
      .'"'.$this->strIPAddr.'",'
 +
      .SQL_Value(session_id()).','
 +
      .$sqlSrvr.','
 +
      .$sqlPage.',"ok",TRUE,'
 +
      .$sqlEdit.','
 +
      .$sqlDiff.')';
 +
    $ok = $this->dbSpam->Exec($sql);
 +
    if ($ok !== TRUE) {
 +
      $this->AddErrorLine('SQL ['.$sql.'] in RecordOkEdit() generated this error: '.$ok);
 +
    }
 +
  }
 +
}
 +
 
 +
function ErrorHandler  ($errno  ,$errstr) {
 +
  global $errNum, $errStr;
  
if ($this->idClient != 0) {
+
  $errNum = $errno;
if ($this->doClearThrottle) {
+
  $errStr = $errstr;
$strRetries = '0';
+
}
} else {
 
$strRetries = 'Retries+1';
 
}
 
$sql = 'UPDATE clients SET WhenLast=NOW(),Count=Count+1, Retries='.$strRetries.' WHERE Address="'.$this->strIPAddr.'"';
 
$this->dbSpam->Exec($sql);
 
} else {
 
$sql = 'INSERT INTO clients (Address,WhenFirst,Count,Retries) VALUES("'.$this->strIPAddr.'",NOW(),1,0)';
 
$this->dbSpam->Exec($sql);
 
$this->idClient = $this->dbSpam->NewID();
 
}
 
  
$sqlURL = '"'.$this->dbSpam->SafeParam($wgTitle->getFullURL()).'"';
+
function SQL_Value($iVar) {
$sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
+
  if (is_null($iVar)) {
$sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
+
    return 'NULL';
$sql = 'INSERT INTO attempts (`When`,ID_Pattern,ID_Client,PageServer,PageName) VALUES (NOW(),'.$this->idPattern.','.$this->idClient.','.$sqlSrvr.','.$sqlPage.')';
+
  } else {
$this->dbSpam->Exec($sql);
+
    if (is_numeric($iVar)) {
$sql = 'UPDATE patterns SET WhenTried=NOW(), Count=Count+1 WHERE ID='.$this->idPattern;
+
      return $iVar;
$this->dbSpam->Exec($sql);
+
    } else {
}
+
      return '"'.$iVar.'"';
 +
    }
 +
  }
 
}
 
}
 +
function FigureDiff($iTextOld, $iTextNew) {
 +
  # Make temporary files
 +
// $td = wfTempDir();
 +
  $td = session_save_path();
 +
  $fhOld = fopen( $fnOld = tempnam( $td, 'SpamFerret-old-' ), 'w' );
 +
  $fhNew = fopen( $fnNew = tempnam( $td, 'SpamFerret-new-' ), 'w' );
 +
 +
  fwrite( $fhOld, $iTextOld ); fclose( $fhOld );
 +
  fwrite( $fhNew, $iTextNew ); fclose( $fhNew );
  
 +
  $ksSpamFerretDiffOptions = '-i -E --suppress-common-lines '; // this setting applies to all uses of a given filter DB
 +
  // can change for different DBs
 +
  $cmd = 'diff '.$ksSpamFerretDiffOptions.wfEscapeShellArg( $fnOld, $fnNew );
 +
  $handle = popen( $cmd, 'r' );
 +
  $result = '';
 +
  do {
 +
    $data = fread( $handle, 8192 );
 +
    if ( strlen( $data ) == 0 ) {
 +
      break;
 +
    }
 +
    $result .= $data;
 +
  } while ( true );
 +
  pclose( $handle );
 +
  unlink( $fnNew ); unlink( $fnOld );
 +
  //return 'cmd=['.$cmd.'] diff=['.$result.']';
 +
  return $result;
 +
}
  
 
} // end of 'MEDIAWIKI' check
 
} // end of 'MEDIAWIKI' check
?>
 
</php>
 

Latest revision as of 17:49, 26 April 2011

Navigation

{{#lst:SpamFerret|navbar}}: SpamFerret.php

Code

<php><?php /*

HISTORY:
 2007-08-19 (Wzl) fixed line 155 call to clsDatabase::Query() (method deprecated and removed)
 2007-09-30 (Wzl) fixing regex processing
 2007-10-11 (Wzl) logging ampersandbot attempts; spam throttle (automatic temporary blacklist)
 2007-10-13 (Wzl) fixed some issues which were preventing throttling from working - mainly changes to SQL
 2007-10-15 (Wzl) "Code" wasn't being recorded. Decided that normal filtering should use code '-' so NULL means something is wrong.
 2007-10-28 (Wzl) Events with THR and AMP codes *still* weren't being recorded because method call was improperly formatted.
 2007-12-23 (Wzl) Emails wikimaster if eregi() returns an error (due to improperly formatted regex)
 2007-12-26 (Wzl) Spam turd rejection / logging
 2007-12-27 (Wzl) Fixed spam turd detection to work for new pages too (will probably need refinement)
 2007-12-27 (Wzl) Ooops. Replaced missing if-block from regex results when inserted text is found.
 2008-08-29 (Wzl) Added permanent IP blocking
 2008-09-04 (Wzl) Added (optional) logging of successful edits
 2008-09-19 (Wzl) Actually *set* the "didEdit" flag for successful edits <facepalms>
 2008-10-21 (Wzl) Fixed minor syntax error in "defines"
 2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?)
 2009-03-10 (Wzl) "require" -> "require_once" so other extensions can use data.php without conflict
   also optional $kfpWzlLibs so data.php can be somewhere not on the path
 2009-03-18 (Wzl) Got rid of shared.php requirement; now using kfpLib to locate data.php
 2009-03-26 (Wzl) Rewrote data library calls to use new classes (no longer using deprecated/removed classes)
   Also modified to use newer function hooks
 2009-04-24 (Wzl) fixed "strict" bug referencing unset $txtCurr when creating new page
 2009-07-05 (Wzl) Using LibMgr
 2009-07-14 (Wzl) Added attempts.Diff field, patterns.isDiff
 2009-07-15 (Wzl)
   On advice from FreeNode##php, changed from eregi() to preg_match()
   Added option to match diff results instead of submitted edit only
   Saves diff of each change, approved or not
   BUG: approved edits are not being logged properly; using "OK" code and logging as failed
 2009-07-26 (Wzl) fixed minor warning error on line 252
 2009-08-07 (Wzl) email notification working; removed TRD and AMP hard-coded offenses, to be redone as isDiff filters if needed
 2009-08-08 (Wzl) create client record immediately if client is not recognized; don't depend on spam filter being triggered
 2009-08-09 (Wzl) restructured "client" and "attempt" tables (was "clients", "attempts"); not backwards-compatible
 2010-02-24 (Wzl) some code-tidying; trying to restrict passing of data between methods to single array var in args/return
 2010-08-17 (Wzl) added some debug code to CheckRegex(); fixed problem with escaped chars in filter
 2011-04-26 (Wzl) minor bug fixes; hand-merge with version on htyp.org
TO DO:
 * Throttled save attempts should check for spam, just for data-gathering purposes.
   Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option?
 * Figure out how to display a different error message than "the following text is what triggered our filter:"
OPTIONAL SETTINGS:
 kfpLib - path to data.php folder (no final slash)
 kfsLib_Data - filespec of data.php
*/
# Loader for spam blacklist feature
# Include this from LocalSettings.php

if ( defined( 'MEDIAWIKI' ) ) {

// Extension credits entry for SpamFerret.
$wgExtensionCredits['other'][] = array(
    'name'        => 'SpamFerret',
    'author'      => 'Woozle Staddon',
    'url'         => 'http://htyp.org/SpamFerret',
    'version'     => '2011-04-26',
    'description' => 'database-driven wikispam blocker',
);

// Decide where data.php lives, unless kfsLib_Data has already been defined:
// inside the kfpLib folder when that constant exists, otherwise assume it's on the path.
if (!defined('kfsLib_Data')) {
    define('kfsLib_Data', defined('kfpLib') ? kfpLib.'/data.php' : 'data.php');
}

// Pull in the library manager only when the LIBMGR constant is not defined yet.
// NOTE(review): presumably libmgr.php defines LIBMGR itself -- confirm.
if (!defined('LIBMGR')) {
    require('libmgr.php');
}

// Register data.php under the name 'data', then load it.
clsLibMgr::Add('data', kfsLib_Data, __FILE__, __LINE__);
clsLibMgr::Load('data', __FILE__, __LINE__);

/* ==============

SET UP CALLBACKS
*/

// Choose how SpamFerret hooks into page saves:
//  - when MW_SUPPORTS_EDITFILTERMERGED is defined, register wfSpamFerretMerged
//    on the EditFilterMerged hook;
//  - otherwise install wfSpamFerretFilter as $wgFilterCallback, remembering any
//    callback that was already installed in $wgPreSpamFilterCallback.
// $wgHooks is declared global together with the other two variables so that all
// three refer to the global scope even if this file is included from inside a function.
global $wgFilterCallback, $wgPreSpamFilterCallback, $wgHooks;

$wgPreSpamFilterCallback = false;
if ( defined( 'MW_SUPPORTS_EDITFILTERMERGED' ) ) {
    $wgHooks['EditFilterMerged'][] = 'wfSpamFerretMerged';
} else {
    if ( $wgFilterCallback ) {
        // NOTE(review): nothing visible here reads $wgPreSpamFilterCallback afterwards -- confirm it is still needed.
        $wgPreSpamFilterCallback = $wgFilterCallback;
    }
    $wgFilterCallback = 'wfSpamFerretFilter';
}

/*
$wgHooks['EditFilter'][] = 'wfSpamFerretValidate';
$wgHooks['ArticleSaveComplete'][] = 'wfSpamFerretArticleSave';
$wgHooks['APIEditBeforeSave'][] = 'wfSpamFerretAPIEditBeforeSave';
*/

/* ================

SET GLOBAL OBJECTS
*/

/**
 * Returns the single SpamFerret object for this request, creating it on first use.
 *
 * When the object is created, any filter callback that the setup code saved in
 * $wgPreSpamFilterCallback is handed to it as previousFilter. Without this the
 * saved callback was never invoked, because SpamFerret::filter() only chains to
 * $this->previousFilter and nothing ever set that property.
 *
 * @return SpamFerret
 */
function GetSpamFerret() {
    global $wgPreSpamFilterCallback;
    static $objFerret;

    if (!isset($objFerret)) {
        $objFerret = new SpamFerret();
        if (!empty($wgPreSpamFilterCallback)) {
            $objFerret->previousFilter = $wgPreSpamFilterCallback;
        }
    }
    return $objFerret;
}

/* ================
CALLBACK FUNCTIONS
*/

/**
 * Hook function for $wgFilterCallback
 *
 * Passes the edit to the shared SpamFerret object and returns its verdict
 * unchanged (see SpamFerret::filter() for the possible values).
 *
 * @param object $title    title of the page being edited
 * @param string $text     proposed page text
 * @param mixed  $section  section being edited; passed through to filter()
 * @return mixed whatever SpamFerret::filter() returns
 */
// Earlier signature, kept for reference:
//function wfSpamFerretFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
function wfSpamFerretFilter( &$title, $text, $section ) {
    global $wgOut;

    $spamObj = GetSpamFerret();
    $wgOut->addWikiText( "Intercepted by SpamFerretFilter" );
    return $spamObj->filter( $title, $text, $section );
}

/**
 * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter
 *
 * @param object $editPage     edit page object; supplies the article title and spamPage()
 * @param string $text         proposed (merged) page text
 * @param string &$hookErr     not used here
 * @param string $editSummary  not used here
 * @return bool TRUE to let the edit proceed, FALSE when the filter rejected it
 */
function wfSpamFerretMerged( $editPage, $text, &$hookErr, $editSummary ) {
    global $wgTitle,$wgOut;

    if( is_null( $wgTitle ) ) {
        # API mode
        # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
        return true;
    }
    $spamObj = GetSpamFerret();
    $title = $editPage->mArticle->getTitle();

    // No section is available in this hook, so pass an empty string for it.
    $ret = $spamObj->filter( $title, $text, '', $editPage );
    if ( $ret !== false ) {
        $editPage->spamPage( $ret );
    }

    // additional text can be added here:
    // $wgOut->addWikiText( "Intercepted by SpamFerretMerged" );

    // Return convention for hooks is the inverse of $wgFilterCallback
    return ( $ret === false );
}

/**
 * Hook function for APIEditBeforeSave
 *
 * Currently performs no checking (and the hook registration is commented out
 * in the setup code). Returns TRUE so that, if it is ever registered, hook
 * processing continues instead of failing on a missing return value.
 *
 * @return bool always TRUE
 */
function wfSpamFerretAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
    return true;
}

/**
 * Database-driven spam filter for MediaWiki edits.
 *
 * filter() is the entry point. It checks whether the client address is blocked
 * or throttled, then checks the proposed text (and its diff against the current
 * page) against the patterns stored in the filter database, and logs the result.
 */
class SpamFerret {

    // Callback to run before our own checks; FALSE means there is none.
    public $previousFilter = false;

    // internal data
    public $dbSpam;             // database object created by OpenDatabase()
    public $objDataClients;     // throttle data for the current client address
    public $strIPAddr;          // address of the client making the edit
    public $idPattern;          // ID of the pattern most recently checked (NULL = none)
    public $doClearThrottle;    // TRUE = client's retry count should be reset on next update
    // The following were previously created on the fly; declaring them keeps
    // the class working where dynamic properties are deprecated.
    public $txtEditRaw;         // DEPRECATED: raw edit text; use the args arrays
    public $isMatch = FALSE;    // result of the last CheckFilters() run
    public $strMatch;           // text which matched the pattern
    public $objFilts;           // cached 'patterns' table object (see FiltTbl())

    /*-----
      RETURNS: the named entry of $wgSpamFerretSettings, or NULL if it is not set
    */
    public function Setting($iName) {
        global $wgSpamFerretSettings;
        return isset($wgSpamFerretSettings[$iName]) ? $wgSpamFerretSettings[$iName] : NULL;
    }
    /*-----
      ACTION: Decides whether an edit may be saved, and logs the decision.
      INPUT:
        $title - title of the page being edited
        $text - proposed page text
        $section - passed through to the previous filter, if any
        $editPage - edit page object, when the caller has one (used for spamPage())
      RETURNS:
        FALSE if the edit is acceptable
        TRUE if the previous filter rejected it, or the client is blocked/throttled
        a string describing the match if a spam pattern matched
      PROPERTIES USED: $this->idPattern (out)
    */
    function filter( &$title, $text, $section, $editPage = FALSE ) {
        global $gErrorText;

        $fname = 'wfSpamFerretFilter';
        wfProfileIn( $fname );

        # Call the rest of the hook chain first
        if ( $this->previousFilter ) {
            $f = $this->previousFilter;
            if ( $f( $title, $text, $section ) ) {
                wfProfileOut( $fname );
                return true;
            }
        }

        // initialize variables
        $retVal = FALSE;	// default = assume edit is ok
        $gErrorText = FALSE;
        $isClientKnown = FALSE;
        $this->txtEditRaw = $text;		// DEPRECATED
        $arArgs = array();
        $arArgs['edit-raw'] = $text;

        // get the IP address of the http client making the edit attempt:
        $this->strIPAddr = wfGetIP();

        // Open the database
        $this->OpenDatabase();

        // open clients table (extended Throttle version) for reference:
        $objTblClients = new clsTable($this->dbSpam);
        $objTblClients->Name('ClientThrottle2');
        $objTblClients->KeyName('Address');

        // Look up to see if this IP is known; it may already be throttled:
        $this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"');
        if (is_object($this->objDataClients)) {
            if ($this->objDataClients->hasRows()) {
                $isClientKnown = TRUE;
            }
        }
        if ($isClientKnown) {
            $this->objDataClients->FirstRow();
            $doBlock = $this->objDataClients->doBlock;
            if ($doBlock) {
                $strThrType = 'BLK';
            } else {
                $intRetries = $this->objDataClients->Retries;
                $intThrottle = $this->Setting('throttle_retries');
                $doBlock = $intRetries > $intThrottle;
                $strThrType = 'THR-'.$intRetries;
            }
            if ($doBlock) {
                $arArgs['diff'] = NULL;	// not applicable
                // retry limit exceeded; check timeout limit
                if ($this->objDataClients->ThrottleTime < $this->Setting('throttle_timeout')) {
                    $txtMsg = 'Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.';
                    if (is_object($editPage)) {
                        $editPage->spamPage($txtMsg);
                    } else {
                        EditPage::spamPage($txtMsg);	// older MW code doesn't supply $editPage
                    }
                    $arArgs['code'] = $strThrType;
                    $this->RecordAttempt($arArgs);	// record post attempt by throttled client
                    $retVal = true;	// client has exceeded spam limit; impose throttle
                } else {
                    $this->doClearThrottle = true;
                }
            }
        } else {
            $this->CreateClient();
        }

        $arArgs['diff'] = 'N/A';

        if (!$retVal) {
            $arRtn = $this->GetDiff($title);	// get the diff between edit and current contents
            $arArgs['diff'] = $arRtn['diff'];
            $arArgs['doAll'] = FALSE;
            $arRtn = $this->CheckFilters($arArgs);
            $arArgs['edit-to-check'] = $arRtn['edit-to-check'];
            if ( $this->isMatch ) {
                // spam cue found; display the matching text and don't allow the edit to be saved:
                wfDebug( "Match!\n" );
                // The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
                $retVal = '(pattern #'.$this->idPattern.') ['.htmlspecialchars($this->strMatch).']';
                // Log the spam attempt:
                $arArgs['code'] = '-';
                $this->RecordAttempt($arArgs);
            } else {
                // no spam cues found; allow the edit to be saved, if nothing else has tripped the filter
                // (logging of ok edits is currently unconditional; the 'log_ok_edits' setting is not consulted)
                $this->idPattern = NULL;
                $this->RecordOkEdit($arArgs);
            }
        }

        wfProfileOut( $fname );
        $this->ReportErrors();
        return $retVal;
    }
    /*-----
      ACTION: Creates and opens the database object described by the 'dbspec' setting
    */
    public function OpenDatabase() {
        $this->dbSpam = new clsDatabase($this->Setting('dbspec'));
        $this->dbSpam->Open();
    }
    /*-----
      RETURNS: table object for the 'patterns' table, created on first use and then reused
    */
    public function FiltTbl() {
        if (empty($this->objFilts)) {
            $objTbl = new clsTable($this->dbSpam);
            $objTbl->Name('patterns');
            $objTbl->KeyName('ID');
            $this->objFilts = $objTbl;
        }
        return $this->objFilts;
    }
    /*-----
      INPUT:
        $iTitle - page for comparing proposed edit
        $this->txtEditRaw (DEPRECATED; should eventually come from an args array)
      OUTPUT:
        return ['diff'] - diff between the current page text and the edit, or
          the whole edit prefixed with '!!NEW: ' if the page does not exist yet
    */
    public function GetDiff($iTitle) {
        $objArticleCurr = new Article($iTitle);
        if ($objArticleCurr->exists()) {
            $txtCurr = $objArticleCurr->getContent();
            $txtDiff = FigureDiff($txtCurr,$this->txtEditRaw);
        } else {
            $txtDiff = '!!NEW: '.$this->txtEditRaw;
        }
        $arOut = array();
        $arOut['diff'] = $txtDiff;
        return $arOut;
    }
    /*-----
      ACTION: Checks the edit text / diff against the stored patterns
      INPUT:
        $iarArgs['doAll'] - TRUE = check every pattern and collect all matches
          in $gFilterMatches; FALSE = check active patterns only, stop at first match
        $iarArgs['diff'] - diff text, checked by patterns flagged isDiff
        $this->txtEditRaw - DEPRECATED; use $iarArgs
      OUTPUT:
        $this->isMatch, $this->strMatch, $this->idPattern
        return ['edit-to-check'] - edit text after being massaged for checking
      NOTE: regex patterns are matched case-insensitively via the 'i' modifier.
        They were formerly lowercased, which altered escapes such as \S and \W.
    */
    public function CheckFilters(array $iarArgs) {
        global $gRegexMatches,$gFilterMatches,$gFilterRows,$gFilterCount;

        assert(is_object($this->dbSpam));
        $doCheckAll = $iarArgs['doAll'];
        $strChkDiff = strtolower(nz($iarArgs['diff']));
        $objFiltTbl = $this->FiltTbl();
        $sqlFilt = $doCheckAll ? NULL : 'isActive';
        $objRow = $objFiltTbl->GetData($sqlFilt);
        $strTextEdit = strtolower($this->txtEditRaw);
        $arOut = array();
        $arOut['edit-to-check'] = $strTextEdit;	// text after being massaged for checking
        $this->isMatch = FALSE;
        $gFilterCount = 0;
        $gFilterRows = $objRow->RowCount();
        while($objRow->NextRow() && (!$this->isMatch || $doCheckAll)) {
            $isMatch = FALSE;
            $strTextCk = $objRow->isDiff ? $strChkDiff : $strTextEdit;
            if (!is_null($strTextCk)) {
                $gFilterCount++;
                $this->idPattern = $objRow->ID;
                if ($objRow->isRegex) {
                    $isMatch = $this->CheckRegex((string)$objRow->Pattern,$strTextCk,'i');
                    if ($isMatch === FALSE) {
                        // preg_match() failed, most likely because the stored regex is malformed
                        $this->AddErrorLine('Filter #'.$this->idPattern.' generated error "'.$this->LastRegexError().'"');
                    } elseif ($isMatch) {
                        $this->strMatch = $gRegexMatches[0];
                    }
                } else {
                    $strPattern = strtolower($objRow->Pattern);
                    if (empty($strPattern)) {
                        $isMatch = FALSE;
                    } else {
                        $this->strMatch = stristr($strTextCk,$strPattern);
                        $isMatch = ($this->strMatch != '');
                    }
                }
                if ($isMatch) {
                    $this->isMatch = TRUE;
                    if ($doCheckAll) {
                        $gFilterMatches[$this->idPattern] = $this->strMatch;
                    }
                }
            }
        }
        return $arOut;
    }
    /*----
      ACTION: Matches a filter regex (stored without delimiters) against a text
      INPUT:
        $iPattern - regex without delimiters; '/' may appear raw or already escaped
        $iText - text to search
        $iModifiers - optional PCRE modifiers to append (e.g. 'i')
      RETURNS: 1 if matched, 0 if not, FALSE if preg_match() reported an error
      OUTPUT: $gRegexMatches - matches found by preg_match()
      TO DO:
        replace $gRegexMatches with return array
        make this function static
    */
    public function CheckRegex($iPattern,$iText,$iModifiers='') {
        global $gRegexMatches,$strDbg;

        $chDelim = '/';
        // Escape only delimiters which are not already escaped. Blindly prefixing
        // every '/' turned an existing "\/" into "\\/", ending the pattern early.
        $strPattCk = preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~','$1\\\\/',$iPattern);
        $strFinal = $chDelim.$strPattCk.$chDelim.$iModifiers;
        $strDbg .= "@preg_match(\"$strFinal\",\"$iText\",...)";
        // warnings are suppressed here; callers detect failure by the FALSE return value
        $isMatch = @preg_match($strFinal,$iText,$gRegexMatches);
        return $isMatch;
    }
    /*----
      RETURNS: description of the most recent preg_match() failure
    */
    private function LastRegexError() {
        $arErr = error_get_last();
        // only trust the last-error record if it actually came from preg_match()
        if (is_array($arErr) && (strpos($arErr['message'],'preg_match') !== FALSE)) {
            return $arErr['message'];
        }
        return 'preg_last_error() code '.preg_last_error();
    }
    /*----
      ACTION: Appends a line to the error text which ReportErrors() will send
    */
    public function AddErrorLine($iText) {
        global $gErrorText;
        $gErrorText .= $iText."\n";
    }
    /*----
      ACTION: Emails any accumulated error text to $wgEmergencyContact
    */
    public function ReportErrors() {
        global $wgUser;
        global $wgEmergencyContact;
        global $gErrorText;

        if ($gErrorText) {
            $msgEmail = 'Filter error report for user '.$wgUser->getName().":\n\n";
            $msgEmail .= $gErrorText;
            mail($wgEmergencyContact,'spamferret filter error',$msgEmail);
        }
    }
    /*----
      ACTION: Create a new record for the current client
    */
    public function CreateClient() {
        $sql = 'INSERT INTO client (Address,WhenFirst,Count,Retries) VALUES("'.$this->strIPAddr.'",NOW(),1,0)';
        $this->dbSpam->Exec($sql);
    }
    /*----
      ACTION: Update a client's record to reflect a new spam attempt
    */
    public function RecordClientSpam() {
        if ($this->doClearThrottle) {
            $strRetries = '0';
        } else {
            $strRetries = 'Retries+1';
        }
        $sql = 'UPDATE client SET WhenLast=NOW(),Count=Count+1, Retries='.$strRetries.' WHERE Address="'.$this->strIPAddr.'"';
        $this->dbSpam->Exec($sql);
        if ($this->dbSpam->RowsAffected() < 1) {
            $this->CreateClient();
            $this->AddErrorLine('Record not found for client '.$this->strIPAddr);
        }
    }
    /*-----
      ACTION: Logs a rejected edit attempt, and updates the client and pattern records
      INPUT:
        $this->idPattern - pattern which matched, or NULL if none (e.g. throttled client)
        $iarArgs['code'] - event code to record
        $iarArgs['diff']
        $iarArgs['edit-raw']
    */
    public function RecordAttempt(array $iarArgs) {
        global $wgTitle, $wgServer;

        $iCode = $iarArgs['code'];
        $txtDiff = $iarArgs['diff'];		// was $this->txtDiff
        $txtEdit = $iarArgs['edit-raw'];	// was $this->txtEditChk
        $this->RecordClientSpam();
        $sqlCode = '"'.$iCode.'"';
        $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
        $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
        $sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
        $sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
        $sqlPattern = $this->idPattern;
        if (is_null($sqlPattern)) {
            $sqlPattern = 'NULL';
            $isPattern = FALSE;
        } else {
            $isPattern = TRUE;
        }
        $sqlAddr = '"'.$this->strIPAddr.'"';
        $sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES'
          .'(NOW(),'
          .$sqlPattern.','
          .$sqlAddr.','
          .SQL_Value(session_id()).','
          .$sqlSrvr.','
          .$sqlPage.','
          .$sqlCode.','
          .'FALSE,'
          .$sqlEdit.','
          .$sqlDiff.')';
        $ok = $this->dbSpam->Exec($sql);
        if ($ok !== TRUE) {
            $this->AddErrorLine('SQL ['.$sql.'] in RecordAttempt() generated this error: '.$ok);
        }
        if ($isPattern) {
            $sql = 'UPDATE patterns SET WhenTried=NOW(), Count=Count+1 WHERE ID='.$this->idPattern;
            $ok = $this->dbSpam->Exec($sql);
            if ($ok !== TRUE) {
                $this->AddErrorLine('SQL ['.$sql.'] generated this error: '.$ok);
            }
        }
    }
    /*-----
      ACTION: Logs an edit which passed the filters
      INPUT:
        $iarArgs['diff']
        $iarArgs['edit-to-check'] - edit text as checked (see CheckFilters())
    */
    public function RecordOkEdit(array $iarArgs) {
        global $wgTitle, $wgServer;

        $txtDiff = $iarArgs['diff'];		// was $this->txtDiff
        $txtEdit = $iarArgs['edit-to-check'];	// was $this->txtEditChk
        $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
        $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
        $sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
        $sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
        $sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES '
          .'(NOW(),NULL,'
          .'"'.$this->strIPAddr.'",'
          .SQL_Value(session_id()).','
          .$sqlSrvr.','
          .$sqlPage.',"ok",TRUE,'
          .$sqlEdit.','
          .$sqlDiff.')';
        $ok = $this->dbSpam->Exec($sql);
        if ($ok !== TRUE) {
            $this->AddErrorLine('SQL ['.$sql.'] in RecordOkEdit() generated this error: '.$ok);
        }
    }

}

/**
 * Stores an error number and message in the globals $errNum and $errStr.
 * NOTE(review): the signature fits set_error_handler(), but no registration
 * is visible in this file - confirm where (or whether) this is installed.
 *
 * @param int    $errno   error number to remember
 * @param string $errstr  error message to remember
 */
function ErrorHandler ($errno ,$errstr) {
    $GLOBALS['errNum'] = $errno;
    $GLOBALS['errStr'] = $errstr;
}

/**
 * Formats a value for inclusion in an SQL statement.
 * NOTE(review): string values are quoted but NOT escaped; callers must only
 * pass strings that are safe to embed (this file passes session_id()).
 *
 * @param mixed $iVar value to format
 * @return mixed 'NULL' for NULL, the value unchanged if numeric, otherwise the value in double quotes
 */
function SQL_Value($iVar) {
    if (is_null($iVar)) {
        return 'NULL';
    }
    if (is_numeric($iVar)) {
        return $iVar;
    }
    return '"'.$iVar.'"';
}

/**
 * Runs the external "diff" command on two texts and returns its output.
 *
 * Both texts are written to temporary files (created in the session save
 * path), which are removed again afterwards. If the temporary files cannot
 * be written or the command cannot be started, an empty string is returned.
 *
 * @param string $iTextOld current text
 * @param string $iTextNew proposed text
 * @return string output of the diff command
 */
function FigureDiff($iTextOld, $iTextNew) {
    $result = '';

    # Make temporary files
    // $td = wfTempDir();
    $td = session_save_path();
    $fnOld = tempnam( $td, 'SpamFerret-old-' );
    $fnNew = tempnam( $td, 'SpamFerret-new-' );

    $isReady = ($fnOld !== FALSE) && ($fnNew !== FALSE)
      && (file_put_contents( $fnOld, $iTextOld ) !== FALSE)
      && (file_put_contents( $fnNew, $iTextNew ) !== FALSE);

    if ($isReady) {
        $ksSpamFerretDiffOptions = '-i -E --suppress-common-lines ';	// this setting applies to all uses of a given filter DB
        // can change for different DBs
        $cmd = 'diff '.$ksSpamFerretDiffOptions.wfEscapeShellArg( $fnOld, $fnNew );
        $handle = popen( $cmd, 'r' );
        if (is_resource($handle)) {
            // read until the command produces no more output
            do {
                $data = fread( $handle, 8192 );
                if ( ($data === FALSE) || (strlen( $data ) == 0) ) {
                    break;
                }
                $result .= $data;
            } while ( true );
            pclose( $handle );
        }
    }

    // always clean up whichever temporary files were actually created
    if ($fnNew !== FALSE) {
        unlink( $fnNew );
    }
    if ($fnOld !== FALSE) {
        unlink( $fnOld );
    }
    return $result;
}

} // end of 'MEDIAWIKI' check