Difference between revisions of "SpamFerret.php"
(→Code: 12/27 spam-turd-detecting version) |
(hand-merge of /dev and previous wiki version, with minor fixes) |
||
(11 intermediate revisions by the same user not shown) | |||
Line 15: | Line 15: | ||
2007-12-26 (Wzl) Spam turd rejection / logging | 2007-12-26 (Wzl) Spam turd rejection / logging | ||
2007-12-27 (Wzl) Fixed spam turd detection to work for new pages too (will probably need refinement) | 2007-12-27 (Wzl) Fixed spam turd detection to work for new pages too (will probably need refinement) | ||
+ | 2007-12-27 (Wzl) Ooops. Replaced missing if-block from regex results when inserted text is found. | ||
+ | 2008-08-29 (Wzl) Added permanent IP blocking | ||
+ | 2008-09-04 (Wzl) Added (optional) logging of successful edits | ||
+ | 2008-09-19 (Wzl) Actually *set* the "didEdit" flag for successful edits <facepalms> | ||
+ | 2008-10-21 (Wzl) Fixed minor syntax error in "defines" | ||
+ | 2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?) | ||
+ | 2009-03-10 (Wzl) "require" -> "require_once" so other extensions can use data.php without conflict | ||
+ | also optional $kfpWzlLibs so data.php can be somewhere not on the path | ||
+ | 2009-03-18 (Wzl) Got rid of shared.php requirement; now using kfpLib to locate data.php | ||
+ | 2009-03-26 (Wzl) Rewrote data library calls to use new classes (no longer using deprecated/removed classes) | ||
+ | Also modified to use newer function hooks | ||
+ | 2009-04-24 (Wzl) fixed "strict" bug referencing unset $txtCurr when creating new page | ||
+ | 2009-07-05 (Wzl) Using LibMgr | ||
+ | 2009-07-14 (Wzl) Added attempts.Diff field, patterns.isDIff | ||
+ | 2009-07-15 (Wzl) | ||
+ | On advice from FreeNode##php, changed from eregi() to preg_match() | ||
+ | Added option to match diff results instead of submitted edit only | ||
+ | Saves diff of each change, approved or not | ||
+ | BUG: approved edits are not being logged properly; using "OK" code and logging as failed | ||
+ | 2009-07-26 (Wzl) fixed minor warning error on line 252 | ||
+ | 2009-08-07 (Wzl) email notification working; removed TRD and AMP hard-coded offenses, to be redone as isDiff filters if needed | ||
+ | 2009-08-08 (Wzl) create client record immediately if client is not recognized; don't depend on spam filter being triggered | ||
+ | 2009-08-09 (Wzl) restructured "client" and "attempt" tables (was "clients", "attempts"); not backwards-compatible | ||
+ | 2010-02-24 (Wzl) some code-tidying; trying to restrict passing of data between methods to single array var in args/return | ||
+ | 2010-08-17 (Wzl) added some debug code to CheckRegex(); fixed problem with escaped chars in filter | ||
+ | 2011-04-26 (Wzl) minor bug fixes; hand-merge with version on htyp.org | ||
TO DO: | TO DO: | ||
− | |||
* Throttled save attempts should check for spam, just for data-gathering purposes. | * Throttled save attempts should check for spam, just for data-gathering purposes. | ||
Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option? | Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option? | ||
* Figure out how to display a different error message than "the following text is what triggered our filter:" | * Figure out how to display a different error message than "the following text is what triggered our filter:" | ||
+ | OPTIONAL SETTINGS: | ||
+ | kfpLib - path to data.php folder (no final slash) | ||
+ | kfsLib_Data - filespec of data.php | ||
*/ | */ | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
# Loader for spam blacklist feature | # Loader for spam blacklist feature | ||
Line 34: | Line 54: | ||
if ( defined( 'MEDIAWIKI' ) ) { | if ( defined( 'MEDIAWIKI' ) ) { | ||
− | |||
// Advertise the extension in MediaWiki's extension credits list.
$wgExtensionCredits['other'][] = array(
    'name' => 'SpamFerret',
    'author' => 'Woozle Staddon',
    'url' => 'http://htyp.org/SpamFerret',
    'version' => '2011-04-26',
    'description' => 'database-driven wikispam blocker',
);
// Work out where data.php lives unless the site has already defined kfsLib_Data:
// a defined kfpLib (folder, no final slash) is used as its location;
// otherwise data.php is assumed to be on the include path.
if (!defined('kfsLib_Data')) {
    define('kfsLib_Data', defined('kfpLib') ? (kfpLib.'/data.php') : 'data.php');
}
// Load the library manager unless LIBMGR is already defined
// (presumably by libmgr.php itself -- TODO confirm).
if (!defined('LIBMGR')) {
    require('libmgr.php');
}
// Register data.php under the name 'data', then load it.
clsLibMgr::Add('data', kfsLib_Data, __FILE__, __LINE__);
clsLibMgr::Load('data', __FILE__, __LINE__);
+ | |||
/* ==============
 SET UP CALLBACKS
 When MW_SUPPORTS_EDITFILTERMERGED is defined, the EditFilterMerged hook is used.
 Otherwise the single-slot $wgFilterCallback is taken over, and whatever callback
 was installed there before is remembered in $wgPreSpamFilterCallback.
*/
// $wgHooks is declared global as well, so that the hook registration still reaches
// MediaWiki when this file is included from inside a function.
global $wgFilterCallback, $wgPreSpamFilterCallback, $wgHooks;
$wgPreSpamFilterCallback = false;
if ( defined( 'MW_SUPPORTS_EDITFILTERMERGED' ) ) {
    $wgHooks['EditFilterMerged'][] = 'wfSpamFerretMerged';
} else {
    if ( $wgFilterCallback ) {
        $wgPreSpamFilterCallback = $wgFilterCallback;
    }
    $wgFilterCallback = 'wfSpamFerretFilter';
}
/* Hooks that are currently NOT registered:
$wgHooks['EditFilter'][] = 'wfSpamFerretValidate';
$wgHooks['ArticleSaveComplete'][] = 'wfSpamFerretArticleSave';
$wgHooks['APIEditBeforeSave'][] = 'wfSpamFerretAPIEditBeforeSave';
*/
− | + | ||
− | + | /* ================ | |
− | ); | + | SET GLOBAL OBJECTS |
+ | */ | ||
/*-----
 RETURNS: the single SpamFerret object shared by every callback during this request.
   The object is created on first use. At that point any filter callback that the
   setup code saved in $wgPreSpamFilterCallback is handed to the object, so that
   SpamFerret::filter() can call it first instead of silently replacing it.
*/
function GetSpamFerret() {
    global $wgPreSpamFilterCallback;
    static $objFerret;

    if (!isset($objFerret)) {
        $objFerret = new SpamFerret();
        // empty() also covers the variable being unset or false (no earlier callback)
        if (!empty($wgPreSpamFilterCallback)) {
            $objFerret->previousFilter = $wgPreSpamFilterCallback;
        }
    }
    return $objFerret;
}
+ | /* ================ | ||
+ | CALLBACK FUNCTIONS | ||
+ | */ | ||
/**
 * Callback installed in $wgFilterCallback.
 *
 * Writes a marker line to the page output, then passes the edit to the shared
 * SpamFerret object.
 *
 * @param mixed  $title    title of the page being edited (by reference), as supplied by MediaWiki
 * @param string $text     submitted text
 * @param mixed  $section  section argument, passed through unchanged
 * @return mixed whatever SpamFerret::filter() returns
 */
function wfSpamFerretFilter( &$title, $text, $section ) {
    global $wgOut;

    $ferret = GetSpamFerret();
    $wgOut->addWikiText( "Intercepted by SpamFerretFilter" );
    $verdict = $ferret->filter( $title, $text, $section );
    return $verdict;
}
+ | |||
/**
 * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter.
 *
 * Runs the submitted text through SpamFerret::filter(). When the filter returns
 * anything other than false, the result is passed to $editPage->spamPage().
 *
 * @param object $editPage     edit page object; mArticle->getTitle() and spamPage() are used
 * @param string $text         submitted text
 * @param mixed  $hookErr      not used here
 * @param string $editSummary  not used here
 * @return bool true when the filter returned false (edit accepted), false otherwise
 */
function wfSpamFerretMerged( $editPage, $text, &$hookErr, $editSummary ) {
    global $wgTitle,$wgOut;

    // API mode: no global title is set, so the edit is let through unchecked.
    // NOTE(review): the original comment says wfSpamBlacklistFilterAPIEditBeforeSave
    // has already checked the edit in this case -- confirm that still applies here.
    if( $wgTitle === null ) {
        return true;
    }

    $pageTitle = $editPage->mArticle->getTitle();
    $verdict = GetSpamFerret()->filter( $pageTitle, $text, '', $editPage );

    $isClean = ( $verdict === false );
    if ( !$isClean ) {
        $editPage->spamPage( $verdict );
        // additional text could be added to $wgOut here
    }

    // Hooks return true to continue -- the inverse of the $wgFilterCallback convention
    return $isClean;
}
/**
 * Hook function for APIEditBeforeSave.
 *
 * Empty placeholder: it does nothing and returns nothing.
 */
function wfSpamFerretAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
    // intentionally empty
}
+ | |||
class SpamFerret { | class SpamFerret { | ||
// Filter callback to run before this one; filter() calls it first when it is set.
var $previousFilter = false;

// internal data
var $dbSpam;            // database object created by OpenDatabase()
var $objDataClients;    // result of looking up the current client's address
var $strIPAddr;         // address of the client making the edit
var $idPattern;         // ID of the pattern most recently checked (NULL = none)
var $doClearThrottle;   // set when a throttled client's timeout has passed
// The properties below were previously created on the fly by the methods that use
// them; declaring them documents them and avoids dynamic-property creation.
var $txtEditRaw;        // submitted text (DEPRECATED; methods should use their args array)
var $isMatch;           // set by CheckFilters(): did any pattern match?
var $strMatch;          // set by CheckFilters(): the text that matched
var $objFilts;          // patterns table object cached by FiltTbl()
− | + | ||
/*-----
 RETURNS: the value stored under $iName in the global $wgSpamFerretSettings array,
   or NULL if the array or that key has not been defined.
*/
function Setting($iName) {
    global $wgSpamFerretSettings;

    // isset() keeps an optional, undefined setting from raising an "undefined index" notice
    if (isset($wgSpamFerretSettings[$iName])) {
        return $wgSpamFerretSettings[$iName];
    }
    return NULL;
}
+ | |||
/*-----
 ACTION: Decides whether a submitted edit may be saved, and logs the outcome.
   1. Runs the previously-installed filter, if any; a true result rejects the edit.
   2. Looks up the client's address. Unknown clients get a new client record.
      Known clients flagged doBlock, or with more retries than 'throttle_retries',
      are rejected while their ThrottleTime is below 'throttle_timeout'.
   3. Otherwise diffs the edit against the current page, checks the active patterns,
      and logs either the spam attempt or the accepted edit.
 INPUT:
   $title - title of the page being edited
   $text - submitted text
   $section - only passed along to the previous filter
   $editPage - edit page object if the caller has one, else FALSE
 RETURNS:
   FALSE - edit is acceptable
   TRUE - rejected by the previous filter, or client is blocked/throttled
   string - description of the matching pattern and matched text
 PROPERTIES USED: $this->idPattern (out)
*/
function filter( &$title, $text, $section, $editPage = FALSE ) {
    global $gErrorText;

    $fname = 'wfSpamFerretFilter';
    wfProfileIn( $fname );

    ini_set('track_errors', 1);

    # Call the rest of the hook chain first
    if ( $this->previousFilter ) {
        $f = $this->previousFilter;
        if ( $f( $title, $text, $section ) ) {
            wfProfileOut( $fname );
            return true;
        }
    }
    // initialize variables
    $retVal = FALSE; // default = assume edit is ok
    $gErrorText = FALSE;
    $isClientKnown = FALSE;

    $this->txtEditRaw = $text; // DEPRECATED
    $arArgs = array();
    $arArgs['edit-raw'] = $text;

    // get the IP address of the http client making the edit attempt:
    $this->strIPAddr = wfGetIP();
    // Open the database
    $this->OpenDatabase();
    // open clients table (extended Throttle version) for reference:
    $objTblClients = new clsTable($this->dbSpam);
    $objTblClients->Name('ClientThrottle2');
    $objTblClients->KeyName('Address');
    // Look up to see if this IP is known; it may already be throttled:
    $this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"');
    if (is_object($this->objDataClients)) {
        if ($this->objDataClients->hasRows()) {
            $isClientKnown = TRUE;
        }
    }
    if ($isClientKnown) {
        $this->objDataClients->FirstRow();
        $doBlock = $this->objDataClients->doBlock;
        if ($doBlock) {
            $strThrType = 'BLK';
        } else {
            $intRetries = $this->objDataClients->Retries;
            $intThrottle = $this->Setting('throttle_retries');
            $doBlock = $intRetries > $intThrottle;
            $strThrType = 'THR-'.$intRetries;
        }
        if ($doBlock) {
            $arArgs['diff'] = NULL; // not applicable
            // retry limit exceeded; check timeout limit
            if ($this->objDataClients->ThrottleTime < $this->Setting('throttle_timeout')) {
                $txtMsg = 'Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.';
                if (is_object($editPage)) {
                    $editPage->spamPage($txtMsg);
                } else {
                    EditPage::spamPage($txtMsg); // older MW code doesn't supply $editPage
                }
                $arArgs['code'] = $strThrType;
                $this->RecordAttempt($arArgs); // record post attempt by throttled client
                $retVal = true; // client has exceeded spam limit; impose throttle
            } else {
                // timeout has passed: the next recorded attempt resets the retry count
                $this->doClearThrottle = true;
            }
        }
    } else {
        $this->CreateClient();
    }
    $arArgs['diff'] = 'N/A';

    if (!$retVal) {
        $arRtn = $this->GetDiff($title); // get the diff between edit and current contents
        $arArgs['diff'] = $arRtn['diff'];
        $arArgs['doAll'] = FALSE;
        $arRtn = $this->CheckFilters($arArgs);
        // CheckFilters() may not return its output array; in that case fall back to the
        // same lowercased copy of the submitted text that it checks, so that accepted
        // edits are not logged with empty text.
        $arArgs['edit-to-check'] = isset($arRtn['edit-to-check'])
            ? $arRtn['edit-to-check']
            : strtolower($text);
        if ( $this->isMatch ) {
            // spam cue found; display the matching text and don't allow the edit to be saved:
            wfDebug( "Match!\n" );
            // The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
            $retVal = '(pattern #'.$this->idPattern.') ['.htmlspecialchars($this->strMatch).']';
            // Log the spam attempt:
            $arArgs['code'] = '-';
            $this->RecordAttempt($arArgs);
        } else {
            // no spam cues found; allow the edit to be saved, if nothing else has tripped the filter
            // (logging is currently unconditional; the 'log_ok_edits' setting is not consulted)
            $this->idPattern = NULL;
            $this->RecordOkEdit($arArgs);
        }
    }
    wfProfileOut( $fname );
    $this->ReportErrors();
    return $retVal;
}
/*-----
 ACTION: Creates the database object described by the 'dbspec' setting,
   stores it in $this->dbSpam and opens it.
*/
public function OpenDatabase() {
    $dbSpec = $this->Setting('dbspec');
    $db = new clsDatabase($dbSpec);
    $this->dbSpam = $db;
    $db->Open();
}
/*-----
 RETURNS: table object for the 'patterns' table (key field 'ID').
   It is built on first use and cached in $this->objFilts.
*/
public function FiltTbl() {
    if (empty($this->objFilts)) {
        $tblPatterns = new clsTable($this->dbSpam);
        $tblPatterns->Name('patterns');
        $tblPatterns->KeyName('ID');
        $this->objFilts = $tblPatterns;
    }
    return $this->objFilts;
}
/*-----
 ACTION: Compares the submitted edit with the page's current content.
 INPUT:
   $iTitle - title of the page the proposed edit is compared against
   $this->txtEditRaw - the submitted text (DEPRECATED; should come from an args array)
 OUTPUT:
   return ['diff'] - output of FigureDiff() for an existing page,
     or the submitted text prefixed with '!!NEW: ' when the page does not exist
*/
public function GetDiff($iTitle) {
    $txtNew = $this->txtEditRaw;
    $objPage = new Article($iTitle);
    if (!$objPage->exists()) {
        return array('diff' => '!!NEW: '.$txtNew);
    }
    return array('diff' => FigureDiff($objPage->getContent(), $txtNew));
}
/*-----
 ACTION: Checks the submitted edit (or its diff) against the stored patterns.
   Checking stops at the first match unless 'doAll' is set.
 INPUT:
   $iarArgs['doAll'] - TRUE = check every pattern and collect all matches in $gFilterMatches;
     FALSE = check only rows selected by 'isActive'
   $iarArgs['diff'] - diff text, used for patterns flagged isDiff
   $this->txtEditRaw - DEPRECATED; use $iarArgs
 OUTPUT:
   $this->isMatch - TRUE if any pattern matched
   $this->strMatch - text that matched
   $this->idPattern - ID of the last pattern checked (the matching one when not doAll)
   $gFilterRows, $gFilterCount - number of pattern rows / number actually checked
   return ['edit-to-check'] - lowercased copy of the submitted text, as it was checked
*/
public function CheckFilters(array $iarArgs) {
    global $gRegexMatches,$gFilterMatches,$gFilterRows,$gFilterCount;

    // expression form: the string form of assert() is no longer evaluated by current PHP
    assert(is_object($this->dbSpam));

    $doCheckAll = $iarArgs['doAll'];
    $strChkDiff = strtolower(nz($iarArgs['diff']));
    $objFiltTbl = $this->FiltTbl();

    if ($doCheckAll) {
        $sqlFilt = NULL;
    } else {
        $sqlFilt = 'isActive';
    }

    $objFiltRows = $objFiltTbl->GetData($sqlFilt);
    $objRow = $objFiltRows; // for shorthand

    $strTextEdit = strtolower($this->txtEditRaw);
    $arOut = array();
    $arOut['edit-to-check'] = $strTextEdit; // text after being massaged for checking
    $this->isMatch = FALSE;
    $gFilterCount = 0;
    $gFilterRows = $objRow->RowCount();

    while($objRow->NextRow() && (!$this->isMatch || $doCheckAll)) {
        $isMatch = FALSE;

        // each pattern says whether it applies to the diff or to the whole edit
        if ($objRow->isDiff) {
            $strTextCk = $strChkDiff;
        } else {
            $strTextCk = $strTextEdit;
        }

        if (!is_null($strTextCk)) {
            $gFilterCount++;
            $strPattern = strtolower($objRow->Pattern);
            $isRegex = $objRow->isRegex;
            $this->idPattern = $objRow->ID;
            if ($isRegex) {
                $isMatch = $this->CheckRegex($strPattern,$strTextCk);

                // CheckRegex() passes back preg_match()'s result, which is FALSE (not 0)
                // when the pattern could not be run. $php_errormsg cannot be used here:
                // it would only be set inside CheckRegex()'s own scope.
                if ($isMatch === FALSE) {
                    $this->AddErrorLine('Filter #'.$this->idPattern.' generated a regex error; pattern is "'.$strPattern.'"');
                }

                if ($isMatch) {
                    $this->strMatch = $gRegexMatches[0];
                }
            } else {
                if (empty($strPattern)) {
                    $isMatch = FALSE;
                } else {
                    // stristr() returns the text from the first occurrence onward
                    $this->strMatch = stristr($strTextCk,$strPattern);
                    $isMatch = ($this->strMatch != '');
                }
            }
            if ($isMatch) {
                $this->isMatch = TRUE;
                if ($doCheckAll) {
                    $gFilterMatches[$this->idPattern] = $this->strMatch;
                }
            }
        }
    }
    // previously the output array was built but never returned
    return $arOut;
}
/*----
 ACTION: Runs one stored pattern against a piece of text with preg_match().
 INPUT:
   $iPattern - regex body WITHOUT delimiters, as stored in the pattern table
   $iText - text to search
 OUTPUT:
   $gRegexMatches - matches array filled in by preg_match()
   $strDbg - a description of the call is appended (debugging aid)
   return - preg_match() result: 1 = match, 0 = no match, FALSE = pattern error
 TO DO:
   replace $gRegexMatches with return array
   make this function static
*/
public function CheckRegex($iPattern,$iText) {
    global $gRegexMatches,$strDbg;

    $chDelim = '/';
    // Escape only those delimiter characters that are not already escaped.
    // Blindly prefixing every "/" turned an existing "\/" into "\\/", i.e. a literal
    // backslash followed by a bare delimiter, which ended the pattern early.
    // (Backslashes themselves are left alone so escapes in the filter keep working.)
    $strPattCk = '';
    $intLen = strlen($iPattern);
    for ($idx = 0; $idx < $intLen; $idx++) {
        $ch = $iPattern[$idx];
        if ($ch == '\\' && ($idx + 1) < $intLen) {
            // copy the backslash together with the character it escapes
            $idx++;
            $strPattCk .= $ch.$iPattern[$idx];
        } elseif ($ch == $chDelim) {
            $strPattCk .= '\\'.$chDelim;
        } else {
            $strPattCk .= $ch;
        }
    }
    // Clears this function's local $php_errormsg only; callers' scopes are not affected.
    unset($php_errormsg);
    $strFinal = $chDelim.$strPattCk.$chDelim;
    $strDbg .= "'''@preg_match'''(\"$strFinal\",\"$iText\",...)";
    // warnings are suppressed; a bad pattern is signalled by the FALSE return value
    $isMatch = @preg_match($strFinal,$iText,$gRegexMatches);

    return $isMatch;
}
/*----
 ACTION: Appends one line (plus a newline) to the global error text,
   which ReportErrors() later emails.
*/
public function AddErrorLine($iText) {
    global $gErrorText;

    $gErrorText = $gErrorText.$iText."\n";
}
/*----
 ACTION: If any error text has been collected in $gErrorText, emails it to
   $wgEmergencyContact, headed by the current user's name. Does nothing otherwise.
*/
public function ReportErrors() {
    global $wgUser;
    global $wgEmergencyContact;
    global $gErrorText;

    if (!$gErrorText) {
        return;
    }
    $msgEmail = 'Filter error report for user '.$wgUser->getName().":\n\n".$gErrorText;
    mail($wgEmergencyContact,'spamferret filter error',$msgEmail);
}
/*----
 ACTION: Inserts a row in the client table for the current client address,
   with WhenFirst = now, Count = 1 and Retries = 0.
*/
public function CreateClient() {
    $sqlAddr = '"'.$this->strIPAddr.'"';
    $this->dbSpam->Exec('INSERT INTO client (Address,WhenFirst,Count,Retries) VALUES('.$sqlAddr.',NOW(),1,0)');
}
/*----
 ACTION: Updates the current client's row for a new attempt: timestamp, total count,
   and retry count (reset to 0 when $this->doClearThrottle is set, otherwise incremented).
   If no row was updated, a client row is created and an error line is logged.
*/
public function RecordClientSpam() {
    $sqlRetries = $this->doClearThrottle ? '0' : 'Retries+1';
    $sql = 'UPDATE client SET WhenLast=NOW(),Count=Count+1, Retries='.$sqlRetries.' WHERE Address="'.$this->strIPAddr.'"';
    $this->dbSpam->Exec($sql);
    if ($this->dbSpam->RowsAffected() >= 1) {
        return;
    }
    // nothing updated: the client row is missing
    $this->CreateClient();
    $this->AddErrorLine('Record not found for client '.$this->strIPAddr);
}
/*-----
 ACTION: Logs a rejected edit in the attempt table (didAllow = FALSE), updates the
   client's record, and bumps the usage count of the pattern involved, if any.
   SQL failures are collected via AddErrorLine().
 INPUT:
   $this->idPattern - ID of the pattern involved, or NULL if none
   $iarArgs['code'] - short code describing why the edit was rejected
   $iarArgs['diff'] - diff text to store
   $iarArgs['edit-raw'] - submitted text to store
*/
public function RecordAttempt(array $iarArgs) {
    global $wgTitle, $wgServer;

    $iCode = $iarArgs['code'];
    $txtDiff = $iarArgs['diff']; // was $this->txtDiff
    $txtEdit = $iarArgs['edit-raw']; // was $this->txtEditChk
    $this->RecordClientSpam();

    $sqlCode = '"'.$iCode.'"';
    $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
    $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
    $sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
    $sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
    // The session id is always quoted and passed through SafeParam() like the other
    // text fields; it was previously inserted as-is (and unquoted when it looked numeric).
    $sqlSess = '"'.$this->dbSpam->SafeParam(session_id()).'"';
    $sqlPattern = $this->idPattern;
    if (is_null($sqlPattern)) {
        $sqlPattern = 'NULL';
        $isPattern = FALSE;
    } else {
        $isPattern = TRUE;
    }
    $sqlAddr = '"'.$this->strIPAddr.'"';
    $sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES'
        .'(NOW(),'
        .$sqlPattern.','
        .$sqlAddr.','
        .$sqlSess.','
        .$sqlSrvr.','
        .$sqlPage.','
        .$sqlCode.','
        .'FALSE,'
        .$sqlEdit.','
        .$sqlDiff.')';
    $ok = $this->dbSpam->Exec($sql);
    if ($ok !== TRUE) {
        $this->AddErrorLine('SQL ['.$sql.'] in RecordAttempt() generated this error: '.$ok);
    }
    if ($isPattern) {
        $sql = 'UPDATE patterns SET WhenTried=NOW(), Count=Count+1 WHERE ID='.$this->idPattern;
        $ok = $this->dbSpam->Exec($sql);
        if ($ok !== TRUE) {
            $this->AddErrorLine('SQL ['.$sql.'] generated this error: '.$ok);
        }
    }
}
/*-----
 ACTION: Logs an accepted edit in the attempt table
   (ID_Pattern = NULL, Code = "ok", didAllow = TRUE).
   SQL failures are collected via AddErrorLine().
 INPUT:
   $iarArgs['diff'] - diff text to store
   $iarArgs['edit-to-check'] - edit text to store, in the form it was checked
*/
public function RecordOkEdit(array $iarArgs) {
    global $wgTitle, $wgServer;

    $txtDiff = $iarArgs['diff']; // was $this->txtDiff
    $txtEdit = $iarArgs['edit-to-check']; // was $this->txtEditChk

    $sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
    $sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
    $sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
    $sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
    // The session id is always quoted and passed through SafeParam() like the other
    // text fields; it was previously inserted as-is (and unquoted when it looked numeric).
    $sqlSess = '"'.$this->dbSpam->SafeParam(session_id()).'"';
    $sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES '
        .'(NOW(),NULL,'
        .'"'.$this->strIPAddr.'",'
        .$sqlSess.','
        .$sqlSrvr.','
        .$sqlPage.',"ok",TRUE,'
        .$sqlEdit.','
        .$sqlDiff.')';
    $ok = $this->dbSpam->Exec($sql);
    if ($ok !== TRUE) {
        $this->AddErrorLine('SQL ['.$sql.'] in RecordOkEdit() generated this error: '.$ok);
    }
}
} | } | ||
/*-----
 ACTION: Stores an error number and message in the globals $errNum and $errStr.
 INPUT:
   $errno - error number
   $errstr - error message
*/
function ErrorHandler ($errno ,$errstr) {
    $GLOBALS['errNum'] = $errno;
    $GLOBALS['errStr'] = $errstr;
}
+ | |||
/*-----
 RETURNS: $iVar formatted for direct inclusion in an SQL statement:
   NULL -> the string 'NULL'
   numeric values (including numeric strings) -> returned unchanged
   anything else -> wrapped in double quotes
 NOTE: no escaping is done; the value must already be safe to embed.
*/
function SQL_Value($iVar) {
    if (is_null($iVar)) {
        return 'NULL';
    }
    return is_numeric($iVar) ? $iVar : ('"'.$iVar.'"');
}
/*-----
 ACTION: Writes both texts to temporary files, runs the external "diff" command
   on them, and returns its output. The temporary files are removed afterwards.
 INPUT:
   $iTextOld - current page text
   $iTextNew - submitted text
 RETURNS: output of diff as a string; empty string if the temporary files
   or the diff process could not be created
*/
function FigureDiff($iTextOld, $iTextNew) {
    # Make temporary files (in the session save folder)
    $td = session_save_path();
    $fnOld = tempnam( $td, 'SpamFerret-old-' );
    $fnNew = tempnam( $td, 'SpamFerret-new-' );
    $fhOld = ($fnOld === FALSE) ? FALSE : fopen( $fnOld, 'w' );
    $fhNew = ($fnNew === FALSE) ? FALSE : fopen( $fnNew, 'w' );

    // Only proceed if both files could be opened; passing FALSE to fwrite()/fread()
    // is a fatal error on current PHP, which would block every edit.
    $isReady = ($fhOld !== FALSE) && ($fhNew !== FALSE);
    if ($fhOld !== FALSE) {
        if ($isReady) {
            fwrite( $fhOld, $iTextOld );
        }
        fclose( $fhOld );
    }
    if ($fhNew !== FALSE) {
        if ($isReady) {
            fwrite( $fhNew, $iTextNew );
        }
        fclose( $fhNew );
    }

    $result = '';
    if ($isReady) {
        $ksSpamFerretDiffOptions = '-i -E --suppress-common-lines '; // this setting applies to all uses of a given filter DB
        // can change for different DBs
        $cmd = 'diff '.$ksSpamFerretDiffOptions.wfEscapeShellArg( $fnOld, $fnNew );
        $handle = popen( $cmd, 'r' );
        if ($handle !== FALSE) {
            // read until diff has no more output
            while ( true ) {
                $data = fread( $handle, 8192 );
                if ( $data === FALSE || strlen( $data ) == 0 ) {
                    break;
                }
                $result .= $data;
            }
            pclose( $handle );
        }
    }
    // always clean up whatever temporary files were created
    if ($fnNew !== FALSE) {
        unlink( $fnNew );
    }
    if ($fnOld !== FALSE) {
        unlink( $fnOld );
    }
    return $result;
}
} // end of 'MEDIAWIKI' check | } // end of 'MEDIAWIKI' check | ||
− |
Latest revision as of 17:49, 26 April 2011
{{#lst:SpamFerret|navbar}}: SpamFerret.php
Code
<php><?php /*
HISTORY: 2007-08-19 (Wzl) fixed line 155 call to clsDatabase::Query() (method deprecated and removed) 2007-09-30 (Wzl) fixing regex processing 2007-10-11 (Wzl) logging ampersandbot attempts; spam throttle (automatic temporary blacklist) 2007-10-13 (Wzl) fixed some issues which were preventing throttling from working - mainly changes to SQL 2007-10-15 (Wzl) "Code" wasn't being recorded. Decided that normal filtering should use code '-' so NULL means something is wrong. 2007-10-28 (Wzl) Events with THR and AMP codes *still* weren't being recorded because method call was improperly formatted. 2007-12-23 (Wzl) Emails wikimaster if eregi() returns an error (due to improperly formatted regex) 2007-12-26 (Wzl) Spam turd rejection / logging 2007-12-27 (Wzl) Fixed spam turd detection to work for new pages too (will probably need refinement) 2007-12-27 (Wzl) Ooops. Replaced missing if-block from regex results when inserted text is found. 2008-08-29 (Wzl) Added permanent IP blocking 2008-09-04 (Wzl) Added (optional) logging of successful edits 2008-09-19 (Wzl) Actually *set* the "didEdit" flag for successful edits <facepalms> 2008-10-21 (Wzl) Fixed minor syntax error in "defines" 2009-02-25 (Wzl) $objArticleCurr->loadLastEdit() now causes error in MW 1.14 (was it necessary before?) 
2009-03-10 (Wzl) "require" -> "require_once" so other extensions can use data.php without conflict also optional $kfpWzlLibs so data.php can be somewhere not on the path 2009-03-18 (Wzl) Got rid of shared.php requirement; now using kfpLib to locate data.php 2009-03-26 (Wzl) Rewrote data library calls to use new classes (no longer using deprecated/removed classes) Also modified to use newer function hooks 2009-04-24 (Wzl) fixed "strict" bug referencing unset $txtCurr when creating new page 2009-07-05 (Wzl) Using LibMgr 2009-07-14 (Wzl) Added attempts.Diff field, patterns.isDIff 2009-07-15 (Wzl) On advice from FreeNode##php, changed from eregi() to preg_match() Added option to match diff results instead of submitted edit only Saves diff of each change, approved or not BUG: approved edits are not being logged properly; using "OK" code and logging as failed 2009-07-26 (Wzl) fixed minor warning error on line 252 2009-08-07 (Wzl) email notification working; removed TRD and AMP hard-coded offenses, to be redone as isDiff filters if needed 2009-08-08 (Wzl) create client record immediately if client is not recognized; don't depend on spam filter being triggered 2009-08-09 (Wzl) restructured "client" and "attempt" tables (was "clients", "attempts"); not backwards-compatible 2010-02-24 (Wzl) some code-tidying; trying to restrict passing of data between methods to single array var in args/return 2010-08-17 (Wzl) added some debug code to CheckRegex(); fixed problem with escaped chars in filter 2011-04-26 (Wzl) minor bug fixes; hand-merge with version on htyp.org TO DO: * Throttled save attempts should check for spam, just for data-gathering purposes. Possibly non-spam from a throttled IP should not update the "WhenLast" timestamp. Maybe this should be a LocalSettings option? 
* Figure out how to display a different error message than "the following text is what triggered our filter:" OPTIONAL SETTINGS: kfpLib - path to data.php folder (no final slash) kfsLib_Data - filespec of data.php
*/
# Loader for spam blacklist feature
# Include this from LocalSettings.php
if ( defined( 'MEDIAWIKI' ) ) {
$wgExtensionCredits['other'][] = array( 'name' => 'SpamFerret', 'author' => 'Woozle Staddon', 'url' => 'http://htyp.org/SpamFerret', 'version' => '2011-04-26', 'description' => 'database-driven wikispam blocker', ); if (!defined('kfsLib_Data')) {
if (defined('kfpLib')) {
define('kfsLib_Data', kfpLib.'/data.php');
} else {
define('kfsLib_Data','data.php'); // assume it's on the path
}
} if (!defined('LIBMGR')) {
require('libmgr.php');
} clsLibMgr::Add('data', kfsLib_Data,__FILE__,__LINE__); clsLibMgr::Load('data',__FILE__,__LINE__);
/* ==============
SET UP CALLBACKS
*/
global $wgFilterCallback, $wgPreSpamFilterCallback;
$wgPreSpamFilterCallback = false; if ( defined( 'MW_SUPPORTS_EDITFILTERMERGED' ) ) {
$wgHooks['EditFilterMerged'][] = 'wfSpamFerretMerged';
} else {
if ( $wgFilterCallback ) { $wgPreSpamFilterCallback = $wgFilterCallback; } $wgFilterCallback = 'wfSpamFerretFilter';
}
/* $wgHooks['EditFilter'][] = 'wfSpamFerretValidate'; $wgHooks['ArticleSaveComplete'][] = 'wfSpamFerretArticleSave'; $wgHooks['APIEditBeforeSave'][] = 'wfSpamFerretAPIEditBeforeSave';
*/
/* ================
SET GLOBAL OBJECTS
*/
function GetSpamFerret() {
static $objFerret;
if (!isset($objFerret)) {
$objFerret = new SpamFerret();
} return $objFerret;
} /* ================
CALLBACK FUNCTIONS
*/
/**
 * Hook function for $wgFilterCallback.
 *
 * Writes a marker line to the page output, then passes the edit to the shared
 * SpamFerret object and returns whatever its filter() method returns.
 */
// (The old five-argument signature and the real one had been collapsed onto a single
// line, which left the real signature commented out; they are separate lines again.)
//function wfSpamFerretFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
function wfSpamFerretFilter( &$title, $text, $section ) {
    global $wgOut;

    $spamObj = GetSpamFerret();
    $wgOut->addWikiText( "Intercepted by SpamFerretFilter" );
    // $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
    return $spamObj->filter( $title, $text, $section );
}
/**
 * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter.
 *
 * Runs the submitted text through SpamFerret::filter(). When the filter returns
 * anything other than false, the result is passed to $editPage->spamPage().
 * Returns true when the edit is accepted, false otherwise.
 */
function wfSpamFerretMerged( $editPage, $text, &$hookErr, $editSummary ) {
    global $wgTitle,$wgOut;

    if( is_null( $wgTitle ) ) {
        # API mode
        # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
        return true;
    }

    $spamObj = GetSpamFerret();
    $title = $editPage->mArticle->getTitle();
    // $ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
    // The third argument is an empty string; it had been dropped, leaving a parse error.
    $ret = $spamObj->filter( $title, $text, '', $editPage );
    if ( $ret !== false ) $editPage->spamPage( $ret );

    // additional text can be added here:
    // $wgOut->addWikiText( "Intercepted by SpamFerretMerged" );

    // Return convention for hooks is the inverse of $wgFilterCallback
    return ( $ret === false );
}
/**
* Hook function for APIEditBeforeSave */
function wfSpamFerretAPIEditBeforeSave( &$editPage, $text, &$resultArr ) { }
/*-----
  Spam filter for page edits. For each edit attempt it:
    * looks up the submitting IP address in the client-throttle table,
    * rejects the edit outright if the client is blocked or over its retry limit,
    * otherwise checks the edit text (and its diff against the current page) against
      the rows of the "patterns" table,
    * logs the outcome in the "attempt" table.
  Settings are read from the global $wgSpamFerretSettings array.
*/
class SpamFerret {
	var $previousFilter = false;	// optional callback run before this filter; a TRUE result short-circuits filter()

	// internal data
	var $dbSpam;		// database object created by OpenDatabase()
	var $objDataClients;	// result of the client lookup for the current IP
	var $strIPAddr;		// IP address of the client making the edit
	var $idPattern;		// ID of the most recently checked pattern (NULL when none applies)
	var $doClearThrottle;	// TRUE when the throttle timeout has expired and the retry count should reset
	// the following were previously created on the fly; declared so they are always defined
	var $txtEditRaw;	// DEPRECATED: raw text of the edit (use the args array)
	var $isMatch;		// TRUE if the last CheckFilters() run found a match
	var $strMatch;		// text that matched
	var $objFilts;		// cached table object for "patterns"

	/*-----
	  RETURNS: the value stored under $iName in the global $wgSpamFerretSettings array
	*/
	function Setting($iName) {
		global $wgSpamFerretSettings;

		return $wgSpamFerretSettings[$iName];
	}
	/*-----
	  ACTION: decides whether an edit may be saved
	  INPUT:
	    $title - title of the page being edited
	    $text - proposed page text
	    $section - passed through to $this->previousFilter only
	    $editPage - edit page object, or FALSE when the caller (older MW code) cannot supply it
	  RETURNS:
	    FALSE - edit is acceptable
	    TRUE - edit rejected and the rejection page has already been shown
	           (previous filter tripped, or client is blocked/throttled)
	    string - edit rejected; text describing the pattern and match, for spamPage()
	  PROPERTIES USED:
	    $this->idPattern (out)
	*/
	function filter( &$title, $text, $section, $editPage = FALSE ) {
		global $gErrorText;

		$fname = 'wfSpamFerretFilter';
		wfProfileIn( $fname );

		# Call the rest of the hook chain first
		if ( $this->previousFilter ) {
			$f = $this->previousFilter;
			if ( $f( $title, $text, $section ) ) {
				wfProfileOut( $fname );
				return true;
			}
		}

		// initialize variables
		$retVal = FALSE;	// default = assume edit is ok
		$gErrorText = FALSE;
		$isClientKnown = FALSE;

		$this->txtEditRaw = $text;	// DEPRECATED
		$arArgs = array('edit-raw' => $text);

		// get the IP address of the http client making the edit attempt:
		$this->strIPAddr = wfGetIP();

		// Open the database
		$this->OpenDatabase();

		// open clients table (extended Throttle version) for reference:
		$objTblClients = new clsTable($this->dbSpam);
		$objTblClients->Name('ClientThrottle2');
		$objTblClients->KeyName('Address');

		// Look up to see if this IP is known; it may already be throttled:
		$this->objDataClients = $objTblClients->GetData('Address="'.$this->strIPAddr.'"');
		if (is_object($this->objDataClients)) {
			if ($this->objDataClients->hasRows()) {
				$isClientKnown = TRUE;
			}
		}
		if ($isClientKnown) {
			$this->objDataClients->FirstRow();
			$doBlock = $this->objDataClients->doBlock;
			if ($doBlock) {
				$strThrType = 'BLK';
			} else {
				$intRetries = $this->objDataClients->Retries;
				$intThrottle = $this->Setting('throttle_retries');
				$doBlock = $intRetries > $intThrottle;
				$strThrType = 'THR-'.$intRetries;
			}
			if ($doBlock) {
				$arArgs['diff'] = NULL;	// not applicable
				// retry limit exceeded; check timeout limit
				if ($this->objDataClients->ThrottleTime < $this->Setting('throttle_timeout')) {
					$txtMsg = 'Too many spam attempts from your IP address ('.$this->strIPAddr.'). Please come back later.';
					if (is_object($editPage)) {
						$editPage->spamPage($txtMsg);
					} else {
						EditPage::spamPage($txtMsg);	// older MW code doesn't supply $editPage
					}
					$arArgs['code'] = $strThrType;
					$this->RecordAttempt($arArgs);	// record post attempt by throttled client
					$retVal = true;	// client has exceeded spam limit; impose throttle
				} else {
					$this->doClearThrottle = true;
				}
			}
		} else {
			$this->CreateClient();
		}

		$arArgs['diff'] = 'N/A';	// placeholder; replaced below when the edit is actually checked

		if (!$retVal) {
			$arRtn = $this->GetDiff($title);	// get the diff between edit and current contents
			$arArgs['diff'] = $arRtn['diff'];
			$arArgs['doAll'] = FALSE;
			$arRtn = $this->CheckFilters($arArgs);
			$arArgs['edit-to-check'] = $arRtn['edit-to-check'];
			if ( $this->isMatch ) {
				// spam cue found; display the matching text and don't allow the edit to be saved:
				wfDebug( "Match!\n" );

				// The string sent to spamPage() will be shown after "The following text is what triggered our spam filter:"
				$retVal = '(pattern #'.$this->idPattern.') ['.htmlspecialchars($this->strMatch).']';
				// Log the spam attempt:
				$arArgs['code'] = '-';
				$this->RecordAttempt($arArgs);
			} else {
				// no spam cues found; allow the edit to be saved, if nothing else has tripped the filter
				// (the 'log_ok_edits' setting is currently not consulted; approved edits are always logged)
				$this->idPattern = NULL;
				$this->RecordOkEdit($arArgs);
			}
		}

		wfProfileOut( $fname );
		$this->ReportErrors();
		return $retVal;
	}
	/*-----
	  ACTION: creates and opens the spam database object from the 'dbspec' setting
	  OUTPUT: $this->dbSpam
	*/
	public function OpenDatabase() {
		$this->dbSpam = new clsDatabase($this->Setting('dbspec'));
		$this->dbSpam->Open();
	}
	/*-----
	  RETURNS: table object for the "patterns" table (key "ID"), created on first use
	    and cached in $this->objFilts
	*/
	public function FiltTbl() {
		if (empty($this->objFilts)) {
			$objTbl = new clsTable($this->dbSpam);
			$objTbl->Name('patterns');
			$objTbl->KeyName('ID');
			$this->objFilts = $objTbl;
		}
		return $this->objFilts;
	}
	/*-----
	  INPUT:
	    $iTitle - page for comparing proposed edit
	    $this->txtEditRaw (DEPRECATED; use $iarArgs['text-to-check'])
	  OUTPUT:
	    return ['diff'] - output of FigureDiff() for an existing page, or the whole
	      edit prefixed with "!!NEW: " when the page does not exist yet
	*/
	public function GetDiff($iTitle) {
		$objArticleCurr = new Article($iTitle);
		if ($objArticleCurr->exists()) {
			$txtCurr = $objArticleCurr->getContent();
			$txtDiff = FigureDiff($txtCurr,$this->txtEditRaw);
		} else {
			$txtDiff = '!!NEW: '.$this->txtEditRaw;
		}
		return array('diff' => $txtDiff);
	}
	/*-----
	  ACTION: checks the edit against the pattern table
	  INPUT:
	    $iarArgs['doAll'] - TRUE = check every pattern (active or not) and keep going after a match,
	      collecting matches in the global $gFilterMatches; FALSE = active patterns only, stop at first match
	    $iarArgs['diff'] - diff text, used for patterns flagged isDiff
	    $this->txtEditRaw - DEPRECATED; use $iarArgs
	  OUTPUT:
	    $this->idPattern - ID of the last pattern checked (the matching one when stopping at first match)
	    $this->isMatch, $this->strMatch
	    globals $gFilterCount, $gFilterRows
	    return ['edit-to-check'] - lowercased edit text that non-diff patterns were checked against
	*/
	public function CheckFilters(array $iarArgs) {
		global $gRegexMatches,$gFilterMatches,$gFilterRows,$gFilterCount;

		assert(is_object($this->dbSpam));

		$doCheckAll = $iarArgs['doAll'];
		$strChkDiff = strtolower(nz($iarArgs['diff']));
		$objFiltTbl = $this->FiltTbl();

		if ($doCheckAll) {
			$sqlFilt = NULL;
		} else {
			$sqlFilt = 'isActive';
		}
		$objRow = $objFiltTbl->GetData($sqlFilt);

		$strTextEdit = strtolower($this->txtEditRaw);
		$arOut = array('edit-to-check' => $strTextEdit);	// text after being massaged for checking
		$this->isMatch = FALSE;
		$gFilterCount = 0;
		$gFilterRows = $objRow->RowCount();

		while($objRow->NextRow() && (!$this->isMatch || $doCheckAll)) {
			$isMatch = FALSE;

			if ($objRow->isDiff) {
				$strTextCk = $strChkDiff;
			} else {
				$strTextCk = $strTextEdit;
			}

			if (!is_null($strTextCk)) {
				$gFilterCount++;
				$strPattern = strtolower($objRow->Pattern);
				$isRegex = $objRow->isRegex;
				$this->idPattern = $objRow->ID;
				if ($isRegex) {
					$isMatch = $this->CheckRegex($strPattern,$strTextCk);

					// preg_match() gives FALSE (not 0) when the pattern could not be run.
					// The warning is suppressed inside CheckRegex(), so fetch it here; a runtime
					// PCRE failure raises no warning and is only visible via preg_last_error().
					if ($isMatch === FALSE) {
						$arErr = error_get_last();
						if (is_array($arErr) && (strpos($arErr['message'],'preg_match') !== FALSE)) {
							$strErr = $arErr['message'];
						} else {
							$strErr = 'PCRE error code '.preg_last_error();
						}
						$this->AddErrorLine('Filter #'.$this->idPattern.' generated error "'.$strErr.'"');
					}

					if ($isMatch) {
						$this->strMatch = $gRegexMatches[0];
					}
				} else {
					if (empty($strPattern)) {
						$isMatch = FALSE;
					} else {
						// stristr() gives the text from the first occurrence onward, or FALSE if absent
						$this->strMatch = stristr($strTextCk,$strPattern);
						$isMatch = ($this->strMatch !== FALSE);
					}
				}
				if ($isMatch) {
					$this->isMatch = TRUE;
					if ($doCheckAll) {
						$gFilterMatches[$this->idPattern] = $this->strMatch;
					}
				}
			}
		}
		return $arOut;
	}
	/*----
	  ACTION: runs one regex pattern against a text
	  INPUT:
	    $iPattern - regex without delimiters
	    $iText - text to search
	  RETURNS: result of preg_match(): 1 = match, 0 = no match, FALSE = pattern could not be run
	  OUTPUT: global $gRegexMatches receives the match array; global $strDbg gets a trace line appended
	  TO DO:
	    replace $gRegexMatches with return array
	    make this function static
	*/
	public function CheckRegex($iPattern,$iText) {
		global $gRegexMatches,$strDbg;

		$chDelim = '/';
		// (2010-08-17) doubling the filter's backslashes causes incorrect handling of escaped
		//	characters in the filter, so backslashes are left alone.
		// Escape only those delimiter characters that are not already escaped, i.e. that are
		//	preceded by an even number of backslashes; a blind replace would turn "\/" into "\\/",
		//	which un-escapes the delimiter and breaks the pattern.
		$strPattCk = preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~','$1\\\\/',$iPattern);
		$strFinal = $chDelim.$strPattCk.$chDelim;
		$strDbg .= "@preg_match(\"$strFinal\",\"$iText\",...)";
		// warning suppressed: a bad pattern from the table must not spill onto the page;
		//	the caller detects failure from the FALSE return
		$isMatch = @preg_match($strFinal,$iText,$gRegexMatches);

		return $isMatch;
	}
	/*----
	  ACTION: appends one line to the global error text that ReportErrors() mails out
	*/
	public function AddErrorLine($iText) {
		global $gErrorText;

		$gErrorText .= $iText."\n";
	}
	/*----
	  ACTION: if any error text has accumulated, emails it to $wgEmergencyContact
	*/
	public function ReportErrors() {
		global $wgUser;
		global $wgEmergencyContact;
		global $gErrorText;

		if ($gErrorText) {
			$msgEmail = 'Filter error report for user '.$wgUser->getName().":\n\n";
			$msgEmail .= $gErrorText;
			mail($wgEmergencyContact,'spamferret filter error',$msgEmail);
		}
	}
	/*----
	  ACTION: Create a new record for the current client
	*/
	public function CreateClient() {
		$sql = 'INSERT INTO client (Address,WhenFirst,Count,Retries) VALUES("'.$this->strIPAddr.'",NOW(),1,0)';
		$this->dbSpam->Exec($sql);
	}
	/*----
	  ACTION: Update a client's record to reflect a new spam attempt.
	    Resets the retry count instead of incrementing it when $this->doClearThrottle is set.
	    If no row was updated, creates the client record and logs an error line.
	*/
	public function RecordClientSpam() {
		if ($this->doClearThrottle) {
			$strRetries = '0';
		} else {
			$strRetries = 'Retries+1';
		}
		$sql = 'UPDATE client SET WhenLast=NOW(),Count=Count+1, Retries='.$strRetries.' WHERE Address="'.$this->strIPAddr.'"';
		$this->dbSpam->Exec($sql);
		if ($this->dbSpam->RowsAffected() < 1) {
			$this->CreateClient();
			$this->AddErrorLine('Record not found for client '.$this->strIPAddr);
		}
	}
	/*-----
	  ACTION: logs a rejected edit in the "attempt" table (didAllow = FALSE), bumps the client's
	    spam counters and, when a pattern was involved, that pattern's hit count
	  INPUT:
	    $this->idPattern - pattern that matched, or NULL
	    $iarArgs['code'] - short code describing why the edit was rejected
	    $iarArgs['diff']
	    $iarArgs['edit-raw']
	*/
	public function RecordAttempt(array $iarArgs) {
		global $wgTitle, $wgServer;

		$iCode = $iarArgs['code'];
		$txtDiff = $iarArgs['diff'];
		$txtEdit = $iarArgs['edit-raw'];
		$this->RecordClientSpam();

		$sqlCode = '"'.$iCode.'"';
		$sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
		$sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
		$sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
		$sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
		$isPattern = !is_null($this->idPattern);
		$sqlPattern = $isPattern ? $this->idPattern : 'NULL';
		$sqlAddr = '"'.$this->strIPAddr.'"';
		$sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES'
		  .'(NOW(),'
		  .$sqlPattern.','
		  .$sqlAddr.','
		  .SQL_Value(session_id()).','
		  .$sqlSrvr.','
		  .$sqlPage.','
		  .$sqlCode.','
		  .'FALSE,'
		  .$sqlEdit.','
		  .$sqlDiff.')';
		$ok = $this->dbSpam->Exec($sql);
		if ($ok !== TRUE) {
			$this->AddErrorLine('SQL ['.$sql.'] in RecordAttempt() generated this error: '.$ok);
		}
		if ($isPattern) {
			$sql = 'UPDATE patterns SET WhenTried=NOW(), Count=Count+1 WHERE ID='.$this->idPattern;
			$ok = $this->dbSpam->Exec($sql);
			if ($ok !== TRUE) {
				$this->AddErrorLine('SQL ['.$sql.'] generated this error: '.$ok);
			}
		}
	}
	/*-----
	  ACTION: logs an approved edit in the "attempt" table (code "ok", didAllow = TRUE, no pattern)
	  INPUT:
	    $iarArgs['diff']
	    $iarArgs['edit-to-check'] - edit text as returned by CheckFilters()
	*/
	public function RecordOkEdit(array $iarArgs) {
		global $wgTitle, $wgServer;

		$txtDiff = $iarArgs['diff'];
		$txtEdit = $iarArgs['edit-to-check'];

		$sqlPage = '"'.$this->dbSpam->SafeParam($wgTitle->getPrefixedText()).'"';
		$sqlSrvr = '"'.$this->dbSpam->SafeParam($wgServer).'"';
		$sqlEdit = '"'.$this->dbSpam->SafeParam($txtEdit).'"';
		$sqlDiff = '"'.$this->dbSpam->SafeParam($txtDiff).'"';
		$sql = 'INSERT INTO attempt (`When`,ID_Pattern,Addr_Client,IDS_Session,PageServer,PageName,Code,didAllow,Edit,Diff) VALUES '
		  .'(NOW(),NULL,'
		  .'"'.$this->strIPAddr.'",'
		  .SQL_Value(session_id()).','
		  .$sqlSrvr.','
		  .$sqlPage.',"ok",TRUE,'
		  .$sqlEdit.','
		  .$sqlDiff.')';
		$ok = $this->dbSpam->Exec($sql);
		if ($ok !== TRUE) {
			$this->AddErrorLine('SQL ['.$sql.'] in RecordOkEdit() generated this error: '.$ok);
		}
	}
}
/*-----
  ACTION: stores the given error number and message in the globals $errNum and $errStr,
    overwriting whatever was there before. Returns nothing.
*/
function ErrorHandler ($errno ,$errstr) {
	$GLOBALS['errNum'] = $errno;
	$GLOBALS['errStr'] = $errstr;
}
/*-----
  RETURNS: $iVar rendered for direct inclusion in an SQL statement:
    NULL -> the string 'NULL'; numeric values -> unchanged; anything else -> wrapped in double quotes
  NOTE(review): string values are NOT escaped here. Callers must only pass values that
    cannot contain quotes or backslashes, or escape them first.
*/
function SQL_Value($iVar) {
	if (is_null($iVar)) {
		return 'NULL';
	}
	if (is_numeric($iVar)) {
		return $iVar;
	}
	return '"'.$iVar.'"';
}
/*-----
  ACTION: runs the external "diff" command on two texts
  INPUT:
    $iTextOld, $iTextNew - texts to compare
  RETURNS: output of the diff command; empty string if the temporary files or the
    diff process could not be created
  NOTES: temporary files are created in the session save path and removed before returning.
*/
function FigureDiff($iTextOld, $iTextNew) {
	$result = '';

	# Make temporary files
	// $td = wfTempDir();
	$td = session_save_path();
	$fnOld = tempnam( $td, 'SpamFerret-old-' );
	$fnNew = tempnam( $td, 'SpamFerret-new-' );

	$isReady = ($fnOld !== false) && ($fnNew !== false)
	  && (file_put_contents( $fnOld, $iTextOld ) !== false)
	  && (file_put_contents( $fnNew, $iTextNew ) !== false);

	if ($isReady) {
		// this setting applies to all uses of a given filter DB
		//	can change for different DBs
		$ksSpamFerretDiffOptions = '-i -E --suppress-common-lines ';
		$cmd = 'diff '.$ksSpamFerretDiffOptions.wfEscapeShellArg( $fnOld, $fnNew );
		$handle = popen( $cmd, 'r' );
		if ($handle !== false) {
			$data = stream_get_contents( $handle );	// read until the diff process closes its output
			if ($data !== false) {
				$result = $data;
			}
			pclose( $handle );
		}
	}

	// always clean up whichever temporary files were actually created
	if ($fnNew !== false) {
		unlink( $fnNew );
	}
	if ($fnOld !== false) {
		unlink( $fnOld );
	}
	return $result;
}
} // end of 'MEDIAWIKI' check