$msg\n"; } /** function for warning/error output. Replace references to this function * with something better. */ function warn($msg) { print "\n\t
$msg
\n"; } /** helper-function for decoding html-entites. html_entity_decode aolne does not do the trick, * as it handles numeric entity-refs poorly. */ function decode_entities($text) { $text= html_entity_decode($text,ENT_QUOTES,"ISO-8859-1"); #NOTE: UTF-8 does not work! $text= preg_replace('/&#(\d+);/me',"chr(\\1)",$text); #decimal notation $text= preg_replace('/&#x([a-f0-9]+);/mei',"chr(0x\\1)",$text); #hex notation return $text; } /** Heuristig for detecting files that *could* be interpretet as HTML by some browser. * There seems to be an experimental implementation for this in mediawiki, so use that. * The present will produce false positives in many situations. */ function is_htmlish($file) { $data= file_get_contents($file); #load file. maybe create a cache so we don't do this more than once. if (!$data) return False; #NOTE: decoding entities is not neccessary if (eregi("&1",$output,$code); $exit_code= $code; #remeber for user feedback if ($virus_scanner_codes) { #map codes if (isset($virus_scanner_codes[$code])) $code= $virus_scanner_codes[$code]; #explicite mapping else if (isset($virus_scanner_codes["*"])) $code= $virus_scanner_codes["*"]; #fallback mapping } if ($code===False) { #scan failed (code was mapped to False by $virus_scanner_codes) if ($reject_on_failed_scan) return "*** SCAN FAILED (code $exit_code) ***"; else { warn("failed to scan $file (code $exit_code)."); return NULL; } } else if ($code===-1) { #scan failed because filetype is unknown (probably imune) warn("unsupported file type $file (code $exit_code)."); return NULL; } else if ($code===0) return False; #no virus found else { #VIRUS FOUND, return full output $output= join("\n",$output); $output= trim($output); if (!$output) $output= True; #if ther's no output, return True return $output; } } /** Guess the mime type of a file. There probably already is something like this * in the mediawiki code, so use what you have. 
/**
 * Guess the MIME type of a file by running the GNU "file" utility on it.
 *
 * The utility is invoked as "file -bi <path>" through the shell; the path is
 * passed through escapeshellarg(). "file" is unavailable on some platforms,
 * so prefer the MIME detection of the surrounding application where it exists.
 *
 * @param string $file path of the file to inspect
 * @param bool   $full if True, keep everything "file" printed (charset etc.);
 *                     if False (default), cut the result at the first ';',
 *                     ',' or space so only "major/minor" remains
 * @return string|false trimmed, lowercased MIME type, or False if the
 *                      command produced no output
 */
function guess_mime_type($file,$full=False) {
    $fn= escapeshellarg($file);
    $m= `file -bi $fn`;

    if (!$m) return False; #failed (file not found?)

    #normalize
    if (!$full) $m= preg_replace('![;, ].*$!','',$m); #strip charset, etc
    $m= trim($m);
    $m= strtolower($m);

    return $m;
}

/**
 * Determine whether a MIME type is forbidden according to $forbidden_mime.
 *
 * $forbidden_mime is a global list whose entries are matched in three ways:
 * the exact type ("major/minor"), a rule for the major type ("major/*") and
 * a rule for the minor type ("*" followed by "/minor").
 *
 * @param string $mime MIME type to check
 * @return bool True if the type is forbidden; False if it is allowed or if
 *              $forbidden_mime is empty/unset
 */
function is_forbidden_mime($mime) {
    global $forbidden_mime;

    if (!$forbidden_mime) return False; #no forbidden mime types
    if (in_array($mime,$forbidden_mime)) return True; #exact match

    $m= explode("/",$mime); #split major/minor
    $major= $m[0];
    $minor= isset($m[1]) ? $m[1] : ""; #a type without "/" has no minor part

    if ($major) {
        if (in_array($major."/*",$forbidden_mime)) return True; #default-rule for major type
    }

    if ($minor) {
        if (in_array("*/".$minor,$forbidden_mime)) return True; #default-rule for minor type
    }

    return False; #allowed
}

/**
 * Determine whether the extension of a file name conflicts with a MIME type.
 *
 * Relies on the global $mime_to_ext, read here as a map from MIME type to a
 * list of lowercase extensions (the extension is lowercased before lookup).
 *
 * @param string $file file name or path; only the basename is examined
 * @param string $mime MIME type the extension has to match
 * @return bool True if the extension is bad: the name has no dot, ends with
 *              a dot, or its extension is not listed for $mime. False if the
 *              extension is listed, if no extensions are known for $mime, or
 *              if $mime_to_ext is empty/unset.
 */
function bad_file_extension($file,$mime) {
    global $mime_to_ext;

    if (!$mime_to_ext) return False; #no mime-to-extension map defined

    #get extension
    $file= basename($file);
    $idx= strrpos($file,".");
    if ($idx===False) return True; #no dot found, that's always bad.

    $e= substr($file,$idx+1);
    #compare explicitly: a plain truthiness test would also reject the extension "0"
    if ($e===False or $e==="") return True; #name ends with a dot, that's always bad.

    #unknown types are looked up with isset() to avoid an undefined-index warning
    $ext= isset($mime_to_ext[$mime]) ? $mime_to_ext[$mime] : NULL;
    if (!$ext) return False; #no extensions known for the type, let it pass.

    $e= strtolower($e);
    if (in_array($e,$ext)) return False; #the extension is in the list

    return True; #extension is bad.
}

/**
 * Top-level function for checking a file. Runs the checks below in order and
 * stops at the first one that fails, reporting the reason through warn():
 *
 *  1. MIME type reported by the browser is forbidden (only if one was given)
 *  2. MIME type guessed on the server is forbidden
 *  3. file extension of $name does not match the guessed MIME type
 *  4. is_htmlish($file) reports content that may look like HTML
 *  5. is_scripted($file) reports scripting code
 *  6. is_viral($file) returns a truthy result
 *
 * Checks 2 and 3 are skipped when no MIME type could be guessed. Progress is
 * logged through trace().
 *
 * @param string      $file              path of the file whose contents are checked
 * @param string      $name              file name used for the extension check
 *                                       (presumably the name supplied by the
 *                                       uploader - verify against the caller)
 * @param string|null $mime_from_browser MIME type reported by the browser, if any
 * @return bool True if every check passed, False if the file was rejected
 */
function is_good_file($file,$name,$mime_from_browser=NULL) {
    #check if the browser reported a forbidden mime type
    #this check may or may not be a good idea...
    if ($mime_from_browser) trace("checking is_forbidden_mime($mime_from_browser) [browser guess]");
    else trace("no mime-type reported by the browser");

    if ($mime_from_browser and is_forbidden_mime($mime_from_browser)) {
        warn("Rejected: Forbidden MIME-Type $mime_from_browser [browser guess]!");
        return False;
    }

    #guess mime type
    $mime= guess_mime_type($file);

    #check if we guessed a forbidden mime type
    if ($mime) trace("checking is_forbidden_mime($mime) [server guess]");
    else trace("no mime-type guessed");

    if ($mime and is_forbidden_mime($mime)) {
        warn("Rejected: Forbidden MIME-Type $mime [server guess]!");
        return False;
    }

    #check if the file extension matches the mime type we guessed
    if ($mime) trace("checking bad_file_extension($name,$mime) [server guess]");
    else trace("no mime-type guessed");

    if ($mime and bad_file_extension($name,$mime)) {
        warn("Rejected: MIME-Type $mime [server guess] mismatches file-extension!");
        return False;
    }

    #see if the file could be interpreted as HTML by some browsers
    trace("checking is_htmlish($file)");

    if (is_htmlish($file)) {
        warn("Rejected: May look like HTML to some browsers!");
        return False;
    }

    #see if the file contains scripting code
    trace("checking is_scripted($file)");

    if (is_scripted($file)) {
        warn("Rejected: Contains Javascript!");
        return False;
    }

    #scan the file for viruses, using an external scanner
    trace("checking is_viral($file)");
    $viral= is_viral($file);

    if ($viral) {
        warn("Rejected: VIRUS FOUND: $viral");
        return False;
    }

    trace("all is well");
    return True;
}
?>