/** This is a PROOF OF CONCEPT implementation.
* Use at your own risk, do with it as you please.
*
* by Daniel Kinzler, BrightByte.de
*/
/**
 * Debug-output hook: writes $msg to the output stream, padded with
 * newline/tab whitespace. Swap calls to this function for a real
 * logging facility, or drop them entirely.
 *
 * @param mixed $msg message to print (interpolated into a string)
 */
function trace($msg) {
    # the literal deliberately spans several lines; its line breaks are part of the output
    $rendered = "\n\t
$msg
\n";
    echo $rendered;
}
/**
 * Warning/error-output hook: writes $msg to the output stream, preceded by
 * a newline and a tab and followed by two line breaks. Replace calls to
 * this function with proper error reporting.
 *
 * @param mixed $msg message to print (interpolated into a string)
 */
function warn($msg) {
    # the literal deliberately spans two lines; its line break is part of the output
    $rendered = "\n\t$msg
\n";
    echo $rendered;
}
/**
 * Helper for decoding HTML entities. html_entity_decode() alone does not do
 * the trick: with a single-byte target charset it leaves numeric references
 * it cannot represent untouched, so any remaining "&#NNN;" / "&#xHH;"
 * references are decoded here in a second step.
 *
 * Numeric references are reduced to a single byte (code point modulo 256),
 * which is what chr() did in the original implementation. The result is
 * therefore only suitable for heuristic content sniffing, not for display.
 *
 * @param string $text text possibly containing HTML entities
 * @return string the text with named and numeric entities decoded
 */
function decode_entities($text) {
    $text = html_entity_decode($text, ENT_QUOTES, "ISO-8859-1"); #NOTE (original author): UTF-8 does not work!

    # decimal notation (&#NNN;). A callback replaces the /e (eval) modifier,
    # which no longer exists in PHP 7+, and the pattern is anchored on "&#" so that
    # ordinary text such as "12;" is left alone.
    $text = preg_replace_callback(
        '/&#(\d+);/',
        function ($m) {
            return chr(((int) $m[1]) & 0xFF);
        },
        $text
    );

    # hex notation (&#xHH;). Only the last two hex digits matter for a
    # single-byte result; taking them directly avoids overflow on long digit runs.
    $text = preg_replace_callback(
        '/&#x([a-f0-9]+);/i',
        function ($m) {
            return chr(hexdec(substr($m[1], -2)));
        },
        $text
    );

    return $text;
}
/** Heuristic for detecting files that *could* be interpreted as HTML by some browser.
* There seems to be an experimental implementation for this in mediawiki, so use that.
* The present implementation will produce false positives in many situations.
*
* NOTE(review): this block is damaged. The function header below belongs to
* is_htmlish(), but from the incomplete "if (eregi(" line onwards the statements
* work on $output, $code, $virus_scanner_codes and $reject_on_failed_scan, none of
* which are assigned or declared in the visible code. It looks like source text
* was lost in between - presumably the rest of is_htmlish(), all of is_scripted()
* and the beginning of is_viral(), both of which is_good_file() calls but which
* have no visible definition. Restore the missing text from the original before
* using this file; as it stands it does not parse.
*/
function is_htmlish($file) {
$data= file_get_contents($file); #load file. maybe create a cache so we don't do this more than once.
if (!$data) return False;
#NOTE: decoding entities is not necessary
#NOTE(review): the next line has an unbalanced parenthesis (text is missing here, see above).
#eregi() is part of the ereg extension, which is no longer available in PHP 7+.
if (eregi("&1",$output,$code);
#--- from here on: tail of a virus-scan routine (presumably is_viral(); confirm) ---
#$code appears to be the scanner's exit code and $output its captured output lines
#(assumption: filled by a call that is not visible here).
$exit_code= $code; #remember the raw code for user feedback
if ($virus_scanner_codes) { #map codes
if (isset($virus_scanner_codes[$code])) $code= $virus_scanner_codes[$code]; #explicit mapping
else if (isset($virus_scanner_codes["*"])) $code= $virus_scanner_codes["*"]; #fallback mapping
}
#Result by (mapped) code:
#  False -> scan failed: an error string if $reject_on_failed_scan is set, otherwise warn and return NULL
#  -1    -> unsupported file type: warn and return NULL
#  0     -> clean: return False
#  other -> infected: return the scanner output, or True if the output is empty
if ($code===False) { #scan failed (code was mapped to False by $virus_scanner_codes)
if ($reject_on_failed_scan) return "*** SCAN FAILED (code $exit_code) ***";
else {
warn("failed to scan $file (code $exit_code).");
return NULL;
}
}
else if ($code===-1) { #scan failed because filetype is unknown (probably immune)
warn("unsupported file type $file (code $exit_code).");
return NULL;
}
else if ($code===0) return False; #no virus found
else { #VIRUS FOUND, return full output
$output= join("\n",$output);
$output= trim($output);
if (!$output) $output= True; #if there's no output, return True
return $output;
}
}
/**
 * Guess the MIME type of a file. There probably already is something like this
 * in the mediawiki code, so use what you have. The present implementation relies
 * on the GNU "file" utility, which is unavailable on some platforms.
 *
 * @param string $file path of the file to inspect
 * @param bool   $full if true, keep everything "file" reports after the type
 *                     (e.g. "; charset=..."); by default only "major/minor" is returned
 * @return string|false lower-cased MIME type, or False if the type could not be
 *                      determined (utility missing, file unreadable, unusable output)
 */
function guess_mime_type($file, $full = false) {
    $fn = escapeshellarg($file);

    # "--" ends option parsing, so a file name starting with "-" is not read as an option
    $m = `file -bi -- $fn`;
    if (!$m) return false; #failed (utility not available?)

    #normalize
    if (!$full) $m = preg_replace('![;, ].*$!', '', $m); #strip charset, etc
    $m = trim($m);
    $m = strtolower($m);

    # "file" prints its error messages to stdout as well (e.g. "cannot open ...");
    # only accept output that actually starts with a major/minor pair.
    if (!preg_match('!^[a-z0-9][a-z0-9.+_-]*/[a-z0-9][a-z0-9.+_-]*!', $m)) return false;

    return $m;
}
/**
 * Determines if a given MIME type is forbidden. Use the method already present
 * in mediawiki instead. The present implementation relies on the global
 * $forbidden_mime, a list of forbidden types that may contain exact types
 * ("text/html") and wildcard rules ("major/*" or "*" followed by "/minor").
 *
 * The type is normalized before matching (parameters such as "; charset=..."
 * are dropped, case is ignored), because MIME types are case-insensitive and
 * a browser-supplied value is not under our control.
 *
 * @param string $mime MIME type to check
 * @return bool True if the type is forbidden, False otherwise
 */
function is_forbidden_mime($mime) {
    global $forbidden_mime;
    if (!is_array($forbidden_mime) || !$forbidden_mime) return false; #no forbidden mime types

    # normalize the candidate: trim, lower-case, cut at the first parameter separator
    $mime = strtolower(trim((string) $mime));
    $mime = substr($mime, 0, strcspn($mime, ";, \t"));
    if ($mime === '' || $mime === false) return false; #nothing to check

    $rules = array_map('strtolower', $forbidden_mime);

    if (in_array($mime, $rules, true)) return true; #is forbidden

    $parts = explode("/", $mime); #split major/minor
    $major = $parts[0];
    $minor = isset($parts[1]) ? $parts[1] : ''; #a type without "/" has no minor part

    if ($major !== '' && in_array($major . "/*", $rules, true)) return true; #default-rule for major type
    if ($minor !== '' && in_array("*/" . $minor, $rules, true)) return true; #default-rule for minor type

    return false; #allowed
}
/**
 * Determines whether the extension of a file name contradicts a given MIME type.
 * Use the method already present in mediawiki instead.
 * The present implementation relies on the global $mime_to_ext, which maps a
 * MIME type to the list of (lower-case) extensions acceptable for it.
 *
 * @param string $file file name (a path is reduced to its base name)
 * @param string $mime MIME type the extension is checked against
 * @return bool True if the extension is missing or not acceptable for $mime;
 *              False if it is acceptable, if no map is configured, or if no
 *              extensions are known for $mime
 */
function bad_file_extension($file,$mime) {
    global $mime_to_ext;
    if (!$mime_to_ext) return false; #no mime-to-extension map defined

    #get extension
    $file = basename($file);
    $idx = strrpos($file, ".");
    if ($idx === false) return true; #no dot found, that's always bad.

    $e = substr($file, $idx + 1);
    # compare explicitly: a plain truthiness test would also reject the extension "0";
    # substr() yields false (older PHP) or "" (PHP 8) when the name ends with a dot
    if ($e === false || $e === '') return true; #name ends with a dot, that's always bad.

    # isset() avoids an undefined-index warning for types missing from the map
    if (!isset($mime_to_ext[$mime]) || !$mime_to_ext[$mime]) return false; #no extensions known for the type, let it pass.
    $ext = $mime_to_ext[$mime];

    # bad unless the (case-folded) extension is in the list
    return !in_array(strtolower($e), $ext, true);
}
/**
 * Top-level entry point for vetting a file. Runs a fixed sequence of checks on
 * the MIME type, the file name and the file contents, and stops at the first
 * one that fails. Progress is reported through trace(), rejections through warn().
 *
 * @param string      $file              path of the file to inspect
 * @param string      $name              file name whose extension is checked
 * @param string|null $mime_from_browser MIME type reported by the browser, if any
 * @return bool True if every check passed, False as soon as one rejects the file
 */
function is_good_file($file,$name,$mime_from_browser=NULL) {
    # 1. MIME type as reported by the browser
    #    (this check may or may not be a good idea...)
    if ($mime_from_browser) {
        trace("checking is_forbidden_mime($mime_from_browser) [browser guess]");
        if (is_forbidden_mime($mime_from_browser)) {
            warn("Rejected: Forbidden MIME-Type $mime_from_browser [browser guess]!");
            return false;
        }
    } else {
        trace("no mime-type reported by the browser");
    }

    # 2. + 3. MIME type as guessed on the server: blacklist, then extension match
    $mime = guess_mime_type($file);
    if ($mime) {
        trace("checking is_forbidden_mime($mime) [server guess]");
        if (is_forbidden_mime($mime)) {
            warn("Rejected: Forbidden MIME-Type $mime [server guess]!");
            return false;
        }

        trace("checking bad_file_extension($name,$mime) [server guess]");
        if (bad_file_extension($name, $mime)) {
            warn("Rejected: MIME-Type $mime [server guess] mismatches file-extension!");
            return false;
        }
    } else {
        # both MIME-based checks are skipped; each one reports that separately
        trace("no mime-type guessed");
        trace("no mime-type guessed");
    }

    # 4. could some browser interpret the file as HTML?
    trace("checking is_htmlish($file)");
    if (is_htmlish($file)) {
        warn("Rejected: May look like HTML to some browsers!");
        return false;
    }

    # 5. does the file contain scripting code?
    trace("checking is_scripted($file)");
    if (is_scripted($file)) {
        warn("Rejected: Contains Javascript!");
        return false;
    }

    # 6. external virus scan; any truthy result counts as a finding
    trace("checking is_viral($file)");
    $viral = is_viral($file);
    if ($viral) {
        warn("Rejected: VIRUS FOUND: $viral");
        return false;
    }

    trace("all is well");
    return true;
}
?>