Help
Press Page
Your Privacy
Search
|
|
|
|
Search
//Directories to search, in the form "path" => "url" (path to directory and URL which corresponds to directory)
//remember to include the trailing / on the URL
//Maps each filesystem directory that is scanned to the URL prefix that is
//prepended to the names of matching files found in it. Every path is listed
//once: a repeated key in a PHP array literal only overwrites the earlier value.
$directories = array(
    "/usr/local/apache/htdocs" => "/",
    "/usr/local/apache/htdocs/about" => "/about/",
    "/usr/local/apache/htdocs/events" => "/events/",
    "/usr/local/apache/htdocs/events/expo" => "/events/expo/",
    "/usr/local/apache/htdocs/howTo" => "/howTo/",
    "/usr/local/apache/htdocs/howTo/unclechin" => "/howTo/unclechin/",
    "/usr/local/apache/htdocs/howTo/updates" => "/howTo/updates/",
    "/usr/local/apache/htdocs/legis" => "/legis/",
    "/usr/local/apache/htdocs/legis/news" => "/legis/news/",
    "/usr/local/apache/htdocs/resources" => "/resources/",
);
//Filters for files that should not be included in search results
//These must be set in a PERL style regex
//Exclusion patterns (PCRE): a file name matching any of these is left out.
$filter = array(
    //The current-directory entry "."
    '/^\.$/',
    //The parent-directory entry ".."
    '/^\.\.$/',
    //Unix hidden files (any name beginning with a dot)
    '/^\./',
    //Images, executables, configuration/password files, includes and scripts
    '/(\.gif|\.jpg|\.exe|\.cnf|\.pwd|\.grp|\.inc|\.cgi|\.js|\.lib|\.pl|\.sh)$/',
);
/* Examples:
To only display .htm or .html add
"/^(?!.*\.html?$)/" //Filter out anything not ending in .htm or .html
To prevent the script from scanning through gifs and jpgs add
"/(\.gif|\.jpg)$/" //Filter out .gif and .jpg files
*/
// -- End Environment Variables -- //
// NOTE(review): this block is damaged and does not parse as written. It opens
// as Check_Filter(), but the for-statement a few lines down is cut off in the
// middle, and everything after it reads $filename/$keywords and builds a list
// of [filename, title] pairs. That is the shape the main section consumes from
// Keyword_Check(), so this is presumably the tail of Keyword_Check() spliced
// onto the head of Check_Filter(). The main section also calls Get_Filenames(),
// which has no visible definition. Restore the missing text from the original
// script before relying on any of this; the comments below only describe what
// the surviving lines do.
Function Check_Filter($filter,$filename)
{
//Checks filename against filters and returns true if no match
$retVal = 1;
// NOTE(review): the next line is not valid PHP. The text between "$i" and
// "1000000" is missing. Judging by the comment that follows, the lost
// condition was presumably a file-size check against 1000000 - confirm.
for($i=0;$i 1000000) {
// file will exceed ulimit, skip it.
continue;
}
// Number of keywords found in the current file.
$match = 0;
// Read the whole file into memory.
$fd = fopen($filename, "r");
$contents = fread($fd, filesize ($filename));
fclose($fd);
//Find title of File
// NOTE(review): as written, the ungreedy pattern below captures only the first
// non-newline character of the file. The surrounding comments talk about a
// title tag, so the pattern was presumably "|<title>(.+)</title>|Ui" before
// the markup was lost - confirm.
if (preg_match("|(.+)|Ui", $contents, $regs )) {
$title = $regs[1];
} else {
//Use Filename if no Title Tag
$title = $filename;
}
//Remove HTML Tags before searching
// $search and $replace are parallel arrays: preg_replace() applies each
// pattern in order and substitutes the entry at the same position in $replace.
// NOTE(review): the first pattern is empty and is replaced with an empty
// string, so it changes nothing; its comment suggests it once matched script
// elements - confirm against the original.
$search = array ("''si", // Strip out javascript
"'<[\/\!]*?[^<>]*?>'si", // Strip out html tags
"'([\r\n])[\s]+'", // Strip out white space
"'&(quot|#34);'i", // Replace html entities
"'&(amp|#38);'i",
"'&(lt|#60);'i",
"'&(gt|#62);'i",
"'&(nbsp|#160);'i",
"'&(iexcl|#161);'i",
"'&(cent|#162);'i",
"'&(pound|#163);'i",
"'&(copy|#169);'i");
$replace = array ("", // javascript pattern: removed
" ", // html tag: replaced by a single space
"\\1", // newline plus following whitespace: keep only the newline character
"\"", // entities below: replaced by the character they stand for
"&",
"<",
">",
" ",
chr(161),
chr(162),
chr(163),
chr(169));
$contents = preg_replace ($search, $replace, $contents);
// Turn every non-word character into a space, then collapse runs of whitespace
// so that splitting on a single space yields one word per element.
$contents = preg_replace ("/\W/", " ", $contents);
$contents = preg_replace ("/\s+/", " ", $contents);
//Separate Each Word into an Array Element and Compare to Keywords
$contents = explode(" ", $contents);
// Redundant: the for-loop below initialises $j again.
$j = 0;
for($j = 0; $j < count($keywords); $j++)
{
for($k = 0; $k < count($contents); $k++)
{
//compare contents with each keyword
// strcasecmp() returns 0 on a case-insensitive exact match, hence the "!".
if (!strcasecmp ($contents[$k], $keywords[$j]))
{
// Count each keyword at most once per file, then move on to the next keyword.
$match++;
break;
}
}
}
// The file is a hit only when every keyword was found in it (AND semantics).
if ($match == count($keywords) )
{
// Append a [filename, title] pair to the result list.
// NOTE(review): the only visible initialisation of $retVal is the integer 1
// near the top, which belongs to the filter check; the list used here was
// presumably initialised as an array in the lost text - confirm.
$counter = count($retVal);
$retVal[$counter][0] = $filename;
$retVal[$counter][1] = $title;
}
}
return $retVal;
}
// -- MAIN --
// Runs the search when a query is available in $keyword: every configured
// directory is scanned, files containing all of the keywords are collected in
// $pages as [url, title] pairs, and a result summary is printed.
//Make sure keyword is present and contains at least one non-whitespace character
// is_scalar() rejects array input (e.g. a "keyword[]" request parameter),
// which would otherwise reach preg_match() and fail there.
if (isset($keyword) && is_scalar($keyword) && preg_match("|\S+|", (string) $keyword))
{
    // Split on runs of whitespace and drop empty pieces, so that leading,
    // trailing or doubled spaces in the query do not produce empty keywords.
    $keywords = preg_split('/\s+/', (string) $keyword, -1, PREG_SPLIT_NO_EMPTY);
    $pages = array();
    // foreach replaces the list()/each() idiom; each() was removed in PHP 8.
    foreach ($directories as $key => $val)
    {
        $directory = $key;
        chdir($directory) or die("Directory $directory Not found");
        $filenames = Get_Filenames($directory,$filter);
        $found = Keyword_Check($filenames,$keywords);
        //add any pages with keywords in current directory to array
        for ($i = 0; $i < count($found); $i++)
        {
            // $val is the URL prefix configured for this directory.
            $fileloc = "$val".$found[$i][0];
            $pages[] = array($fileloc, $found[$i][1]);
        }
    }
    $numfound = count($pages);
    // NOTE(review): the echoed strings look as if they once contained HTML
    // markup that has been lost ($fileloc is read below but never printed,
    // presumably it was the link target) - confirm against the original.
    echo " \n";
    echo "$numfound pages matching your query were found";
    echo " ";
    for ($i = 0; $i < $numfound; $i++)
    {
        $fileloc = $pages[$i][0];
        $title = $pages[$i][1];
        echo "$title ";
    }
    echo " ";
}
if (!isset($parent) || $parent==0) {
?>
| | |