#!/usr/bin/perl
# Site search CGI -- configuration section.
# (The listing this file came from carried the page heading "源代码:",
# i.e. "source code:", above the interpreter line; the interpreter line
# has to be the first line of the file for the script to run.)
# ___________
#
# NOTE(review): the published listing had lost every "$name" token, so the
# scalar names below are reconstructed from the surrounding comments and
# from how the values are used further down.  They are package globals on
# purpose: the subroutines in this file read them by name.

# Absolute filesystem path of the directory that holds the first page
# (index.htm) of the site.  Stop at the directory name and remember the
# trailing slash.
$basedir = '/home/httpd/elvis/public_html/';

# The URL corresponding to the base directory.  Do not append index.htm.
$baseurl = 'http://linux.cqi.com.cn/~elvis/';

# This array holds all the directories and file types your visitors may
# search, relative to the base directory; list sub-directories too.  The
# asterisk is a wildcard.
@files = ('*.htm','*.html','forum/*.htm','mylinks/*.html','wwwboard/*.htm',
          'bbs/*.htm','docs/*.htm','docs/*.html',
          'guestbook/*.htm',
          'guestbook/*.txt',
          'friend/*.txt');

# Absolute path of summaries.htm.  It must be writable by the web-server
# user, and it should be hidden well: it records everybody's searches, so
# you learn what people are really looking for, but other visitors should
# not be able to read it.  A non-web or otherwise protected directory is
# the better home for it.
$summary_file = '/home/httpd/elvis/public_html/search/summaries.htm';

# URL and link text of your main web page (the text means "back to the
# home page"; changing it is optional).
$main_page_url   = 'http://linux.cqi.com.cn/~elvis/';
$main_page_title = '回到首页';

# 'on' emits a small JavaScript snippet that focuses the search box.  Set
# it to 'off' if your visitors' browsers cannot handle scripts.
$java = 'on';

# URL of the picture (search.gif) shown on the search form.
$picture_url = '/files/pic/20050817/200581702493651521.gif';

# URL under which this script is reachable.  Change it only if you rename
# or move the script.
$search_url = 'http://linux.cqi.com.cn/~elvis/search/search.cgi';

# Options for Weighted Search:
#
# All occurrences of a search term count as one point.  The occurrence
# of a term in the filename, title, META keywords, or META description
# can have added weight (equivalent to a multiplier per hit).  Enter the
# multipliers below, in that order - the defaults are (2,2,4,2).  If this
# makes no sense to you, just leave the defaults as they are - they work
# pretty well.  Note that this gives extra weight to pages that have a
# properly formatted title and META tags, even if they contain the same
# basic information.
($filename_weight, $title_weight, $keyword_weight, $description_weight) = (2,2,4,2);
# No further editing is necessary, but feel free to play around...
# Note that much of the code below is straight HTML, and very easy to
# modify if you know a little about HTML programming.
#
# ______________________________________________
# Decode the URL-encoded form body that the web server hands over on STDIN
# into the package hash %FORM (field name => decoded value).
# NOTE(review): variable names are reconstructed; the published listing had
# lost every "$name" token.
my $content_length = $ENV{ 'CONTENT_LENGTH' } || 0;
my $buffer = '';
read(STDIN, $buffer, $content_length) if ($content_length > 0);

%FORM = ();
foreach my $pair (split(/&/, $buffer))
{
    # Limit the split to two fields so a value that itself contains "="
    # is not cut short.
    my ($name, $value) = split(/=/, $pair, 2);
    next unless (defined $name && $name ne '');
    $value = '' unless defined $value;

    # "+" stands for a blank and %XX for a raw byte, in names and values
    # alike.
    foreach my $field ($name, $value)
    {
        $field =~ tr/+/ /;
        $field =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    }
    $FORM{ $name } = $value;
}

# A query with at least one non-blank character runs the search pipeline;
# anything else shows the search form.  Testing for \S instead of plain
# truth lets a visitor search for "0".
if (defined $FORM{ 'terms' } && $FORM{ 'terms' } =~ /\S/)
{
    get_files();
    search();
    return_html();
}
else
{
    prompt();
}
# prompt()
#
# Prints the CGI header and the search form page, including the search
# tips, to STDOUT.  Takes no arguments and returns nothing useful.
#
# Reads the package globals $picture_url (form picture), $search_url (form
# action), $java ('on' adds a script that focuses the input box),
# $main_page_url and $main_page_title (footer link).
# NOTE(review): these names are reconstructed - the published listing had
# lost every "$name" token, which left the IMG SRC, the FORM ACTION and
# the footer link empty and the "\$in" example unreadable.
sub prompt
{
    print "Content-type: text/html\n\n";
    print <<"EOM";
<HTML>
<HEAD>
<TITLE>Oh Yeah Net 全文搜索</TITLE>
<META NAME="description" CONTENT="Try out our internal search engine
for the fastest way to find what you're looking for!">
</HEAD>
<BODY BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#CE0000" ALINK="#000000" VLINK="#880000">
<BR><BLOCKQUOTE>
<IMG SRC="$picture_url" ALIGN=RIGHT HSPACE=20 WIDTH=253 HEIGHT=144
BORDER=1 ALT="Picture of Search Aircraft (trying to build a theme)">
<B>请输入关键字在本站内搜寻您要的资料,或是阅读
<A HREF="#tips">搜寻秘诀</A> </B><BR>
<FORM METHOD=POST ACTION="$search_url" NAME="searchform">
<INPUT TYPE=TEXT NAME="terms" SIZE=30>
<INPUT TYPE=SUBMIT VALUE="搜寻!"><BR>
</BLOCKQUOTE>
EOM

    # Put the cursor into the search box for browsers that run scripts.
    if (defined $java && $java eq 'on')
    {
        print <<"EOM";
<SCRIPT LANGUAGE="JavaScript">
<!-- script hiding...
document.searchform.terms.focus();
// End hiding -->
</SCRIPT>
EOM
    }

    print <<"EOM";
<A NAME="tips"></A>
</FORM>
<BR><H2><TT>Tips, tips and more tips!</TT></H2>
<BLOCKQUOTE>
Each term may be preceded by the standard Boolean operators
<TT>not</TT>, <TT>and</TT>, or <TT>or</TT>. If you search for
"<TT>dogs not pizzas</TT>", you'll find <I>all</I> documents
containing the word "<TT>dogs</TT>" <I>except</I> those
documents which <I>also</I> contain the word "<TT>pizzas</TT>". If
you type in "<TT>and hot and dog and pizzas</TT>", you'll find
<I>only</I> those documents which contain <I>all three</I> search
terms. The default value is <TT>or</TT>. Thus, a search for
"<TT>hot dog pizzas</TT>" would return pages <I>with at least
one</I> of the three terms.<P>
Altavista's shorthand notation works too. A search on "<TT>dogs
-hot</TT>" is equivalent to the first example, and "<TT>+hot
+dog +pizzas</TT>" will return the same documents as the second.<P>
If a search term has at least one capital letter, like "<TT>parIS</TT>",
the search will be case sensitive with respect to that word - that is,
only documents containing "<TT>parIS</TT>" will be found. On the other
hand, lowercase words like "<TT>paris</TT>" will generate hits
from "<TT>Paris</TT>", "<TT>PARIS</TT>", or "<TT>parIS</TT>".<P>
To group a collection of words, use quotes. For example, the query
<TT>"Zoltan Milosevic"</TT> (quotes included) would not generate a hit
from "Slobodan Milosevic met with Zoltan Smith". Without quotes,
the sentence would count. Boolean operators can also
act on quotations: a search on '<TT>+the +kitten not "the
kitten"</TT>' would return only those documents where
"<TT>the</TT>" and "<TT>kitten</TT>" appear separately.<P>
Intermediate Search finds words, not strings. A search for
"<TT>in</TT>" would turn up only that word, not "<TT>bin</TT>",
"<TT>inside</TT>", or "<TT>acquaintance</TT>". To perform a
string search, preface your term with the dollar sign - a
query on "<TT>\$in</TT>" would find all words lists above. Note
that more complex wildcard searches using the asterisk are
<I>not</I> permitted. Including the asterisk in your query will
return a list of all files, but that's its only function.<P>
These rules are based on <A HREF="http://www.altavista.digital.com">
Altavista's</A> query syntax; a look at their <A
HREF="http://www.altavista.digital.com/cgi-bin/query?pg=h">
Search Tips</A> may prove useful. The original
Simple Search was created by Matt Wright and can be found at <a
href="http://www.worldwidemart.com/scripts/">Matt's Script
Archive</a>. Like Matt's script, our version is freeware and can
easily be set up on most websites.</BLOCKQUOTE>
<BR><H5 ALIGN=CENTER>
<A HREF="$main_page_url">$main_page_title</A>
<HR SIZE=1 NOSHADE WIDTH=50\%>
Oh Yeah Net 全文搜索 Copyright 1998 中文化 <a href="http://kenwu.asits.net" target="_blank">衣剑舞</a> 整理:<a href="http://oh.yeah.net" target="_blank">Oh Yeah Net</a></H5></BODY></HTML>
EOM
}
# get_files()
#
# Changes into $basedir and fills the package array @FILES with the text
# files selected by the patterns in @files (paths relative to $basedir).
# Calls bad_base(), which reports the problem and exits, when $basedir is
# not a directory the script can enter.
#
# A pattern that names a directory contributes the text files directly
# inside that directory.  Each file is listed once even if several
# patterns select it.
#
# NOTE(review): variable names are reconstructed; the published listing had
# lost every "$name" token.  The listing shelled out to `ls` and split its
# output on whitespace, which broke on file names containing blanks; the
# built-in glob() returns each match as one item and needs no shell.
sub get_files
{
    bad_base() unless (defined $basedir && -d $basedir && chdir($basedir));

    @FILES = ();
    my %seen;
    foreach my $pattern (@files)
    {
        foreach my $match (glob($pattern))
        {
            my @candidates = ($match);
            if (-d $match)
            {
                # Expand a directory to its visible entries, as `ls` did.
                (my $dir = $match) =~ s{/+$}{};
                @candidates = ();
                if (opendir(my $handle, $dir))
                {
                    @candidates = map { "$dir/$_" }
                                  sort grep { !/^\./ } readdir($handle);
                    closedir($handle);
                }
            }
            foreach my $candidate (@candidates)
            {
                # -T keeps only files whose content looks like text.
                next unless (-f $candidate && -T $candidate);
                push(@FILES, $candidate) unless ($seen{ $candidate }++);
            }
        }
    }
}
# search()
#
# Parses the query in $FORM{'terms'} and scores every file in @FILES.
#
# Reads:  %FORM, @FILES and the four weights $filename_weight,
#         $title_weight, $keyword_weight, $description_weight.
# Writes: @required, @forbidden, @optional - the parsed terms.  A word
#             term is stored as '\W' . term . '\W', a string ("$term")
#             search as the bare term; return_html() relies on that form.
#         %title, %description - per-file title and short description.
#         %HITS   - sort key (zero-padded score followed by the file name)
#                   => file name, for every file that is listed.
#         $matches - number of entries in %HITS.
#         $FORM{'terms'} - replaced by the normalised query.
#
# Query rules, as described on the tips page: "not x" or "-x" forbids x,
# "and x" or "+x" requires x, everything else is optional; double quotes
# group words into a phrase; "$x" searches for x as a substring instead of
# a whole word; a term with a capital letter is matched case-sensitively;
# an asterisk anywhere in the query lists every file.
#
# NOTE(review): names are reconstructed - the published listing had lost
# every "$name" token.  Deliberate changes against that listing:
#   * terms are escaped before they are used in a pattern, so characters
#     such as "+" or "(" in the query are matched literally;
#   * the case test looks for a capital letter in the term; the listing
#     ran tr/\\w/\\W/, which also turned every "w" into "W" and so made
#     lower-case terms containing "w" case-sensitive;
#   * matches are counted directly; the listing counted the pieces
#     returned by split() and could reuse a count from the previous term;
#   * "not"/"and"/"or" are only treated as operators outside quotes;
#   * title and META captures stop at the first closing delimiter;
#   * a file that cannot be opened is skipped.
sub search
{
    # Stands in for the blanks inside a quoted phrase while the query is
    # split on whitespace.
    my $placeholder = '%%%==%%%';

    my $query = defined $FORM{ 'terms' } ? $FORM{ 'terms' } : '';

    # Convert multiple blank spaces to single spaces and pad both ends so
    # that the " word " patterns below also match the first and last word.
    $query =~ s/\s+/ /g;
    $query = " $query ";

    # An asterisk switches term matching off and lists every file.
    my $wildcard = ($query =~ /\*/) ? 1 : 0;

    # Walk the pieces between double quotes: the first piece is outside
    # quotes and the state flips at every quote.
    my $grouped = 0;
    my @pieces  = split(/\"/, $query);
    $query = '';
    foreach my $piece (@pieces)
    {
        if ($grouped)
        {
            $piece =~ s/ /$placeholder/g;
        }
        else
        {
            $piece =~ s/ not / -/ig;     # NOT becomes a minus sign
            $piece =~ s/ and / \+/ig;    # AND becomes a plus sign
            $piece =~ s/ or / /ig;       # OR is the default
        }
        $query  .= $piece;
        $grouped = !$grouped;
    }
    $FORM{ 'terms' } = $query;

    # Sort the terms into the three classes and compile one matcher each.
    @required  = ();
    @forbidden = ();
    @optional  = ();
    my (@required_re, @forbidden_re, @optional_re);

    foreach my $term (split(/\s+/, $query))
    {
        # Skip null entries (first and last).
        next if ($term eq '');

        # Unmask grouped terms.
        $term =~ s/\Q$placeholder\E/ /g;

        my ($terms, $matchers) = (\@optional, \@optional_re);
        if ($term =~ s/^\+//)
        {
            ($terms, $matchers) = (\@required, \@required_re);
        }
        elsif ($term =~ s/^-//)
        {
            ($terms, $matchers) = (\@forbidden, \@forbidden_re);
        }

        # A leading dollar sign asks for a substring search.
        my $substring = ($term =~ s/^\$//) ? 1 : 0;

        $term =~ s/^ +//;
        $term =~ s/ +$//;
        next if ($term eq '');

        # Word searches must not touch a word character on either side;
        # the look-arounds also match at the very start or end of the text.
        my $pattern = quotemeta($term);
        $pattern = '(?<!\w)' . $pattern . '(?!\w)' unless ($substring);

        push(@$matchers, ($term =~ /[A-Z]/) ? qr/$pattern/ : qr/$pattern/i);
        push(@$terms, $substring ? $term : '\W' . $term . '\W');
    }

    %HITS        = ();
    %title       = ();
    %description = ();
    $matches     = 0;

    foreach my $file (@FILES)
    {
        open(my $handle, '<', $file) or next;
        my $string = do { local $/; <$handle> };
        close($handle);
        $string = '' unless defined $string;
        $string =~ s/\r?\n/ /g;

        # Text appended to the searchable string to give the weighted
        # fields their extra hits.
        my $extra = '';

        # Extract the title, if there is one.  It already occurs once in
        # the page, hence one repetition fewer than its weight.
        if ($string =~ /<title>(.*?)<\/title>/i)
        {
            $title{ $file } = $1;
            $extra .= " $title{ $file }" x ($title_weight - 1)
                if ($title_weight && $title_weight > 1);
        }
        $title{ $file } = $file
            unless (defined $title{ $file } && $title{ $file } =~ /\S/);

        # Extract the description, if there is one; otherwise use the
        # first 25 words of the page text.
        if ($string =~ /<meta\s+name="description"\s+content="([^"]*)"/i)
        {
            $description{ $file } = $1;
            $extra .= " $description{ $file }" x ($description_weight || 0);
        }
        else
        {
            my $plain = $string;
            $plain =~ s/<title>.*?<\/title>//ig;
            $plain =~ s/<[^>]*>//g;
            my @words = grep { $_ ne '' } split(/\s+/, $plain);
            splice(@words, 25) if (@words > 25);
            $description{ $file } = join(' ', @words) . ' ...';
        }

        # Extract the keywords, if they exist.
        if ($string =~ /<meta\s+name="keywords"\s+content="([^"]*)"/i)
        {
            my $keywords = $1;
            $extra .= " $keywords" x ($keyword_weight || 0);
        }

        # Weight the filename as needed.
        $extra .= " $file" x ($filename_weight || 0);

        # Now that the special HTML tags have been read, strip all tags so
        # that they are not searched.
        $string .= $extra;
        $string =~ s/<[^>]*>//g;

        my $include = '';
        my $hits    = 0;

        if ($wildcard)
        {
            # Allow for wildcard-triggered listing.
            $include = 'yes';
        }
        else
        {
            # Optional terms: any hit lists the file.
            foreach my $matcher (@optional_re)
            {
                my $count = () = $string =~ /$matcher/g;
                next unless ($count);
                $include = 'yes';
                $hits   += $count;
            }

            # Required terms: every one of them has to occur.
            foreach my $matcher (@required_re)
            {
                my $count = () = $string =~ /$matcher/g;
                unless ($count)
                {
                    $include = 'no';
                    last;
                }
                $include = 'yes';
                $hits   += $count;
            }

            # Forbidden terms: a single occurrence excludes the file.
            if ($include ne 'no')
            {
                foreach my $matcher (@forbidden_re)
                {
                    next unless ($string =~ /$matcher/);
                    $include = 'no';
                    last;
                }
            }
        }

        next unless ($include eq 'yes');

        # Format for relevance.  The zero-padded score sorts correctly as
        # a string; the file name keeps the keys unique.
        my $relevance = sprintf("%012.3f", $hits / 1000);
        $HITS{ "$relevance$file" } = $file;
        $matches++;
    } # End loop through all files.
} # End search procedure.
# return_html()
#
# Appends a summary of the search to the webmaster's log file and prints
# the result page to STDOUT.  Takes no arguments.
#
# Reads the package globals filled in by search(): $matches, %HITS,
# %title, %description, @optional, @required, @forbidden; and the settings
# $summary_file, $baseurl, $search_url, $main_page_url, $main_page_title.
# Calls Last_Modified() for every listed file.  File sizes are read with
# paths relative to the current directory.
#
# NOTE(review): names and the exact layout of the per-result lines are
# reconstructed - the published listing had lost every "$name" token.
# Deliberate changes against that listing:
#   * search terms and the remote host come from the visitor and are
#     HTML-escaped before they are written to the page or the log;
#   * the term arrays are no longer modified while they are displayed;
#   * the log is opened with a checked three-argument open and is skipped
#     when it cannot be opened, so a logging problem never hides the
#     results from the visitor.
# Titles and descriptions come from the site's own pages and are printed
# as they are.
sub return_html
{
    my $escape = sub {
        my $text = shift;
        $text = '' unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };

    # First we build a summary for the webmaster and the visitor:
    my $total = $matches || 0;
    my $found = "$total 个文件符合";
    $found = "1 个文件符合" if ($total == 1);
    $found = "没有任何文件符合" unless ($total);

    my $summary = "<H2><TT>搜索结果: 找到 $found</TT></H2>\n";
    $summary .= "<BLOCKQUOTE>\n<PRE>\n";

    foreach my $section ([ " 搜寻关键字: ",         \@optional  ],
                         [ "\n Required Terms: ",  \@required  ],
                         [ "\n Forbidden Terms: ", \@forbidden ])
    {
        my ($label, $terms) = @$section;
        next unless (@$terms);

        my @shown;
        foreach my $term (@$terms)
        {
            # Word terms carry a \W wrapper; string searches carry none
            # and are set in italics.
            my $text    = $term;
            my $is_word = ($text =~ s/^\\W//) ? 1 : 0;
            $text =~ s/\\W$// if ($is_word);
            $text = $escape->($text);
            push(@shown, $is_word ? $text : "<I>$text</I>");
        }
        $summary .= $label . join(', ', @shown);
    }
    $summary .= "\n</PRE></BLOCKQUOTE>\n";

    if (defined $summary_file && open(my $log, '>>', $summary_file))
    {
        my $visitor = $ENV{ 'REMOTE_HOST' } || $ENV{ 'REMOTE_ADDR' } || 'unknown';
        print $log "Search by " . $escape->($visitor) . ":<BR>\n";
        print $log $summary;
        close($log);
    }

    # Now that the webmaster knows what's going on, we print the
    # results for the visitor:
    print "Content-type: text/html\n\n";
    print <<"EOM";
<HTML>
<HEAD><TITLE>搜寻结果</TITLE></HEAD>
<BODY BGCOLOR="#FFFFFF" TEXT="#000000" LINK="#0000EE" VLINK="#551A8B" ALINK="#FF0000">
$summary
<DL>
EOM

    if ($total > 0)
    {
        # The keys start with the zero-padded score, so a reversed string
        # sort puts the most relevant page first.
        foreach my $key (reverse sort keys %HITS)
        {
            my $file = $HITS{ $key };
            my $size = -s $file;
            $size = 0 unless defined $size;
            if ($size > 1500)
            {
                $size = int($size / 1000) . " K";
            }
            else
            {
                $size = "$size bytes";
            }
            my $modified = Last_Modified($file);
            print "<P><DT><a href=\"$baseurl$file\"><STRONG>$title{ $file }</STRONG></a></DT>\n";
            print "<DD>$description{ $file }<BR>\n";
            print "<CITE><A HREF=\"$baseurl$file\">$baseurl$file</A><FONT SIZE=-1>";
            print " - $size - $modified </FONT></CITE></DD>\n";
        }
    }
    else
    {
        print <<"EOM";
<BLOCKQUOTE><B>对不起!本站内找不到您要找的相关资讯。
请重新输入关键字,或察看 <A HREF="${search_url}?tips">搜寻秘诀</A>
页面</B></BLOCKQUOTE>
EOM
    }

    print <<"EOM";
</DL>
<CENTER>
<BR><BR><FORM METHOD=POST ACTION="$search_url">
<INPUT TYPE=TEXT NAME="terms" SIZE=40>
<INPUT TYPE=SUBMIT VALUE="新的搜寻"></FORM>
</CENTER>
<BR><H5 ALIGN=CENTER>
<A HREF="${search_url}?tips">搜寻秘诀</A> -
<A HREF="$main_page_url">$main_page_title</A>
<HR SIZE=1 NOSHADE WIDTH=50\%>
Oh Yeah Net 全文搜索 Copyright 1998 中文化 <a href="http://www.7dspace.com" target="_blank">衣剑舞</a> 整理 :<a href="http://www.7dspace.com" target="_blank">Oh Yeah Net</a></H5></BODY></HTML>
EOM
}
# Last_Modified($file)
#
# Returns the modification date of $file as "day Mon year" in local time,
# for example "9 Sep 2001", or an empty string when the file cannot be
# examined.
#
# This wonderful snippet was written by Jeff Carnahan of Terminal
# Productions (www.terminalp.com)
#
# NOTE(review): variable names and the order of the date parts are
# reconstructed - the published listing had lost every "$name" token.
# localtime() reports the year as an offset from 1900, so 1900 is added
# to print a full year.
sub Last_Modified
{
    my $file  = shift;
    my $mtime = (stat($file))[9];
    return '' unless defined $mtime;

    my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];
    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    return $mday . ' ' . $months[$mon] . ' ' . ($year + 1900);
}
# bad_base()
#
# Prints a short HTML error page naming the configured base directory
# ($basedir) and ends the script.  Called when that directory cannot be
# used.  Does not return.
#
# NOTE(review): the variable name is reconstructed; the published listing
# had lost it, which left the <PRE> element empty.  The path is escaped
# because it is printed into HTML.
sub bad_base
{
    my $shown = defined $basedir ? $basedir : '';
    $shown =~ s/&/&amp;/g;
    $shown =~ s/</&lt;/g;
    $shown =~ s/>/&gt;/g;

    print "Content-type: text/html\n\n";
    print "I tried to find the base directory you specified:\n";
    print "<BLOCKQUOTE><PRE>$shown</PRE></BLOCKQUOTE>\n";
    print "But the system told me that it did not exist.\n";
    exit;
}
