#use strict;#if-debug sub version_ca { return '2.0.0.0072'; } =head1 HEAD Copyright 1997-2004 by Zoltan Milosevic, All Rights Reserved See http://www.xav.com/scripts/search/ for more information. If you edit the source code, you'll find it useful to restore the function comments and #&Assert checks: cd "search/searchmods/powerusr/" hacksubs.pl build_map hacksubs.pl restore_comments hacksubs.pl assert_on common_admin.pl contains functions that are only called from the Mode/Admin or Mode/AnonAdd pathways. =cut sub ui_Rewrite { my $err = ''; Err: { my $sa = $::FORM{'sa'} || ''; if ($sa eq 'save') { print <<"EOM";
$::str[96] / $::str[162] / Rewrite / $::str[362]
EOM my $test_str = 'foo bar'; my $level; foreach $level (0,1) { my @rules = (); foreach (sort keys %::FORM) { next unless (m!^$level\.(\d+)$!); my $key = $1; my ($p1, $p2) = ($::FORM{$key . '_p1'}, $::FORM{$key . '_p2'}); next unless ($p1); my @fields = ($::FORM{$key . '_enabled'}, $p1, $p2, $::FORM{$key . '_comment'}, $::FORM{$key . '_verbose'} ); eval '$test_str =~ s!$p1!$p2!isg;'; if ($@) { my ($hp1, $hp2) = &he($p1, $p2); $err = "unable to evaluate Perl substitution on '$hp1' and '$hp2' - Perl returned the following error string:" . &he($@); next Err; } my $str = join('=', &ue(@fields) ); push(@rules, $str); } my $key = 'rewrite_url_' . $level; $::Rules{$key} = '' if not exists $::Rules{$key}; $err = &WriteRule( $key , join('&',@rules) ); next Err if ($err); my $count = scalar @rules; print "
Success: saved $count level-$level rewrite rules.
\n"; } last Err; } my @out = ('', ''); my $template = <<"EOM";Enabled | $::str[309] | Pattern | Replace |
---|---|---|---|
Comment: |
$::str[96] / $::str[162] / Rewrite / Overview
$::const{'AdminForm'}Input Filters
The following Perl substitutions will be performed, in order, on all links as they are extracted from files during a crawl session.
If the $::str[309] bit is set, then a statement will be printed to screen whenever the substitution is successful. Use this for testing.
You may add new rewrite rules by using the first or last two blank tables. If you need to add more than that, simply enter two, then save, and then you will be able to add more. To delete a rule, delete the Pattern portion.
Use the Enabled bit to turn rules on or off during development. Link extraction rewrite rules are part of the critical path and should only be enabled if needed.
$out[0]Output Filters
The following Perl substitutions will be performed, in order, on all links just before they are shown in the search results.
If the $::str[309] bit is set, then a statement will be printed to screen whenever the substitution is successful. Use this for testing.
$out[1] EOM last Err; } continue { &ppstr(29,$err); } } sub ui_License { my $err = ''; Err: { print "$::str[96] / $::str[467] / "; my $sa = $::FORM{'sa'} || ''; if ($sa eq 'Write') { print "$::str[362]
"; if ($::private{'is_demo'}) { $err = $::str[435]; next Err; } if ($::FORM{'regkey'}) { unless (®key_validate($::FORM{'regkey'})) { $err = $::str[454] . " ($::str[432])"; next Err; } } elsif ($::FORM{'mode'} == 2) { $err = $::str[455]; next Err; } if ($::FORM{'mode'} == 3) { if (1 < $::realms->realm_count('all')) { $err = $::str[456]; next Err; } my $p_realm_data = (); foreach $p_realm_data ($::realms->listrealms('all')) { if ($$p_realm_data{'type'} == 1) { $err = &pstr(457,$$p_realm_data{'html_name'}); next Err; } elsif ($$p_realm_data{'type'} == 6) { $err = &pstr(175,$$p_realm_data{'html_name'}); next Err; } } } if (($::FORM{'regkey'}) and ('' eq $::Rules{'regkey'})) { $::FORM{'mode'} = 2; } $err = &WriteRule('mode', $::FORM{'mode'}); next Err if ($err); $err = &WriteRule('regkey', &ue($::FORM{'regkey'})); next Err if ($err); &ppstr(174,$::str[114]); last Err; } print "$::str[152]"; my %defaults = ( 'mode' => $::private{'mode'}, 'regkey' => &ud($::Rules{'regkey'}), ); $defaults{'regkey'} =~ s!(\015|\012|\r|\n)+!\015\012!sg; print &SetDefaults(<<"EOM", \%defaults); $::const{'AdminForm'}$::str[458] | $::str[447] | |
---|---|---|
$::str[459] | ||
$::str[460] | ||
$::str[468] |
$::str[386]
$::str[96] / $::str[162]"; if ($subactions{$subaction}) { print " / $subactions{$subaction}"; } print "
\n"; my $err = ''; Err: { local $_; if (($subaction eq 'CreateEdit') or ($subaction eq 'create_edit_rule')) { $err = &s_create_edit_rule(); next Err if ($err); last Err; } if ($subaction eq 'ShowPending') { &present_queued_pages($::FORM{'Realm'}); last Err; } if ($subaction eq 'PQP') { &process_queued_pages(); last Err; } my $fr = &fdse_filter_rules_new(); if ($subaction eq 'delete_rule') { $err = $fr->delete_filter_rule($::FORM{'name'}); next Err if ($err); &ppstr(174,&pstr(414,&he($::FORM{'name'}))); print '' . $::str[329] . '
'; last Err; } if ($subaction eq 'save_settings') { my $p_data = (); foreach $p_data ($fr->list_filter_rules()) { next if (($::private{'is_freeware'}) and ($$p_data{'is_system'} == 0)); my $name = $$p_data{'name'}; if ($::FORM{"$name-enabled"}) { $$p_data{'enabled'} = 1; } else { $$p_data{'enabled'} = 0; } } $err = $fr->frwrite(); next Err if ($err); foreach ( 'allowanonadd', 'require anon approval', 'allowanonadd: notify admin', 'allowanonadd: require user email', 'allowanonadd: log', 'allowanonadd: use rate', 'allowanonadd: max rate', ) { if (not exists($::FORM{$_})) { $err = "invalid argument. Required parameter '$_' is not defined"; next Err; } $err = &WriteRule($_, $::FORM{$_}); next Err if ($err); } my $private_key = ''; if ((exists($::FORM{'_virtual_aaa_ufs'})) and ($::FORM{'_virtual_aaa_ufs'})) { # generate a random 30-character alphanumeric server key my @charset = (0..9,'A'..'Z','a'..'z'); for (1..30) { $private_key .= $charset[int(rand(@charset))]; } } $err = &WriteRule( 'allowanonadd: use form-signature', $private_key ); next Err if ($err); $::FORM{'pics_rasci_enable'} = 0 unless ($::FORM{'pics_rasci_enable'}); $::FORM{'pics_ss_enable'} = 0 unless ($::FORM{'pics_ss_enable'}); foreach (keys %::FORM) { next unless (m!^pics_!); $err = &WriteRule($_, $::FORM{$_} || 0); next Err if ($err); } &ppstr(174,$::str[114]); last Err; } print <<"EOM"; $::const{'AdminForm'} EOM my $str_rule_list = ''; my @action_names = ( $::str[479], # always allow $::str[142], # deny $::str[478], # require approval $::str[477], # promote $::str[476], # no update on redirect $::str[338], # index nofollow $::str[337], # follow noindex ); my $p_data = (); foreach $p_data ($fr->list_filter_rules()) { if ($::private{'is_freeware'}) { next unless ($$p_data{'is_system'}); } my $en = ''; if ($$p_data{'enabled'}) { $en = ' checked="checked"'; } my $urlname = &ue($$p_data{'name'}); my $htmlname = &he($$p_data{'name'}); my $action = $action_names[$$p_data{'action'}]; $str_rule_list .= <<"EOM";%s | |
%s |
%s
EOM my (@pics_codes, @pics_names, @pics_values) = (); my $load_err = &load_pics_descriptions( $pics_type, \@pics_codes, \@pics_names, \@pics_values ); if ($load_err) { &ppstr(29,$load_err); } else { $html .= '%s | |
%s |
%s
EOM my (@pics_codes, @pics_names, @pics_values) = (); my $load_err = &load_pics_descriptions( $pics_type, \@pics_codes, \@pics_names, \@pics_values ); if ($load_err) { &ppstr(29,$load_err); } else { $html .= '' . $::str[329] . '
'; last Err; } my $html_orig_name = ''; if ($::FORM{'name'}) { $html_orig_name = &he($::FORM{'name'}); my $p_data = $fr->{$::FORM{'name'}}; unless ('HASH' eq ref($p_data)) { $err = &pstr(55,&he($::FORM{'name'})); next Err; } my $p_strings = $$p_data{'p_strings'}; $defaults{'substr'} = join("\n", @$p_strings); my $p_litstrings = $$p_data{'p_litstrings'}; $defaults{'litsubstr'} = join("\n", @$p_litstrings); foreach ('name', 'fr_action', 'promote_val', 'fr_analyze', 'fr_mode', 'enabled', 'occurrences', 'fr_apply_to') { my $name = $_; $name =~ s!^fr_!!o; $defaults{$_} = $$p_data{$name}; } if ($$p_data{'apply_to'} eq '2') { my @realm_types = split(m!\,!, $$p_data{'apply_to_str'} ); foreach (@realm_types) { next unless (m!^\d+$!); $defaults{"z$_"} = 1; } } elsif ($$p_data{'apply_to'} eq '3') { my @realms = split(m!\,!, $$p_data{'apply_to_str'} ); foreach (@realms) { $_ = &ud($_); next unless ($_); $defaults{"zz$_"} = 1; } } } else { my $num = 1; my $p_data = (); foreach $p_data ($fr->list_filter_rules()) { if ($$p_data{'name'} =~ m!New Rule (\d+)!i) { $num = ($1 + 1) if ($1 >= $num); } } $defaults{'name'} = "New Rule $num"; } my $name = $defaults{'name'}; $name = &he( $name ); my $name_form = qq!!; if ($system_rules{$name}) { $name_form = qq!$name!; } print <<"EOM"; $::const{'AdminForm'} EOM my %replace = %::const; $replace{'HTML_BLOCK_1'} = $name_form; my $i = 0; my $realm_list = ''; my $p_realm_data = (); foreach $p_realm_data ($::realms->listrealms('all')) { $i++; $realm_list .= qq!$str_index_time - $text_user $user_html.
$index_error
$::str[430] | $html_status |
$str_index_time - $text_user $user_html.
$index_error
$::str[426] $::str[427] $::str[142] |
$html_status |
$::str[421] - '$Realm'
$::str[430] - $::str[423].
$::str[142] - $::str[423].
$::str[427] - $::str[424]
$::str[426] - $::str[425]
$::str[422]
EOM print $jumptext; } last Err; } continue { &ppstr(29,$err); } } sub anonadd_main { my $err = ''; Err: { if (not $::Rules{'allowanonadd'}) { $err = $::str[173]; next Err; } elsif (0 == $::realms->realm_count('has_no_base_url')) { $err = &pstr( 552, $::str[431] ); next Err; } LimitSubmitRate: { last unless $::Rules{'allowanonadd: use rate'}; my @submit_times = split(m!\.!, $::Rules{'allowanonadd: recent submit times'}); # format: newest.new.medium.old.oldest my $submit_count = scalar @submit_times; last if ($submit_count < $::Rules{'allowanonadd: max rate'}); # not enough data to block my $time_of_nth_submission = $submit_times[ $::Rules{'allowanonadd: max rate'} - 1 ]; my $five_min = 5 * 60; if (($::private{'script_start_time'} - $time_of_nth_submission) < $five_min) { $err = &pstr( 557, $::Rules{'allowanonadd: max rate'} ); next Err; } } if ((exists($::FORM{'Realm'})) and ((exists($::FORM{'URL'})) or (exists($::FORM{'b_submit'})))) { # validate 'Realm' existence: my $p_realm; ($err, $p_realm) = $::realms->hashref( $::FORM{'Realm'} ); next Err if ($err); unless ($p_realm->{'is_open_realm'}) { $err = "realm '$p_realm->{'html_name'}' is not an open realm"; next Err; } my $failpoint = 0; ValidateFormSignature: { last unless $::Rules{'allowanonadd: use form-signature'}; unless ((exists($::FORM{'keynames'})) and ($::FORM{'keynames'} =~ m!^(\w{3})(\w{3})(\w{3})(\w{3})(\w{3})(\w{3})(\w{3})(\w{3})(\w{3})$!)) { $failpoint = 1; next; } my @names = ($1, $2, $3, $4, $5, $6, $7, $8); # reverse the aliases: $::FORM{'URL'} = $::FORM{$names[1]}; $::FORM{'EMAIL'} = $::FORM{$names[2]}; my $timestamp = $::FORM{$names[3]}; my $signature = $::FORM{$names[4]}; if (($::private{'script_start_time'} - $timestamp) > 20 * 60) { $failpoint = 2; next; } my $audit_sig = ''; my $index; foreach $index (0..4) { my $char8 = ''; $char8 .= substr( $timestamp, 2 * $index, 2 ); $char8 .= substr( $::Rules{'allowanonadd: use form-signature'}, 6 * $index, 6 ); my $salt = substr( $signature, 13 * $index, 13 ); $audit_sig .= crypt( $char8, $salt ); } if ($audit_sig ne $signature) { $failpoint = 3; next; } # full decoys: if (exists $::FORM{$names[5]}) { $failpoint = 4; next; } if (exists $::FORM{$names[6]}) { $failpoint = 5; next; } # either-or decoy: if ((exists $::FORM{$names[7]}) and (exists $::FORM{$names[0]})) { $failpoint = 6; next; } if ((not exists $::FORM{$names[7]}) and (not exists $::FORM{$names[0]})) { $failpoint = 7; next; } last; } continue { $err = qq!invalid form signature. Please visit the submission form and try again (failpoint $failpoint)!; next Err; } $::const{'is_cmd'} = 0; $err = &s_AddURL(1, $::FORM{'Realm'}, $::FORM{'URL'}); next Err if ($err); LimitSubmitRate: { last unless $::Rules{'allowanonadd: use rate'}; my @submit_times = split(m!\.!, $::Rules{'allowanonadd: recent submit times'}); # format: newest.new.medium.old.oldest my $submit_count = scalar @submit_times; if ($submit_count >= $::Rules{'allowanonadd: max rate'}) { splice( @submit_times, $::Rules{'allowanonadd: max rate'} - 1 ); } my $new = join( '.', ($::private{'script_start_time'}, @submit_times) ); $err = &WriteRule( 'allowanonadd: recent submit times', $new ); next Err if ($err); } } my ($count, $html_hidden, $html_tr) = $::realms->html_select_ex('is_open_realm', $::FORM{'Realm'} ); my $hidden = ''; my ($n, $v); while (($n, $v) = &he(each %::FORM)) { next unless ($n =~ m!^p:!); $hidden .= qq!\n!; } if (($::Rules{'default search terms'}) and (not ($::FORM{'EMAIL'}))) { $::FORM{'EMAIL'} = 'you@yourhost.tld'; } my %defaults = %::FORM; $defaults{'URL'} = $::FORM{'URL'} || 'http://'; my %alias_field_names = ('URL' => 'URL','EMAIL' => 'EMAIL'); my %inserted_form_elements = (); CreateFormSignature: { last unless $::Rules{'allowanonadd: use form-signature'}; my $timestamp = substr($::private{'script_start_time'},0,10); # limit to 10 chars $timestamp = ('0' x (10 - length($timestamp))) . $timestamp; # 0-pad for earlier times my @charset = (0..9,'a'..'z','A'..'Z'); # create eight unique random 3-character strings: my %uniq = (); my @names = (); while ($#names < 8) { my $ran = ''; $ran .= $charset[int(rand(@charset))]; $ran .= $charset[int(rand(@charset))]; $ran .= $charset[int(rand(@charset))]; next if exists $uniq{$ran}; push(@names,$ran); $uniq{$ran} = 1; } my $signature = ''; my $index; foreach $index (0..4) { my $char8 = ''; $char8 .= substr( $timestamp, 2 * $index, 2 ); $char8 .= substr( $::Rules{'allowanonadd: use form-signature'}, 6 * $index, 6 ); my $salt = $charset[int(rand(@charset))] . $charset[int(rand(@charset))]; $signature .= crypt( $char8, $salt ); } %alias_field_names = ( 'URL' => $names[1], 'EMAIL' => $names[2], 'time' => $names[3], 'sig' => $names[4], ); $defaults{'keynames'} = join( '', @names ); $defaults{$names[3]} = $timestamp; $defaults{$names[4]} = $signature; $defaults{$names[1]} = $::FORM{'URL'} || 'http://'; $defaults{$names[2]} = $::FORM{'EMAIL'}; $inserted_form_elements{ qq!! } = 1; $inserted_form_elements{ qq!! } = 1; $inserted_form_elements{ qq!! } = 1; # full decoys: $inserted_form_elements{ qq!! } = 1; $inserted_form_elements{ qq~~ } = 1; # either-or decoy: $inserted_form_elements{ qq!! } = 1; $inserted_form_elements{ qq!! } = 1; } my $input = qq!!; if ($::Rules{'multi-line add-url form - visitors'}) { $input = qq!!; } $hidden .= join('', keys %inserted_form_elements); print &SetDefaults( <<"EOM", \%defaults );$::str[172]
EOM last Err; } return $err; } sub admin_main { my $err = ''; Err: { local $_; #changed 0056 IPLimit: { last IPLimit unless (($::private{'visitor_ip_addr'}) and ($::private{'allow_admin_access_from'})); # patterns must be of the format a.b.c.d or a.b.c.* or a.b.* or a.* if ($::private{'allow_admin_access_from'} =~ m![^0-9\.\s\*]!) { my $hstr = &he($::private{'allow_admin_access_from'}); $err = "string 'allow_admin_access_from' can only contain numbers, spaces, dots, and asterisks. An example of a valid string is '123.45.6.*'. Your string is currently set to '$hstr' which includes characters from outside the allowed set"; next Err; } my @patterns = split(m!\s+!, $::private{'allow_admin_access_from'}); foreach (@patterns) { s!\.!\\\.!g; s!\*!\.\*!g; last IPLimit if ($::private{'visitor_ip_addr'} =~ m!^$_$!); } $err = "access denied to admin functions. Your IP address $::private{'visitor_ip_addr'} is not among the list of allowed addresses. The list of allowed addresses is controlled with the 'allow_admin_access_from' variable within the source code"; next Err; } $| = 1; my $action = (exists $::FORM{'Action'}) ? $::FORM{'Action'} : ''; if ($action ne 'NavBar') { #changed 0045 -- is this folder writable? my $w_test = 'is_writable.txt'; if ((-e $w_test) and (not unlink($w_test))) { $err = &pstr(54, $w_test, $!); next Err; } unless (open( FILE, ">$w_test" )) { $err = &pstr(472, $!); next Err; } close(FILE); unlink($w_test); } $::const{'is_cmd'} = ((exists($::FORM{'interface'})) and ($::FORM{'interface'} eq 'cmdline')) ? 1 : 0; if (exists($::FORM{'fdrk_audit'})) { ®key_verify(); last Err; } my ($is_auth, $form_password, $url_password) = &Authenticate( $::Rules{'password'} ); last Err unless ($is_auth); # Initialize network client cache: my %nc_cache = (); $::private{'p_nc_cache'} = \%nc_cache; $::const{'AdminForm'} = qq! EOM last Err; } if ($subaction eq 'delete') { my $delcount = 0; if ($::FORM{'del:csv'}) { $err = &WriteFile('search.log.txt',''); next Err if ($err); &ppstr( 174, &pstr( 383, 'search.log.txt' ) ); $delcount++; } if ($::FORM{'del:dbm'}) { if (not $::Rules{'use dbm routines'}) { &pppstr(347, $::str[328] ); } else { eval { foreach ('dbm_strlog_top','dbm_strlog_all') { my %hash = (); dbmopen( %hash, $_, 0666 ) || die &pstr( 43, $_, $! ); %hash = (); # clear dbmclose(%hash); &ppstr( 174, &pstr( 383, $_ ) ); } }; if ($@) { &ppstr(53, &pstr(20, &he($@), "$::const{'help_file'}1169.html" ) ); } $delcount++; } } unless ($delcount) { $err = $::str[354]; next Err; } last Err; } my $focus = lc($::FORM{'focus'}); $focus = 'id' unless ($focus); # name of a field my $query = 0; if ($::FORM{'orderby'} =~ m!^\d+$!) { $query = $::FORM{'orderby'}; } #change 0049 - queries on string date-time (3) are internally handled as 2, the Unix datetime $query = 2 if ($query == 3); my $field_name = $FieldNames[$query]; my $AsciiSort = not ($query =~ m!^(0|2|6|7|8)$!); my %Groups = (); my $ptr = 0; #DBM-based sorts if ($subaction eq 'dbm') { unless ($::Rules{'use dbm routines'}) { $err = $::str[328]; next Err; } unless (defined($::FORM{'file'})) { $err = "parameter 'file' missing"; next Err; } my $file = $::FORM{'file'}; unless ($file =~ m!^dbm_strlog_(all|top)$!) { $err = "file must match ^dbm_strlog_(all|top)\$"; next Err; } eval { my %str = (); dbmopen( %str, $file, 0666 ) || die &pstr( 44, $file, $! ); my $count = 1; print "
This file '$file' was initialized on " . &FormatDateTime( $str{'++'}, $::Rules{'ui: date format'} ) . ".
\n" if ($str{'++'}); my $obkey = (($::FORM{'ob'}) and ($::FORM{'ob'} eq 'key')) ? 1 : 0; my $rev = (($::FORM{'sort'}) and ($::FORM{'sort'} eq 'rev')) ? 1 : 0; my $nsort = $rev ? 'n' : 'rev'; print <<"EOM";Frequency | $::str[112] | |
---|---|---|
$value | $name |
Handled $total total records.
EOM }; if ($@) { $err = &pstr(20, &he($@), "$::const{'help_file'}1169.html" ); next Err; } } else { unless (-e 'search.log.txt') { $err = &pstr(155,'search.log.txt'); next Err; } # Migrate log file format if necessary: if (open(LOGFILE, "$::str[122]
$name | \n"; if (($::FORM{'orderby'} eq $rev_FieldNames{$name}) and (not ($::FORM{'sort'}))) { print "$name | "; } else { print "$name | "; } } print "|||||
---|---|---|---|---|---|---|---|
%s | %s | %s | %s | %s | %s | %s | %s |
$::str[122]
$::str[140] | $FieldNames[$query] |
---|
$::str[96] / $::str[327] / "; if ($subaction eq 'DeleteRealm') { print $::str[430]; } elsif ($subaction eq 'Create') { print qq!$::str[94]!; } elsif ($subaction eq 'Edit') { print $::str[368]; } else { print $::str[152]; } print "
\n"; if ($subaction eq 'DeleteRealm') { my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Delete'} ); next Err if ($err); my $realm_id = $$p_realm_data{'realm_id'}; my $realm_name = $$p_realm_data{'name'};#keep - we won't be able to query p_realm_data later! my $index_file = $$p_realm_data{'file'}; $::realms->remove( $$p_realm_data{'name'}, 1 ); $err = $::realms->save_realm_data(); next Err if ($err); # Deal with the remaining data: &ppstr(174, $::str[177] ); my $delcount = 0; ($err, $delcount) = &DeleteFromPending( $realm_name ); next Err if ($err); &ppstr(174, &pstr(178,$delcount,'search.pending.txt')); # Deal with file data: if ($::Rules{'delete index file with realm'}) { &delete_index_file( $index_file ); } else { if (($index_file) and (-e $index_file)) { &pppstr(176, $index_file, int((1023 + (-s $index_file))/1024) ); # is this a valid file name, according to our check in DelFile? only offer to delete the file if the check will pass: # make sure it only contains \w characters and '.' my $filechars = $index_file; $filechars =~ s!\w!!g; # strip all alphanumerics and _ $filechars =~ s!\.!!o; # strip up to one '.' if ($filechars) { # uh-oh - other characters remain # no offer to delete } else { print <<"EOM"; $::const{'AdminForm'}EOM } } } last Err; } elsif ($subaction eq 'DelFile') { &delete_index_file( $::FORM{'File'} ); if ($::FORM{'ad'}) { $err = &WriteRule('delete index file with realm',1); next Err if ($err); &ppstr(174,&pstr(404,'delete index file with realm',1)); } last Err; } elsif (($subaction eq 'Create') or ($subaction eq 'Edit')) { if ($::private{'is_freeware'}) { if ($subaction eq 'Create') { if ($::realms->realm_count('all')) { $err = "only one realm is allowed in Freeware mode"; next Err; } } } my ($defname, $deffile) = $::realms->get_default_name(); unless ($file) { $file = $deffile; } unless ($Name) { $Name = $defname; } unless ($::FORM{'type'}) { # default realm type: $::FORM{'type'} = $::Rules{'use socket routines'} ? 3 : 4; } $base_url = 'http://' unless ($base_url); $base_url = 'http://' if ($::FORM{'type'} == 6); my %defaults = ( 'type' => $::FORM{'type'}, 'name' => $Name, 'is_update' => $::FORM{'is_update'}, 'is_website' => 0, 'is_filefed' => 0, 'is_local' => 0, 'is_runtime' => 0, 'file' => $file, 'base_url2' => $base_url, 'base_url3' => $base_url, 'base_url4' => $base_url, 'base_url5' => $base_url, 'base_dir4' => $base_dir, 'base_dir5' => $base_dir, 'limit_pattern' => $limit_pattern, ); my $table_header = $is_update ? $::str[368] : $::str[94]; my $submit_button = $is_update ? $::str[362] : $::str[94]; my $h_orig_name = &he( $::FORM{'orig_name'} ); my $b_allow_filtered_realms = (($::Rules{'show advanced commands'}) or ($defaults{'type'} == 6)); unless ($::FORM{'Write'}) { # prevent people from switching to/from runtime type of realm my $b_only_runtime = 0; my $b_no_runtime = 0; if ($is_update) { if ($::FORM{'type'} == 5) { $b_only_runtime = 1; } else { $b_no_runtime = 1; } if (($::FORM{'type'} != 4) and ($::FORM{'type'} != 5) and (not $::Rules{'use socket routines'})) { # special case -- user has disabled sockets, but they are editing an existing realm that # depends on sockets. Make sure that all realm types are present $::Rules{'use socket routines'} = 1; } } print &SetDefaults(<<"EOM", \%defaults); $::const{'AdminForm'}$::str[331]
$table_header | |
---|---|
$::str[428]: | |
$::str[369]: |
$::str[367]
$::str[475] | ||
$::str[314] | ||
Pattern: | ||
$::str[208] | ||
$::str[166]: | ||
$::str[471] | ||
$::str[166]: | $limit_pattern_hidden | |
$::str[471] | ||
$::str[166]: | ||
$::str[399]: | ||
$::str[212] | ||
$::str[166]: | ||
$::str[399]: |
EOM } if ($::FORM{'Write'}) { my ($base_url, $base_dir) = ('', ''); my $Name = $::FORM{'name'}; if ($Name =~ m!^(all|include-by-name)$!i) { $err = &pstr(441,$Name); next Err; } my $File = $::FORM{'file'}; my $type = $::FORM{'type'}; my ($is_runtime, $is_filefed) = (0, 0); if ($type == 6) { $base_url = 'http://filtered:1/'; } elsif ($type == 5) { $is_runtime = 1; $File = 'RUNTIME'; $base_url = $::FORM{'base_url5'}; $base_dir = $::FORM{'base_dir5'}; } elsif ($type == 4) { # oka $base_url = $::FORM{'base_url4'}; $base_dir = $::FORM{'base_dir4'}; } elsif ($type == 3) { $base_url = $::FORM{'base_url3'}; } elsif ($type == 2) { $is_filefed = 1; $base_url = $::FORM{'base_url2'}; } elsif ($type == 1) { # cool } else { $err = "invalid type - $type"; next Err; } if (($type > 3) and ($::private{'is_demo'})) { $err = $::str[435]; next Err; } #changed 0035 - this occurs when somebody chooses "Edit" a RUNTIME realm and they toggle the # radio buttons to change type, but don't type in a new filename. Also if somebody # enters the otherwise reserved word "runtime" we should point them in a different direction if ((uc($File) eq 'RUNTIME') and ($type != 5)) { my ($defname, $deffile) = $::realms->get_default_name(); $File = $deffile; } unless ($Name) { $err = &pstr(21, $::str[428] ); next Err; } unless ($File) { $err = &pstr(21, $::str[369] ); next Err; } # use all forward slashes: $base_dir =~ s!\\!/!g; if ($type != 1) { ($err, $base_url) = &uri_parse($base_url); next Err if ($err); } # do not allow delimiters within the values: for ($Name, $File, $base_dir, $base_url) { if (m!(\r|\n|\||\012|\015)!) { my $bad = &he($1); $err = &pstr(75,&he($_),$bad); next Err; } } #0054: -T compat if ($File =~ m!\.\.!) { $err = "realm file name cannot contain '..' substring"; next Err; } &untaintme( \$File ); &untaintme( \$base_dir ); if (($type == 4) or ($type == 5)) { unless (opendir(DIR, $base_dir)) { $err = &pstr(63, &he($base_dir), $! ); next Err; } closedir(DIR); } unless ($is_runtime) { # don't bother with LockFile here, because index file is not yet being hammered by multiple processes: if (open( FILE, ">>$File" )) { close(FILE); chmod($::private{'file_mask'},$File); } else { $err = &pstr(42, &he($File), $! ); next Err; } } if ($::FORM{'limit_pattern'}) { $err = &check_regex($::FORM{'limit_pattern'}); next Err if ($err); } my $realm_id = 0; my $page_count = 0; my $p_realm_data = (); # Is this really an update operation? If so, retain the 'realm_id' - that's kinda important: if ($is_update) { ($err, $p_realm_data) = $::realms->hashref( $::FORM{'orig_name'} ); unless ($err) { $realm_id = $$p_realm_data{'realm_id'}; $page_count = $$p_realm_data{'pagecount'}; } else { $err = ''; # clear } } if ($::FORM{'orig_name'}) { $::realms->remove( $::FORM{'orig_name'}, 0 ); } $::realms->remove( $Name, 0 ); $::realms->add( $realm_id, $Name, $::Rules{'sql: enable'}, $File, $is_runtime, $base_dir, $base_url, '', $page_count, $is_filefed, $type, ($::FORM{'limit_pattern'} || '') ); $err = $::realms->save_realm_data(); next Err if ($err); ($err, $p_realm_data) = $::realms->hashref($Name); next Err if ($err); #changed 0050 update realm-specific filter rules on a rename op: if (($is_update) and ($::FORM{'orig_name'} ne $Name)) { my $url_orig = &ue($::FORM{'orig_name'}); my $is_changed = 0; my $fr = &fdse_filter_rules_new(); my $p_fr = (); foreach $p_fr ($fr->list_filter_rules()) { next unless ($$p_fr{'apply_to'} == 3); # only realm-specific rules $is_changed += scalar ($$p_fr{'apply_to_str'} =~ s!(^|,)$url_orig($|,)!$1$$p_realm_data{'url_name'}$2!g); } if ($is_changed) { $err = $fr->frwrite(); next Err if ($err); } } if ($is_update) { &ppstr(174, $::str[114] ); } else { &ppstr(174, &pstr(372, &he($Name) ) ); } Pending: { # update the pending pages file because we may have just added or removed a non-empty index file, and we need to # sync the contents of pending.txt with that data # we may have renamed a realm, and we need to sync the realm name last Pending if ($File eq 'RUNTIME'); my @NewRecords = (); my $new_record_count = 0; my $url_realm = &ue( $Name ); my $Time = $::private{'script_start_time'}; last Pending unless (open( FILE, "<$File" )); binmode(FILE); while (defined($_ =
This Filtered Realm must be restricted by filter rules to prevent it from indexing the entire web.
The rule $hfname has been created as a sample rule to get you started.
That filter rule is disabled by default. Before enabling it, you must enter some strings or patterns to describe the type of URL's that you would like to include in this realm.
\n!; } last Err; } last Err; } print <<"EOM";$::str[327]
$$p_realm_data{'html_name'} - $$p_realm_data{'err'}.
"; &ppstr(182, "$::str[411]", "$::str[430]" ); print "
\n"; } } if (@open_realms) { my $n_actions = 2; $n_actions++ if ($::realms->{'need_approval'}); my $suggest_rules = &pstr(107, "$::str[162]" ); print <<"EOM";$::str[431]
$::str[475]
$suggest_rules
EOM print '$::str[268]
$::str[314]
EOM print '$::str[474]
$::str[471]
EOM print '$::str[339]
$::str[212]
EOM print '$::str[489]
$::str[208]
EOM print '$::str[96] / $::str[443]
\n!; &pppstr(348, "$::const{'help_file'}1150.html" ); my $err = ''; Err: { if ($::Rules{'sql: enable'}) { print qq!Warning: mysql data storage support has been removed from FDSE. See this help file for more information and for instructions on how to upgrade. This warning appears because the SQL: Enable General Setting is checked. |
$$p_realm_data{'html_name'} - $$p_realm_data{'err'}.
\n"; &pppstr(182, "$::str[411]", "$::str[430]" ); } } #changed 0054 -- allow website, file-fed, filtered, and open realms to use Add New URL form my $count = 0; my $ChooseRealmLine = ''; my $p_data; foreach $p_data ($::realms->listrealms('all')) { next if (($$p_data{'type'} == 4) or ($$p_data{'type'} == 5)); my $type = ''; if ($$p_data{'type'} == 1) { $type = $::str[553]; } elsif ($$p_data{'type'} == 2) { $type = $::str[554]; } elsif ($$p_data{'type'} == 3) { $type = $::str[550]; } $type .= ': ' if $type; $ChooseRealmLine .= qq!\n!; $count++; } my $ref_manage_realms = &pstr(371, qq!$::str[327]! ); # the "Add New URL" form appears in all non-Freeware versions and in Freeware if there are valid realms which can accept # single URL additions (count > 0) if (((not $::private{'is_freeware'}) or ($count)) and ($::Rules{'use socket routines'})) { if (not $::private{'is_freeware'}) { $ChooseRealmLine .= qq!\n!; } my $input = qq!!; if (($::Rules{'show advanced commands'}) or (not $::Rules{'multi-line add-url form - admin'})) { $input = qq!!; } print <<"EOM";$::str[172]
EOM } # the "Add New Site" form: if (((not $::private{'is_freeware'}) or (0 == $::realms->realm_count('all'))) and ($::Rules{'use socket routines'})) { print <<"EOM";$::str[291]
$::const{'AdminForm'}
EOM print <<"EOM" if ($count); $::str[74]: $input EOM print <<"EOM" if ($::Rules{'show advanced commands'}); $::str[161]: EOM print <<"EOM"; $::str[429]
$::str[290]
EOM } print <<"EOM";$::str[287]
$::const{'AdminForm'}
$::str[74]:
$::str[377]
$::str[326]
$ref_manage_realms
EOM print 'Totals | Size | Pages | |||
---|---|---|---|---|---|
$total_size KB | $total_pages |
$::str[96] / $::str[183] EOM my $subaction = $::FORM{'subaction'} || ''; if ($subaction eq 'SaveData') { print " / $::str[362]
\n"; if ($::FORM{'admin notify: sendmail program'}) { my $b_is_valid = 0; foreach (@::sendmail) { $b_is_valid = 1 if ($_ eq $::FORM{'admin notify: sendmail program'}); } unless ($b_is_valid) { $err = &pstr(144, &he($::FORM{'admin notify: sendmail program'}) ); next Err; } } foreach ('admin notify: email address', 'admin notify: smtp server', 'admin notify: sendmail program') { $err = &WriteRule($_,$::FORM{$_} || ''); next Err if ($err); &ppstr(174, &pstr(404,&he($_, $::FORM{$_}))); } foreach ('security: session timeout') { $err = &WriteRule($_,$::FORM{$_} || 0); next Err if ($err); &ppstr(174, &pstr(404,&he($_,$::FORM{$_}))); } if (($::FORM{'op'}) and ($::FORM{'np'}) and ($::FORM{'cp'})) { if ($::private{'is_demo'}) { &ppstr(53, $::str[435] ); last Err; } my $seed = 'sX'; if ($::FORM{'np'} ne $::FORM{'cp'}) { &ppstr(29, $::str[285] ); } elsif ($::Rules{'password'} eq crypt($::FORM{'op'}, $seed)) { # well, okay so far: my $newpass = crypt($::FORM{'np'}, $seed); $err = &WriteRule( 'password', $newpass ); next Err if ($err); &ppstr(174, $::str[293] ); } else { &ppstr(29, $::str[181] ); } } last Err; } if ($subaction eq 'TestMail') { print qq! / $::str[168]\n!; my $test_msg = <<"EOM"; Hello! This is a test message from your search engine. The following options were used to send it: Email address: $::Rules{'admin notify: email address'} SMTP server: $::Rules{'admin notify: smtp server'} Sendmail Program: $::Rules{'admin notify: sendmail program'} EOM my $trace = ''; ($err, $trace) = &SendMailEx( 'handler_order' => '12', 'to' => $::Rules{'admin notify: email address'}, 'from' => $::Rules{'admin notify: email address'}, 'host' => $::Rules{'admin notify: smtp server'}, 'pipeto' => $::Rules{'admin notify: sendmail program'}, 'p_nc_cache' => $::private{'p_nc_cache'}, 'use standard io' => $::Rules{'use standard io'}, 'subject' => "Test Message from search engine", 'message' => $test_msg, ); next Err if ($err); $trace = &he( $trace ); &ppstr(174, $::str[116] ); print qq!$::str[117]
\n!; last Err; } print <<"EOM"; / $::str[152] $::const{'AdminForm'} EOM $::const{'sendmail_options'} = ''; foreach (sort @::sendmail) { next unless (m!^(\S+)!); next unless (-e $1); $::const{'sendmail_options'} .= ''; } my $text = &PrintTemplate( 1, 'admin_personal.txt', $::Rules{'language'}, \%::const ); print &SetDefaults($text, \%::Rules); print ''; last Err; } continue { &ppstr(29, $err ); } } sub ui_BCST { my $err = ''; Err: { &handlers_init( 1 ); # load all handlers... my $sa = &he($::FORM{'sa'} || ''); if ($sa eq '') { print <<"EOM";$::str[96] / $::str[159] / Binary Converters - Setup and Test
Enabled Converters
The following table lists all binary-to-HTML handlers:
Enabled | Name | Extension Pattern | Content-Type Pattern | Actions | |
---|---|---|---|---|---|
$str_enabled | $p_handler->{'name'} | $p_handler->{'extension_pattern'} | $p_handler->{'content_type_pattern'} | $test_link | Help |
To enable one of the disabled handlers, click the corresponding Help link.
\n"; } my $test_realm_name = 'Binary Conversion Test'; my $ue_name = &ue($test_realm_name); my $adv_test = qq!Create Test Realm
DO NOT click the "rebuild" link after creating the test realm. Instead, return to this page and move to the next step.
Click here to automatically create a realm that searches the test binaries on xav.com.
Or, to test on your own content, go to the Create New Realm interface. Create a realm named "$test_realm_name". Configure the realm to index your binary files.
Index Test Realm
Create a realm named "$test_realm_name" first. Then return to this page and reload it.
Create Test Realm
Click here to edit the "$test_realm_name" realm. Make sure it includes binary files.
Index Test Realm
Click here to index all files in the "$test_realm_name" realm. This indexing will be done with the "debug=1" flag, which causes extra status messages to be printed. These extra status messages should help you determine whether the conversion is working.
Integration Test
Click here to cross-reference enabled binary converters with related General Settings. This will confirm that your system is configured to discover all binary file types that it knows how to read.
Advanced Test
The Syntax Test and Integration Test actions only perform basic validation of the settings. Before you can be sure the converters are working, you need to test them on actual binary files.
$adv_test$::str[96] / $::str[159] / Binary Converters - Setup and Test / $hname
EOM my $p_handler; foreach $p_handler (@{ $::private{'handlers'} }) { next unless ($::FORM{'name'} eq $p_handler->{'name'}); if (not exists($p_handler->{'test_syntax'})) { $err = "binary handler '$hname' does not have a test routine"; next Err; } $err = &{ $p_handler->{'test_syntax'} }( 1 ); next Err if ($err); last Err; } $err = "binary handler '$hname' not found in handler array"; next Err; } if ($sa eq 'crossref') { print <<"EOM";$::str[96] / $::str[159] / Binary Converters - Setup and Test / Integration Test
EOM my @file_extensions_supported = (); my @file_extensions_not = (); my $p_handler; foreach $p_handler (@{ $::private{'handlers'} }) { my $hname = &he($p_handler->{'name'}); my $ext_pattern = $p_handler->{'extension_pattern'}; if (not $p_handler->{'enabled'}) { push(@file_extensions_not, $ext_pattern ); print "File extension pattern $ext_pattern not supported because binary converted is not enabled.
\n"; next; } if (exists($p_handler->{'test_syntax'})) { my $test_err = &{ $p_handler->{'test_syntax'} }( 0 ); if ($test_err) { push( @file_extensions_not, $ext_pattern ); print "File extension pattern $ext_pattern not supported because syntax test returns error '$test_err'.
\n"; next; } } push( @file_extensions_supported, $ext_pattern ); print "File extension pattern $ext_pattern supported.
\n"; } my $warnings = 0; my $hext = &he($::Rules{'ext'}); my @exts = split(m!\s+!, $::Rules{'ext'}); my $ext; # do any of these allowed extensions match an unsupported handler? foreach $ext (@exts) { foreach (@file_extensions_not) { if ($ext =~ m!$_!i) { print qq!Warning: General Setting Ext contains extension '$ext' which pattern matches to '$_', but that pattern is not supported. File extension '$ext' should be removed.
\n!; $warnings++; } } } my $pattern; Pattern: foreach $pattern (@file_extensions_supported) { # is there some 'Ext' setting to match this? foreach $ext (@exts) { if ($ext =~ m!$pattern!i) { # okay, all good next Pattern; } } print qq!Warning: there is a binary converter available for extension pattern $pattern, but no file extensions in General Setting Ext match this pattern. You should add extensions for this binary type.
\n!; $warnings++; } if ($warnings == 0) { print "Success: confirmed that General Setting Ext is configured properly.
\n"; } $warnings = 0; $hext = &he($::Rules{'crawler: ignore links to'}); @exts = split(m!\s+!, $::Rules{'crawler: ignore links to'}); # do any of these allowed extensions match a supported handler? foreach $ext (@exts) { foreach (@file_extensions_supported) { if ($ext =~ m!$_!i) { print qq!Warning: General Setting Crawler: Ignore Links To contains extension '$ext' which pattern matches to '$_'. A converter is defined for that pattern, and so the extension should not be ignored. File extension '$ext' should be removed from this General Setting.
\n!; $warnings++; } } } Pattern: foreach $pattern (@file_extensions_not) { # is there some 'Ext' setting to match this? foreach $ext (@exts) { if ($ext =~ m!$pattern!i) { # okay, all good next Pattern; } } print qq!Warning: General Setting Crawler: Ignore Links To does not include an extension for unsupported pattern $pattern. You should add a file extension to this General Setting so that this unsupported binary type is ignored.
\n!; $warnings++; } if ($warnings == 0) { print "Success: confirmed that General Setting Crawler: Ignore Links To is configured properly.
\n"; } last Err; } $err = "subaction '$sa' not defined for this interface"; next Err; } return $err; } sub ui_sysinfo { print <<"EOM";$::str[96] / $::str[159] / $::str[92]
EOM &pppstr(488, $], $^X, $^O, &query_env('SERVER_SOFTWARE')); my $xpdf = $::private{'pdf utility folder'} ? $::private{'pdf utility folder'} : '[none]'; print <<"EOM";Data Folder: | $::private{'support_dir'} | $::str[432] |
$::str[91]
$name | $value |
Perl libraries loaded
$name | $value |
$::str[96] / $top_name EOM if ($::FORM{'Edit'}) { my $html_name = &he( $::FORM{'Edit'} ); print " / $html_name"; } if ($sa eq 'Write') { print " / $::str[362]
\n"; } else { print " / $::str[152]\n"; } } if ($::FORM{'Edit'}) { my $name = $::FORM{'Edit'}; my $lc_name = lc($name); my $html_name = &he($name); my $type = $$r_defaults{$lc_name}[1]; if ($sa eq 'Write') { if (($lc_name eq 'no frames') and ($::private{'is_demo'})) { &ppstr(53, $::str[435] ); last Err; } my $value = $::FORM{'VALUE'}; if ($type == 1) { $value = ($value) ? 1 : 0; } # strip line breaks, as promised: $value =~ s!(\n|\r|\015|\012)! !sg; my $b_changed = ($::Rules{$lc_name} ne $value) ? 1 : 0; $err = &WriteRule( $lc_name, $value ); next Err if ($err); &ppstr(174, &pstr(404,$html_name,&he($value))); if (($b_changed) and ($setting_info{$lc_name}) and ($setting_info{$lc_name}->[2])) { if (1 == $setting_info{$lc_name}->[2]) { print '' . $::str[329] . '
'; } else { print '' . $::str[109] . '
'; } } last Err; } my $value = $::Rules{$lc_name}; my $def_value = $$r_defaults{$lc_name}[0]; my @type_desc = ( 0, $::str[392], $::str[393], $::str[394], $::str[395], $::str[396], ); my $minmax = ''; if ($type == 3) { $minmax = " ($::str[405] " . $$r_defaults{$lc_name}[2] . "; $::str[406] " . $$r_defaults{$lc_name}[3] . ")"; } my %defaults = ( 'VALUE' => $::Rules{$lc_name}, ); print <<"EOM"; $::const{'AdminForm'} EOM my $description = $setting_info{$lc_name}->[0]; # if Boolean checkbox value: if ($type == 1) { print &SetDefaults(<<"EOM", \%defaults);$::str[402] | |
---|---|
$description
EOM # option to restore default with single click: if ($value ne $def_value) { &pppstr(401, "$::str[193]" ); } else { print "$::str[353]
\n"; } } # otherwise, if not a Boolean value (string/int/text): else { my $form_element = ''; if ((40 < length($value)) or (40 < length($def_value))) { $form_element = ''; } elsif (($type == 2) or ($type == 3)) { $form_element = ''; } print &SetDefaults(<<"EOM", \%defaults);$::str[159]: $html_name | |
---|---|
$::str[428]: | $html_name |
$::str[45]: | $description |
$::str[157]: | $type_desc[$type]$minmax |
$::str[90]: | $form_element |
$::str[97]: | $form_element |
$::str[353]
\n"; } print "" . $::str[403] . "
\n"; } last Err; } my $show_all_opt = 0; unless (@settings) { $show_all_opt = 1; foreach (keys %setting_info) { next unless ($setting_info{$_}->[1]); push(@settings,$_); } } my %show_settings = (); foreach (@settings) { $show_settings{$_} = 1; } if ($show_all_opt) { print '';
&ppstr(488, $], $^X, $^O, &query_env('SERVER_SOFTWARE'));
print "
[ $::str[358] ]
EOM } my $lc_name; foreach $lc_name (sort keys %setting_info) { next unless ($show_settings{$lc_name}); my $name = &Capitalize($lc_name); my $url_name = &ue($name); my $html_name = &he($name); my $default = $$r_defaults{$lc_name}[0]; my $current_val = $::Rules{$lc_name}; my $def = ''; if ($current_val eq $default) { $def = " ($::str[234]) "; } else { $def = " ($::str[223]) "; } my $display_val = $current_val; if (length($current_val) > 15) { $display_val = substr($current_val, 0, 12) . "..."; } $display_val = &he($display_val); my $description = $setting_info{$lc_name}->[0]; print "$::str[486]: Binary Converters - Setup and Test
$::str[486]: $::str[473]
[ $::str[411] ] $html_name = $display_val $def
$description
$::str[73]: $cancel_msg"; } next Err; } # has our file grown too big? my $TempSize = -s $TempFile; # zero max size negates size checking if (($::Rules{'max index file size'}) and ($TempSize > $::Rules{'max index file size'})) { # The temp file is too big - abort everything: my $max_size = &FormatNumber( $::Rules{'max index file size'}, 0, 1, 0, 1, $::Rules{'ui: number format'} ); $TempSize = &FormatNumber( $TempSize , 0, 1, 0, 1, $::Rules{'ui: number format'} ); $err = &pstr(410, $max_size, $$p_realm_data{'file'}, $TempSize ); my $cancel_msg = $obj->Cancel(); if ($cancel_msg) { $err .= "
$::str[73]: $cancel_msg"; } next Err; } $err = $obj->Merge(); next Err if ($err); $err = $::realms->setpagecount($realm, $total_records, 1); next Err if ($err); } return ($err, $total_records, $new_records, $updated_records, $deleted_records); } sub update_realm { return &update_file(@_); } sub query_realm { my ($realm, $query_pattern, $start_pos, $max_results, $ref_crawler_results) = @_; my $err = ''; Err: { $err = &check_regex($query_pattern); next Err if ($err); my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($realm); next Err if ($err); if ($$p_realm_data{'is_runtime'}) { return &query_runtime(@_); } else { return &query_file(@_); } } return $err; } sub query_runtime { my ($realm, $query_pattern, $start_pos, $max_results, $ref_crawler_results) = @_; my $err = ''; Err: { $err = &check_regex($query_pattern); next Err if ($err); my ($p_realm_data) = (); ($err, $p_realm_data) = $::realms->hashref($realm); next Err if ($err); my $fr = &fdse_filter_rules_new($p_realm_data); my $gf = &GetFiles_new(); $err = $gf->create_file_list( 'base_dir' => $$p_realm_data{'base_dir'}, 'base_url' => $$p_realm_data{'base_url'}, 'fr' => \$fr, 'tempfile' => "runtime.file_list. " . int(100 * rand()) . ".txt", 'verbose' => 0, ); next Err if ($err); if ($start_pos) { $gf->resume_file_position( $start_pos ); } my $count = 0; my $record_err_msg = ''; while ($count < $max_results) { my ($lastmodt, $size, $fullfile, $basefile, $url) = $gf->get_next_file(); last unless ($url); my %pagedata = (); ($record_err_msg,$url) = &pagedata_from_file( $fullfile, $url, \%pagedata, \$fr ); if ($record_err_msg) { # &ppstr(29, $record_err_msg ); } else { $$ref_crawler_results{$url} = \%pagedata; $count++; } } $err = $gf->quit(0); } return $err; } sub query_file { my ($realm, $query_pattern, $start_pos, $max_results, $ref_crawler_results) = @_; my $err = ''; Err: { $err = &check_regex($query_pattern); next Err if ($err); my ($obj, $p_rhandle, $p_whandle) = (); my ($p_realm_data) = (); ($err, $p_realm_data) = $::realms->hashref($realm); next Err if ($err); my $file = $$p_realm_data{'file'}; $obj = &LockFile_new(); ($err, $p_rhandle) = $obj->Read( $file ); next Err if ($err); my $linecount = -1; while (defined($_ = readline($$p_rhandle))) { next if (($query_pattern) and (not m! u= $query_pattern t= !)); $linecount++; next if ($linecount < $start_pos); last if ($linecount >= ($start_pos + $max_results)); my ($is_valid, %pagedata) = &parse_text_record($_); if ($is_valid) { my $URL = $pagedata{'url'}; $$ref_crawler_results{$URL} = \%pagedata; } } $err = $obj->Close(); next Err if ($err); } return $err; } sub get_remote_host { unless (exists $::private{'remote_host'}) { $::private{'remote_host'} = &query_env('REMOTE_HOST'); if ((!$::private{'remote_host'}) || ($::private{'remote_host'} =~ m!^\d+\.\d+\.\d+\.\d+$!)) { if ($::private{'visitor_ip_addr'} =~ m!^(\d+)\.(\d+)\.(\d+)\.(\d+)$!) { $::private{'remote_host'} = (gethostbyaddr(pack('C4',$1,$2,$3,$4),2))[0] || $::private{'visitor_ip_addr'}; } } $::private{'remote_host'} = lc($::private{'remote_host'}); } return $::private{'remote_host'}; } sub get_absolute_url { my $URL = ''; my $script_name = &query_env('SCRIPT_NAME','/'); if ($ENV{'HTTP_HOST'}) { $URL = 'http://' . &query_env('HTTP_HOST') . $script_name; } elsif ($ENV{'SERVER_NAME'}) { $URL = 'http://' . &query_env('HTTP_HOST') . $script_name; } elsif ($ENV{'HTTP_REFERER'}) { $URL = &query_env('HTTP_REFERER'); $URL =~ s!(\?|\$\|\#)(.*)!!o; } return $URL; } sub print_AddURL_nav_header { my ($b_anon, $action) = @_; if ((not $b_anon) and (not $::const{'is_cmd'}) and ($action ne 'rebuild')) { print <<"EOM";
$::str[96] / $::str[443] / $::str[442]
EOM } } sub s_AddURL { my ($b_IsAnonAdd, $Realm, @addr_strings) = @_; my @AddressesToIndex = (); #changed 0054; support multi-line inputs local $_; foreach (@addr_strings) { foreach (split(m!\r|\n|\015|\012!s)) { my $addr = &Trim($_); next unless ($addr); if ($addr =~ m!^\w+://!) { # good; explicit proto } else { $addr = "http://$addr"; } push( @AddressesToIndex, $addr ); } } my $action = $::FORM{'Action'} || ''; &print_AddURL_nav_header( $b_IsAnonAdd, $action ); my $p_realm_data = (); my $err = ''; Err: { if (($Realm) or ($b_IsAnonAdd)) { ($err, $p_realm_data) = $::realms->hashref($Realm); next Err if ($err); } elsif (($::FORM{'CreateSelectRealm'}) and (not $b_IsAnonAdd)) { my $url; ($err, $url) = &uri_parse($::FORM{'URL'}); next Err if ($err); ($err, $p_realm_data) = $::realms->get_website_realm($url); next Err if ($err); } else { # changed 0064: get_open_realm should always auto-create new realm...s ($err, $p_realm_data) = $::realms->get_open_realm(); next Err if ($err); $Realm = $$p_realm_data{'name'}; } $::FORM{'Realm'} = $$p_realm_data{'name'}; #0035 for benefit of &AdminVersion later if ($$p_realm_data{'type'} == 3) { if (length($$p_realm_data{'limit_pattern'})) { $::FORM{'LimitPattern'} = $$p_realm_data{'limit_pattern'}; } else { $::FORM{'LimitPattern'} = '^' . quotemeta(&get_web_folder($$p_realm_data{'base_url'})); } } elsif ($$p_realm_data{'type'} == 5) { $err = &pstr(277, $$p_realm_data{'html_name'}); next Err; } if (($b_IsAnonAdd) and ($::Rules{'allowanonadd: require user email'})) { $err = &CheckEmail( $::FORM{'EMAIL'} ); next Err if ($err); } # Initialize and validate FORM-based integers: foreach ('Batch','PagesDone','LimitIndexed','LimitFailed','LimitPending') { $::FORM{$_} = 0 unless exists $::FORM{$_}; next if ($::FORM{$_} =~ m!^\d+$!); $err = "parameter '$_' not numeric"; next Err; } foreach ('DaysPast') { $::FORM{$_} = 0 unless exists $::FORM{$_}; next if (($::FORM{$_} =~ m!^\d*\.?\d*$!) and ($::FORM{$_} ne '.')); $err = "parameter '$_' not numeric"; next Err; } my $NextLink = ''; if (($action eq 'CrawlEntireSite') or ($::FORM{'LimitPattern'})) { $::FORM{'Batch'}++; print "\n" . &pstr(186,&he($::FORM{'LimitPattern'}), $$p_realm_data{'html_name'} ) . "
\n";
&ppstr(189, $::FORM{'Batch'} );
print ' ';
&ppstr(191, $::FORM{'LimitIndexed'}, $::FORM{'LimitFailed'}, $::FORM{'LimitPending'} );
print "
" . &pstr(185, $$p_realm_data{'html_name'} ) . "
\n";
&ppstr(188, $::FORM{'DaysPast'} ) if ($::FORM{'DaysPast'});
&ppstr(189, $::FORM{'Batch'} );
print ' ';
&ppstr(191, $::FORM{'LimitIndexed'}, $::FORM{'LimitFailed'}, $::FORM{'LimitPending'} );
print "
" . &pstr(187, $$p_realm_data{'html_name'} ) . "
\n\n"; } $::FORM{'PerBatch'} = $::FORM{'PerBatch'} || $::Rules{'crawler: max pages per batch'}; $::FORM{'PerBatch'} = $::Rules{'crawler: max pages per batch'} if ($::FORM{'PerBatch'} > $::Rules{'crawler: max pages per batch'}); my (@spidered_links, @crawled_pages, %crawler_results, %Response) = (); if (($::FORM{'istimeout'}) and (not $b_IsAnonAdd)) { # shoot... they suffered a timeout... # Are the already only trying one at a time? if so, and if they have multiple addresses waiting, delete the first in the queue: &ppstr(53, $::str[390] ); if (($::FORM{'PerBatch'} == 1) and ($#AddressesToIndex > 0)) { my $URL = $AddressesToIndex[0]; &pppstr(389, $URL ); @AddressesToIndex = (); #@AddressesToIndex[1..$#AddressesToIndex]; #changed 0054 push(@crawled_pages, $URL); my $hURL = &he($URL); my %pagedata = ( 'is_error' => 1, 'url' => $URL, 'err' => 'operation timed out', 'html listing' => "$::str[194]
\n" unless ($::const{'is_cmd'}); my $crawler = &Crawler_new(); my $fr = &fdse_filter_rules_new($p_realm_data); my $b_continue = 1; my $b_write_to_index = 1; my $b_write_to_temp = 0; my $default_approval_required = 0; if (($b_IsAnonAdd) and ($::Rules{'require anon approval'})) { $default_approval_required = 1; $b_write_to_index = 0; $b_write_to_temp = 1; } my ($trailer, $URL) = ('', '', '', '', '', '', ''); my ($pux_err,$source_url,$clean) = (); if ((1 <= $::Rules{'timeout'}) and ($::Rules{'timeout'} <= 12)) { $::Rules{'timeout'} += 10; } my $index_count = 0; my $no_network_errs = 0; my ($is_denied, $require_approval, $promote_val, $filter_err_msg, $no_update_on_redirect, $b_index_nofollow, $b_follow_noindex); $| = 1; ADDRESS: foreach (@AddressesToIndex) { my %pagedata = ( 'realm_id' => $$p_realm_data{'realm_id'}, 'url' => '', 'final url' => '', 'is_error' => 0, 'err' => '', 'require_approval' => $default_approval_required, 'is_intermediate' => 0, 'record' => '', 'html listing' => '', 'sub status msg' => '', 'b_write_to_temp' => $b_write_to_temp, # set default ); $b_continue = 1; $source_url = $URL = &Trim($_); CrawlErr: { if ((($index_count - $no_network_errs) >= $::FORM{'PerBatch'}) or ($no_network_errs >= (5 * $::FORM{'PerBatch'}))) { $trailer = "$::str[201]
\n"; } $| = 0; # If we're a filefed realm, discard all the spidered links: if ($$p_realm_data{'type'} == 2) { @spidered_links = (); } elsif ($::FORM{'LimitPattern'}) { my @new_links = (); my $pattern = $::FORM{'LimitPattern'}; foreach (@spidered_links) { next unless (m!$pattern!i); push(@new_links, $_); } @spidered_links = @new_links; } my (@LN, @LVN, @LVO, @LE) = (); my ($total_records, $new_records, $updated_records, $deleted_records) = ('', 0, 0, 0, 0); if ($b_write_to_index) { ($err, $total_records, $new_records, $updated_records, $deleted_records) = &update_realm( $$p_realm_data{'name'}, \%crawler_results); next Err if ($err); $err = &SaveLinksToFileEx( $p_realm_data, \%crawler_results, \@spidered_links, \@LN, \@LVN, \@LVO, \@LE ); next Err if ($err); } my $approval_count = 0; if ($b_write_to_temp) { my $user_email = $::FORM{'EMAIL'} || ''; my ($obj, $p_whandle) = (); $obj = &LockFile_new(); ($err, $p_whandle) = $obj->Append( $$p_realm_data{'file'} . '.need_approval' ); next Err if ($err); foreach (@crawled_pages) { my $p_pagedata = $crawler_results{$_}; next unless ($$p_pagedata{'require_approval'}); my ($temp_err_msg, $text_record) = ('', ''); unless ($$p_pagedata{'is_error'}) { ($temp_err_msg, $text_record) = &text_record_from_hash($p_pagedata); if ($temp_err_msg) { &ppstr(29, $temp_err_msg ); next; } # strip line breaks: $text_record =~ s!\n|\r|\015|\012!!g; $text_record =~ s!\|\|!\|\|!sg; } my $Record = join('||', $::private{'script_start_time'}, &get_remote_host(), $$p_pagedata{'err'}, $$p_pagedata{'is_error'}, $$p_pagedata{'url'}, $text_record, $user_email); print { $$p_whandle } $Record . "\n"; $approval_count++; } $err = $obj->FinishAppend(); next Err if ($err); } if (($b_IsAnonAdd) and ($::Rules{'allowanonadd: log'})) { my $user_email = $::FORM{'EMAIL'} || ''; my ($obj, $p_whandle) = (); $obj = &LockFile_new(); ($err, $p_whandle) = $obj->Append( 'submissions.csv' ); next Err if ($err); # write schema as first line unless (-s 'submissions.csv') { print { $$p_whandle } "perl_time,human_time,remote_host,remote_addr,visitor_email,URL,realm,error,\n"; } foreach (@crawled_pages) { my $p_pagedata = $crawler_results{$_}; my $record = ''; my $field; foreach $field ( $::private{'script_start_time'}, &FormatDateTime( $::private{'script_start_time'}, $::Rules{'ui: date format'} ), &get_remote_host(), $::private{'visitor_ip_addr'}, $user_email, $$p_pagedata{'url'}, $$p_realm_data{'name'}, $$p_pagedata{'err'}, ) { if ($field =~ m!\"|\015|\012!) { $field =~ s!\"!""!sg; $field = qq!"$field"!; } $record .= "$field,"; } print { $$p_whandle } "$record\n"; } $err = $obj->FinishAppend(); next Err if ($err); } if (($b_IsAnonAdd) and ($::Rules{'allowanonadd: notify admin'})) { MailAdmin: { last MailAdmin unless (($::Rules{'admin notify: smtp server'}) or ($::Rules{'admin notify: sendmail program'})); last MailAdmin unless ($::Rules{'admin notify: email address'}); my $URL = &get_absolute_url(); my $mail_message = ''; $mail_message .= "$::str[205]\015\012\015\012"; $mail_message .= "Visitor Information:\015\012" . '-' x 20 . "\015\012"; $mail_message .= ' ' x (10 - length($::str[206])) . $::str[206] . ": $::FORM{'EMAIL'}\015\012"; $mail_message .= ' ' x (10 - length($::str[207])) . $::str[207] . ": $::private{'visitor_ip_addr'}\015\012"; $mail_message .= ' ' x (10 - length($::str[85])) . $::str[85] . ": " . &get_remote_host() . "\015\012"; $mail_message .= "\015\012"; $mail_message .= "Submitted Page Information:\015\012"; $mail_message .= '-' x length("Submitted Page Information:") . "\015\012"; $mail_message .= ' ' x (10 - length($::str[161])) . $::str[161] . ": $$p_realm_data{'name'}\015\012"; $mail_message .= "\015\012"; my $LastURL = ''; foreach (@crawled_pages) { my $p_pagedata = $crawler_results{$_}; $mail_message .= ' ' x (10 - length($::str[74])) . $::str[74] . ": $$p_pagedata{'url'}\015\012"; if ($$p_pagedata{'err'}) { $mail_message .= ' ' x (10 - length($::str[73])) . $::str[73] . ": $$p_pagedata{'err'}\015\012"; } elsif ($$p_pagedata{'require_approval'}) { $mail_message .= " - $::str[356]\015\012"; } else { $mail_message .= " - OK\015\012"; } $mail_message .= "\015\012"; $LastURL = $$p_pagedata{'url'}; } $mail_message .= "\015\012" . '-' x 78 . "\015\012\015\012"; if ($approval_count) { $mail_message .= "$::str[356]:\n\t$URL?ApproveRealm=$$p_realm_data{'url_name'}"; } else { $mail_message .= $::str[381]; } $mail_message .= <<"EOM"; Fluid Dynamics Search Engine $URL?Mode=Admin ----------------------------------------------------------------------------- EOM foreach (sort keys %::FORM) { next if (m!^(Mode|Match|PagesDone|PerBatch|EMAIL|Realm|URL|Terms|maxhits|p:pm|q|terms)$!); $mail_message .= "$_: $::FORM{$_}\015\012\015\012"; } # Use end-user-address *if* it is valid: my $from_addr = $::Rules{'admin notify: email address'}; unless (&CheckEmail( $::FORM{'EMAIL'} )) { $from_addr = $::FORM{'EMAIL'}; } &SendMailEx( 'handler_order' => '12', 'to' => $::Rules{'admin notify: email address'}, 'to name' => 'FDSE Administrator', 'from' => $from_addr, 'host' => $::Rules{'admin notify: smtp server'}, 'pipeto' => $::Rules{'admin notify: sendmail program'}, 'p_nc_cache' => $::private{'p_nc_cache'}, 'use standard io' => $::Rules{'use standard io'}, 'subject' => &pstr(209, $LastURL ), 'message' => $mail_message, ); } } my $i = 0; ADDRESS: foreach (@crawled_pages) { last if ($::const{'is_cmd'}); my $p_pagedata = $crawler_results{$_}; next if ($$p_pagedata{'is_intermediate'}); $i++; if ($$p_pagedata{'html listing'}) { print $$p_pagedata{'html listing'}; } elsif ($b_IsAnonAdd) { print &StandardVersion('rank' => $i, %$p_pagedata); } else { print &AdminVersion('rank' => $i, %$p_pagedata); } print "[ " . $$p_pagedata{'redirects'} . " ]
\n" if ($$p_pagedata{'redirects'}); print "[ " . $$p_pagedata{'sub status msg'} . " ]
\n" if ($$p_pagedata{'sub status msg'}); } print $trailer; if ($b_write_to_index) { &pppstr(289, $total_records, $$p_realm_data{'html_name'}, $new_records, $updated_records, $deleted_records ); } last Err if ($b_IsAnonAdd); last Err if ($::const{'is_cmd'}); if (($action eq 'rebuild') or ($action eq 'CrawlEntireSite')) { $NextLink .= "&PagesDone=" . ($::FORM{'PagesDone'} + $index_count); my $advice = &pstr(211, $::Rules{'time interval between restarts'}, $NextLink ); print <<"EOM";$::str[210]:
EOM last Err; } #changed 0054 -- allow website, file-fed, filtered, and open realms to use Add New URL form my $count = 0; my $ChooseRealmLine = ''; my $p_data; foreach $p_data ($::realms->listrealms('all')) { next if (($$p_data{'type'} == 4) or ($$p_data{'type'} == 5)); my $type = ''; if ($$p_data{'type'} == 1) { $type = $::str[553]; } elsif ($$p_data{'type'} == 2) { $type = $::str[554]; } elsif ($$p_data{'type'} == 3) { $type = $::str[550]; } $type .= ': ' if $type; $ChooseRealmLine .= qq!\n!; $count++; } if (not $::private{'is_freeware'}) { $ChooseRealmLine .= qq!\n!; } my $formtag = $::const{'AdminForm'}; $formtag =~ s! name="?F1"?!!sg; my $input = ''; if ($::Rules{'multi-line add-url form - visitors'}) { $input = ''; } print <<"EOM";$advice
$::str[172]
EOM my $LinkCount = $#LN + $#LVO + $#LVN + $#LE + 4; unless ($LinkCount) { print "$::str[291]
$formtag
EOM my %defaults = ( 'Realm' => $Realm, ); print &SetDefaults(<<"EOM", \%defaults); $::str[74]: $input EOM print <<"EOM"; $::str[161]:
$::str[213]
\n"; last Err; } print <<"EOM";$::str[214]
$::const{'AdminForm'}"; last Err; } return $err; } sub admin_link { local $_; my (%params) = @_; my $link = $::const{'admin_url'}; my ($name, $value) = (); while (($name, $value) = each %params) { $link .= '&' . &ue($name) . '=' . &ue($value); } return $link; } sub SaveLinksToFileEx { my ($p_realm_data, $ref_crawler_results, $ref_spidered_links, $ref_links_new, $ref_links_visited_fresh, $ref_links_visited_old, $ref_links_error) = @_; my $err = ''; Err: { unless (($p_realm_data) and ('HASH' eq ref($p_realm_data))) { $err = &pstr(21, 'p_realm_data' ); next Err; } # ONLY save those code-0 links if we're a website realm with crawler discovery or we're LimitEntireSite mode: my $b_save_waiting_links = 0; if (($$p_realm_data{'type'} == 3) or (($::FORM{'LimitPattern'}) and ($::FORM{'Action'}) and ($::FORM{'Action'} eq 'CrawlEntireSite'))) { $b_save_waiting_links = 1; } elsif ($$p_realm_data{'type'} == 6) { $b_save_waiting_links = 1; } my $url_realm = $$p_realm_data{'url_name'}; my %return_status = (); my $b_return_status_info = 0; if (($ref_spidered_links) and ($ref_links_new) and ($ref_links_visited_fresh) and ($ref_links_visited_old) and ($ref_links_error)) { $b_return_status_info = 1; } my @Global = (); # Take all pages indexed during this round and assign them a value of the # current time if they were successful and a 2 if they failed. my %written = (); my ($name, $value); while (($name, $value) = each %$ref_crawler_results) { if ($$value{'is_error'}) { push( @Global, "$name $url_realm 2" ); } else { push( @Global, "$name $url_realm $::private{'script_start_time'}" ); } $written{$name} = 1; } if (($ref_spidered_links) and ('ARRAY' eq ref($ref_spidered_links))) { # Add all saved links to this array with a 0 numeric index. Also create an # associative array of them for later comparisons: foreach (@$ref_spidered_links) { next if ($written{$_}); push( @Global, "$_ $url_realm 0" ); $return_status{$_} = 0; $written{$_} = 1; } } last Err unless (@Global); # don't bother if we have nothin to work with... my ($obj, $p_rhandle, $p_whandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite( 'search.pending.txt' ); next Err if ($err); my $b_compare = 1; my $maxi = $#Global; @Global = sort @Global; my $i = 0; my ($insert_url, $insert_realm, $insert_code) = ('','',0); if ($Global[$i] =~ m!^(.+) (\S+) (\d+)$!) { ($insert_url, $insert_realm, $insert_code) = ($1, $2, $3); } my ($last_url, $last_realm, $last_code) = ('', '', 0); my ($cur_url, $cur_realm, $cur_code) = ('', '', 0); my $b_get_next_line = 1; my $file_done = 0; while (1) { if ($b_get_next_line) { if (defined($_ = readline($$p_rhandle))) { next unless (m!^(.+) (\S+) (\d+)$!); ($cur_url, $cur_realm, $cur_code) = ($1, $2, $3); } elsif ($i <= $maxi) { $file_done = 1; $cur_url = 'z'; $b_get_next_line = 0; } else { last; } } else { $b_get_next_line = 1; # unless the incoming records explicitly reset it to 0 } # If we are different than our predecessors, we print out predecessors and take on their role. We are now pred and will be compared to new input # If we are the same, then we resolve which us of is superior, and loop next, without printing # This is done to weed out multiple sequential duplicates in the pending file if (($file_done) or ("$last_url $last_realm" ne "$cur_url $cur_realm")) { if ($b_compare) { # Right before we print, we check whether we should insert the current insert record. # If the current record falls before this one, we insert clean # If the current insert record is equal to this one, we fight it out and winner writes if ("$insert_url $insert_realm" lt "$last_url $last_realm") { # okay, insert clean: print { $$p_whandle } "$insert_url $insert_realm $insert_code\n" if (($b_save_waiting_links) or ($insert_code)); $i++; if ($i > $maxi) { $b_get_next_line = 1; $b_compare = 0; } else { $Global[$i] =~ m!^(.+) (\S+) (\d+)$!; ($insert_url, $insert_realm, $insert_code) = ($1, $2, $3); $b_get_next_line = 0; # give the next guy in @Global a chance next; } } elsif ("$insert_url $insert_realm" eq "$last_url $last_realm") { $last_code = $insert_code if (($insert_code > $last_code) or ($insert_code == 2)); $return_status{$insert_url} = $last_code if (defined($return_status{$insert_url})); $i++; if ($i > $maxi) { $b_get_next_line = 1; $b_compare = 0; } else { $Global[$i] =~ m!^(.+) (\S+) (\d+)$!; ($insert_url, $insert_realm, $insert_code) = ($1, $2, $3); $b_get_next_line = 0; # give the next guy in @Global a chance next; } } } print { $$p_whandle } "$last_url $last_realm $last_code\n" if (($last_url) and ($last_url ne 'z')); ($last_url, $last_realm, $last_code) = ($cur_url, $cur_realm, $cur_code); } else { $last_code = $cur_code if ($cur_code > $last_code); } $b_get_next_line = 1; } # end loop print { $$p_whandle } "$last_url $last_realm $last_code\n" if (($last_url) and ($last_url ne 'z')); $err = $obj->Merge(); next Err if ($err); last Err unless ($b_return_status_info); my $cut_age = $::private{'script_start_time'} - (86400 * $::Rules{'crawler: days til refresh'}); my $url; while (($url, $value) = each %return_status) { if ($value == 0) { push( @$ref_links_new, $url ); } elsif ($value == 2) { push( @$ref_links_error, $url ); } elsif ($value < $cut_age) { push( @$ref_links_visited_old, $url ); } else { push( @$ref_links_visited_fresh, $url ); } } } return $err; } sub get_age_str { my ($age) = @_; my $age_str = ''; $age += 59; # round up if ($age > (2 * 86400)) { $age_str = &pstr(220, int($age / 86400) ); } elsif ($age > (100 * 60)) { $age_str = &pstr(222, int($age / 3600) ); } else { $age_str = &pstr(221, int($age / 60) ); } $age_str; } sub realm_interact { my ($p_realm_data, $p_code) = @_; %$p_code = (); $::private{'embedded_err_msg'} = ''; # Start-up routines: $$p_code{'init'} = <<'EOM'; $obji = &LockFile_new(); ($::private{'embedded_err_msg'}, $p_rhandlei, $p_whandlei) = $obji->ReadWrite( $$p_realm_data{'file'} ); EOM $$p_code{'resume'} = <<'EOM'; $obji = &LockFile_new(); ($::private{'embedded_err_msg'}, $p_rhandlei, $p_whandlei) = $obji->Resume( $$p_realm_data{'file'} ); EOM # Shut-down routines: $$p_code{'finish'} = <<'EOM'; $::private{'embedded_err_msg'} = $obji->Merge(); EOM $$p_code{'abort'} = <<'EOM'; $::private{'embedded_err_msg'} = $obji->Cancel(); EOM $$p_code{'suspend'} = <<'EOM'; $::private{'embedded_err_msg'} = $obji->Suspend(); EOM # Getnext code: $$p_code{'get_next'} = <<'EOM'; unless ($index_is_done) { while (1) { unless (defined($_ = readline($$p_rhandlei))) { $index_is_done = 1; last; } if (m!^(\d+) (\d+) (\d+).+?u= (.*?) t=!) { ($i_url, $i_lastmodt) = ($4, $2); $i_line++; $record = $_; last; } } } EOM $$p_code{'insert'} = $$p_code{'update'} = <<'EOM'; my ($xrecord_err, $xrecord) = &text_record_from_hash( \%pagedata ); if ($xrecord_err) { &ppstr(29,$xrecord_err); } else { unless (print { $$p_whandlei } $xrecord) { $write_err = &pstr(43, $obji->{'wname'}, $! ); } $pagecount++; } EOM $$p_code{'delete'} = <<'EOM'; # do nothing EOM $$p_code{'preserve'} = <<'EOM'; unless (print { $$p_whandlei } $record) { $write_err = &pstr(43, $obji->{'wname'}, $! ); } $pagecount++; EOM } sub UpdateIndex { my ($p_realm_data) = @_; my $err = ''; my $is_complete = 0; Err: { local $_; # Create a list of all files and their last modified times: my $i_line = 0; my $a_line = 0; my $fr = &fdse_filter_rules_new($p_realm_data); my $gf = &GetFiles_new(); $err = $gf->create_file_list( 'base_dir' => $$p_realm_data{'base_dir'}, 'base_url' => $$p_realm_data{'base_url'}, 'fr' => \$fr, 'tempfile' => $$p_realm_data{'file'} . ".temp_file_list.txt", 'verbose' => 1, ); &pppstr(224, $gf->{'count'}, $$p_realm_data{'base_dir'} ); # Open the realm index file for purposes of looping through it and re-writing it: my %code = (); &realm_interact( $p_realm_data, \%code ); my ($obji, $p_rhandlei, $p_whandlei, $record, $record_err, $pagecount, $write_err) = (); eval $code{'init'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } # Okay, proceed through the double parallel loop my ($a_url, $a_file, $a_lastmodt) = ('', '', 0); my ($i_url, $i_lastmodt) = ('', 0); my $index_is_done = 0; my $getnext = 2; $| = 1; my ($size, $basefile) = (); my $i_url_prev = ''; my %crawler_results = (); my %valid = ( 'is_error' => 0, ); my %invalid = ( 'is_error' => 1, ); # $a_url and $a_lastmodt refer to the *actual* sorted files in the folder # $i_url and $i_lastmodt refer to the contents of the current index file, which may be out-of-date DREAD: while (1) { last if ($write_err); my %pagedata = (); if ($getnext == 2) { ($a_lastmodt, $size, $a_file, $basefile, $a_url) = $gf->get_next_file(); last DREAD unless ($a_url); $a_line++; $i_url_prev = $i_url; eval $code{'get_next'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } } elsif ($getnext == 1) { ($a_lastmodt, $size, $a_file, $basefile, $a_url) = $gf->get_next_file(); last DREAD unless ($a_url); $a_line++; } elsif ($getnext == 0) { $i_url_prev = $i_url; eval $code{'get_next'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } } if ($i_url lt $i_url_prev) { # fatal/die - alpha sort lost eval $code{'cancel'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } $err = $::str[225] . ' (' . &he($i_url) . ' versus previous ' . &he($i_url_prev) . ')'; next Err; } my $action = ''; if ($a_url eq $i_url) { if ($a_lastmodt != $i_lastmodt) { $record_err = (&pagedata_from_file( $a_file, $a_url, \%pagedata, \$fr ))[0]; if ($record_err) { &ppstr(29, &he($a_url) . ' - ' . $record_err); print "\n\n"; $action = 'delete'; } else { &pppstr(226, $a_url ); $pagedata{'lastindex'} = $::private{'script_start_time'}; $action = 'update'; } } else { $action = 'preserve'; } $getnext = 2; } elsif (($a_url lt $i_url) or ($index_is_done)) { $getnext = 1; my $index_url = ''; ($record_err, $index_url) = &pagedata_from_file( $a_file, $a_url, \%pagedata, \$fr ); if ($record_err) { &ppstr(29, &he($a_url) . ' - ' . $record_err); print "\n\n"; } else { &pppstr(227, &he($a_url) ); $pagedata{'lastindex'} = $::private{'script_start_time'}; $action = 'insert'; $crawler_results{$index_url} = \%valid; } } elsif ($a_url gt $i_url) { &pppstr(228, &he($i_url) ); $getnext = 0; $action = 'delete'; $crawler_results{$a_url} = \%invalid; } if ($action) { eval $code{$action}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } } } $err = $gf->quit(0); next Err if ($err); $is_complete = 1; if ($write_err) { &ppstr(29, $write_err ); eval $code{'abort'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } last Err; } eval $code{'finish'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } $err = &SaveLinksToFileEx( $p_realm_data, \%crawler_results ); next Err if ($err); $err = $::realms->setpagecount( $$p_realm_data{'name'}, $pagecount, 1 ); next Err if ($err); &ppstr(174, $::str[229] ); last Err; } return ($err, $is_complete); } sub BuildIndex { my ($p_realm_data) = @_; my $is_complete = 0; my $err = ''; Err: { my $start_pos = 0; if (($::FORM{'StartFile'}) and ($::FORM{'StartFile'} =~ m!^\d+$!)) { $start_pos = $::FORM{'StartFile'}; } # These hashes are used later to update pending.txt via SaveLinksToFileEx my %crawler_results = (); my %valid = ('is_error' => 0); my %invalid = ('is_error' => 1); # This loads the generic realm update code, which will be eval'ed: my $i_line = $start_pos; my %code = (); &realm_interact( $p_realm_data, \%code ); my ($obji, $p_rhandlei, $p_whandlei, $record, $record_err, $pagecount, $write_err) = (); if ($start_pos > 0) { eval $code{'resume'}; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } } else { eval $code{'init'}; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } } die $@ if $@; $| = 1; &pppstr(391, $$p_realm_data{'html_name'} ); my $fr = &fdse_filter_rules_new($p_realm_data); &pppstr(487, &he($::Rules{'ext'}), "$::const{'admin_url'}&Action=GeneralRules&Edit=Ext" ); my $gf = &GetFiles_new(); $err = $gf->create_file_list( 'base_dir' => $$p_realm_data{'base_dir'}, 'base_url' => $$p_realm_data{'base_url'}, 'fr' => \$fr, 'tempfile' => $$p_realm_data{'file'} . ".temp_file_list.txt", 'use_existing' => 1, 'verbose' => 1, ); next Err if ($err); $::FORM{'TotalValidFiles'} = $gf->{'count'}; &pppstr(224, $::FORM{'TotalValidFiles'}, $$p_realm_data{'base_dir'} ); if ($start_pos) { &pppstr(230, $start_pos ); $gf->resume_file_position( $start_pos ); } else { print "$::str[215]
EOM if ($::FORM{'LimitPattern'}) { my $hval = &he($::FORM{'LimitPattern'}); print qq!\n!; } print <<"EOM";EOM $LinkCount = 1; if (@LN) { print "$::str[216]
\n"; foreach (sort @LN) { my $html_url = &he( $_ ); print qq! $html_url
\n!; $LinkCount++; } } if (@LE) { print "$::str[217]
\n"; foreach (sort @LE) { my $html_url = &he( $_ ); print qq! $html_url
\n!; $LinkCount++; } } if (@LVO) { &pppstr(218, $::Rules{'crawler: days til refresh'} ); foreach (sort @LVO) { my $html_url = &he( $_ ); print qq! $html_url
\n!; $LinkCount++; } } if (@LVN) { &pppstr(219, $::Rules{'crawler: days til refresh'} ); foreach (sort @LVN) { my $html_url = &he( $_ ); print qq! $html_url
\n!; $LinkCount++; } } print "
$::str[231]
"; } $::FORM{'truecount'} = 0 unless ($::FORM{'truecount'}); my $NextLink = "$::const{'admin_url'}&Action=rebuild&TotalValidFiles=$::FORM{'TotalValidFiles'}&Realm=$$p_realm_data{'url_name'}"; &pppstr(192, qq!$::str[193]! ); my $infile_count = $start_pos; my $success_count = $start_pos; my $intro = <<"EOM";$::str[113] | $::str[153] | $::str[369] | $::str[74] |
---|---|---|---|
$::str[73]: '$basename' - $err. | |||
$fileage | $html_Size bytes | $var[0] | $var[1] |
$::str[210]:
EOM &pppstr(105, &FormatNumber( $::FORM{'truecount'}, 0, 1, 0, 1, $::Rules{'ui: number format'} ) ); } else { $err = $gf->quit(0); next Err if ($err); eval $code{'finish'}; die $@ if $@; if ($::private{'embedded_err_msg'}) { $err = $::private{'embedded_err_msg'}; next Err; } $err = $::realms->setpagecount($$p_realm_data{'name'}, $::FORM{'truecount'}, 1); delete $::FORM{'truecount'}; next Err if ($err); &ppstr(174, $::str[229] ); $is_complete = 1; } $err = &SaveLinksToFileEx( $p_realm_data, \%crawler_results ); next Err if ($err); &pppstr(232, time() - $::private{'script_start_time'} ); last Err; } return ($err, $is_complete); } sub AdminVersion { my %pagedata = @_; my $ue_url = &ue( $pagedata{'url'} ); my $type = 1; my $ue_realm = ''; my ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); if ((not $err) and ($p_realm_data)) { $ue_realm = $$p_realm_data{'url_name'}; $type = $$p_realm_data{'type'}; } if ($type == 5) { # runtime - $pagedata{'admin_options'} = ''; } elsif ($type == 4) { # file-system; edit, delete, no crawl $pagedata{'admin_options'} = <<"EOM"; [ $::str[411] | $::str[430] ] EOM } else { $pagedata{'admin_options'} = <<"EOM"; [ $::str[411] | $::str[444] | $::str[430] ] EOM } $pagedata{'redirector'} = "$::const{'script_name'}?NextLink="; return &StandardVersion(%pagedata); } sub ui_ReviewIndex { my $err = ''; Err: { my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); next Err if ($err); my $start_pos = $::FORM{'Start'} || 1; my $max_results_to_show = $::Rules{'crawler: max pages per batch'}; print <<"EOM";$advice
$::str[96] / $::str[154] '$$p_realm_data{'html_name'}'
EOM my %crawler_results = (); $err = &query_realm( $$p_realm_data{'name'}, '', $start_pos - 1, $max_results_to_show, \%crawler_results); next Err if ($err); my $URL = ''; my $total = $start_pos - 1 + scalar (keys %crawler_results); my $linkhits = "$::const{'admin_url'}&Realm=$$p_realm_data{'url_name'}&Action=Review&Start="; my $b_is_exact_count = 1; my $maximum = $$p_realm_data{'pagecount'}; if (($total) and (not ($$p_realm_data{'pagecount'}))) { $maximum = $total; $b_is_exact_count = 0; } my ($jump_sum, $jumptext) = &str_jumptext( $start_pos, $max_results_to_show, $maximum, $linkhits, $b_is_exact_count ); my $Count = $start_pos; my $nresults = scalar (keys %crawler_results); if ($nresults) { print $jump_sum; print $jumptext; foreach (sort (keys %crawler_results)) { my $p_data = $crawler_results{$_}; $$p_data{'rank'} = $Count; print &AdminVersion(%$p_data); $Count++; } print $jump_sum; print $jumptext; } else { print "$::str[235]
\n"; } if ($Count < ($start_pos + $max_results_to_show)) { print "$::str[236]: $::str[238].
\n"; } last Err; } continue { &ppstr(29, $err ); } } sub ui_UserInterface { my $err = ''; Err: { local $_; my $subaction = $::FORM{'subaction'} || ''; my %subactions = ( '' => $::str[152], 'EditTemplate' => $::str[411], 'SaveTemplate' => $::str[362], 'SaveSettings' => $::str[362], 'Write' => $::str[362], 'IL' => $::str[351], ); if (defined($subactions{$subaction})) { print "$::str[96] / $::str[165] / $subactions{$subaction}
\n"; } my %name_to_file = ( 'Link Line 1' => 'linkline1.txt', 'Link Line 2' => 'linkline2.txt', 'Line Listing' => 'line_listing.txt', 'Main Footer' => 'footer.htm', 'Main Header' => 'header.htm', 'Search Form' => 'searchform.htm', 'Search Tips' => 'tips.htm', 'Style Sheet' => 'style.inc', ); my %name_to_desc = ( 'Line Listing' => $::str[237], 'Main Footer' => $::str[239], 'Main Header' => $::str[240], 'Search Form' => $::str[241], 'Search Tips' => $::str[242], 'Style Sheet' => $::str[243], 'Link Line 1' => $::str[171], 'Link Line 2' => $::str[169], ); if ($subaction eq 'IL') { # install language pack... my @langfiles_over = ( 'admin_ads.txt', 'admin_fr.txt', 'admin_fr2.txt', 'admin_pass1.txt', 'admin_pass2.txt', 'admin_personal.txt', 'admin_ui.txt', 'strings.txt', ); my @langfiles_preserve_old = ( 'linkline1.txt', 'linkline2.txt', 'searchform.htm', 'tips.htm', ); my $foldername = $::FORM{'fn'}; if ($foldername =~ m!\W!) { $err = &pstr(350,&he($foldername)); next Err; } unless (-d "templates/$foldername") { unless (mkdir("templates/$foldername",0777)) { $err = &pstr(349,&he("templates/$foldername"),$!); next Err; } chmod(0777, "templates/$foldername"); # for good measure - sometimes needed } &pppstr(347, &pstr(346, $foldername, $::VERSION)); my $base_path = "http://www.xav.com/latest/translator/$::VERSION/$foldername"; # temporarily set some overrides $::Rules{'crawler: rogue'} = 1; $::Rules{'max characters: file'} = &max($::Rules{'max characters: file'},16777216); $::Rules{'crawler: max redirects'} = 6; $::Rules{'minimum page size'} = 0; my $crawler = &Crawler_new(); my $langfile; foreach $langfile (@langfiles_over) { print "-> $::str[195] $langfile...
\n"; my %webrq = $crawler->webrequest( "page" => "$base_path/$langfile" ); if ($webrq{'err'}) { &ppstr(29, $webrq{'err'} ); &pppstr(345, $base_path, "searchdata/templates/$foldername"); last Err; } $err = &WriteFile( "templates/$foldername/$langfile", $webrq{'text'} ); next Err if ($err); } foreach $langfile (@langfiles_preserve_old) { if (-e "templates/$foldername/$langfile") { &pppstr(347, &pstr(344, $langfile)); next; } print "-> $::str[195] $langfile...
\n"; my %webrq = $crawler->webrequest( "page" => "$base_path/$langfile" ); if ($webrq{'err'}) { &ppstr(29, $webrq{'err'} ); &pppstr(345, $base_path, "searchdata/templates/$foldername"); last Err; } $err = &WriteFile( "templates/$foldername/$langfile", $webrq{'text'} ); next Err if ($err); } &ppstr(174, &pstr(343, $foldername)); my $cache = 'valid_languages_cache.txt'; if (-e $cache) { unlink($cache); } last Err; } if ($subaction eq 'EditTemplate') { my $template = $::FORM{'template'}; my $html_template = &he( $template ); unless ($name_to_file{ $template }) { $err = &pstr(244, $html_template ); next Err; } my $text = ''; my $file = ''; if (-e "templates/$::Rules{'language'}/$name_to_file{ $template }") { $file = "templates/$::Rules{'language'}/$name_to_file{ $template }"; } elsif (-e "templates/$name_to_file{ $template }") { $file = "templates/$name_to_file{ $template }"; } else { $err = &pstr(245, $name_to_file{ $template } ); next Err; } ($err, $text) = &ReadFile( $file ); next Err if ($err); # Collapse multiple line breaks: $text =~ s!\015\012!\012!sg; $text =~ s!\015!\012!sg; $text =~ s!\012+!\n!sg; $text = &he( $text ); my $html_file = &he( $file ); my $descr = &pstr(246, $html_template, $html_file ); print <<"EOM"; $::const{'AdminForm'}$descr
Click here to view the default text for this template (offsite link; opens in new window).
EOM last Err; } elsif ($subaction eq 'SaveTemplate') { if ($::private{'is_demo'}) { &ppstr(53, $::str[435] ); last Err; } my $template = $::FORM{'template'}; unless ($name_to_file{ $template }) { $err = &pstr(244, &he($template) ); next Err; } my $file = "templates/$::Rules{'language'}/$name_to_file{ $template }"; if ((-e "templates/$name_to_file{ $template }") and (not (-e $file))) { $file = "templates/$name_to_file{ $template }"; } $err = &WriteFile( $file, $::FORM{'filetext'} ); next Err if ($err); &ppstr(174, &pstr(469,$name_to_file{$template})); last Err; } elsif ($subaction eq 'SaveSettings') { if ($::private{'is_demo'}) { &ppstr(53, $::str[435] ); last Err; } my $old_lang = $::Rules{'language'}; my $new_lang = $::FORM{'language'}; $::FORM{'ui: search form display'} = 2 * $::FORM{'sfp2'} + $::FORM{'sfp1'}; foreach ('language', 'ui: number format', 'ui: date format','ui: search form display') { if (not defined($::FORM{$_})) { $err = "invalid argument - required parameter '$_' is not defined"; next Err; } $err = &WriteRule($_, $::FORM{$_}); next Err if ($err); } if ($old_lang ne $new_lang) { &ppstr( 174, &pstr(357, &he( $old_lang, $new_lang ) ) ); } &ppstr(174,$::str[114]); last Err; } elsif ($subaction eq 'SS2') { print "$::str[96] / $::str[165] / $::str[473] / $::str[362]
\n"; if ($::private{'is_demo'}) { &ppstr(53, $::str[435] ); last Err; } my $b_need_rebuild = 0; foreach ('character conversion: accent insensitive', 'character conversion: case insensitive') { $b_need_rebuild = 1 if ($::Rules{$_} ne $::FORM{$_}); $err = &WriteRule($_, $::FORM{$_}); next Err if ($err); } &ppstr(174,$::str[114]); print '' . $::str[109] . '
' if ($b_need_rebuild); last Err; } elsif ($subaction eq 'viewmap') { print "$::str[96] / $::str[165] / $::str[473]
\n"; my $ex = '&' . 'uuml'; print &SetDefaults(<<"EOM",\%::Rules);$::str[473] ($::str[432])
$::const{'AdminForm'}$::str[481] | $::str[60] | |
---|---|---|
m$ex;ller == mueller | ||
m$ex;ller != mueller | ||
$::str[481] | $::str[60] | |
Miller == miller | ||
Miller != miller |
$::str[485]
$::str[484]
$::str[483]
$::str[482]
EOM &create_conversion_code(1); last Err; } print "$::str[163]
\n"; print "$::str[164]
\n"; $err = &ui_GeneralRules( $::str[165], 'UserInterface', 'default match', 'default search terms', 'default substring match', 'hits per page', 'show examples: enable', 'show examples: number to display', 'handling url search terms', 'no frames', 'sorting: randomize equally-relevant search results', 'sorting: default sort method', 'sorting: time sensitive', 'user language selection', ); next Err if ($err); my %support_lang = ( 'ar' => 'العربية', 'bs' => 'Bosanski', 'dutch' => 'Nederlands', 'english' => 'English', 'fi' => 'Finnish', 'french' => 'Français', 'german' => 'Deutsch', 'italian' => 'Italiano', 'lv' => 'Latviski', 'nb' => 'Norsk-bokmål', 'portuguese' => 'Português', 'ro' => 'Romanian', 'ru' => 'Ьяээцфх', 'sl' => 'Slovenski', 'spanish' => 'Español', 'sr' => 'Srpski', 'sv' => 'Svenska', 'tl' => 'Tagalog', 'tr' => 'Türkçe', ); my $lang_opt = ''; if (opendir(DIR, 'templates')) { my @folders = sort readdir(DIR); closedir(DIR); foreach (@folders) { next unless (-e "templates/$_/strings.txt"); unless (open(FILE, "' . $::str[470] . " $::str[46]" . '
'; $defaults{'html_search_code'} = $code; $defaults{'html_simple_code'} = $::Rules{'simple_code'}; $defaults{'html_link_code'} = $::Rules{'link_code'}; my $text = &PrintTemplate( 1, 'admin_ui.txt', $::Rules{'language'}, \%defaults ); $::Rules{'sfp1'} = $::Rules{'ui: search form display'} % 2; $::Rules{'sfp2'} = ($::Rules{'ui: search form display'} < 2) ? 0 : 1; print &SetDefaults($text, \%::Rules); last Err; } continue { &ppstr(29, $err ); } } sub save_custom_metadata { my ($url, %metadata) = @_; my $err = ''; Err: { unless ($::Rules{'use dbm routines'}) { $err = $::str[328]; next Err; } eval { my %custom = (); dbmopen( %custom, 'custom_metadata', 0666 ) || die &pstr( 43, 'custom_metadata', $! ); if (%metadata) { my $str = ''; my @pairs = (); foreach (keys %metadata) { push(@pairs, "$_=" . &ue($metadata{$_}) ); } $str = join( ' ', @pairs ); $custom{$url} = $str; } else { delete $custom{$url}; } dbmclose( %custom ); }; if ($@) { $err = &pstr(20, &he($@), "$::const{'help_file'}1169.html" ); } last Err; } return $err; } sub ui_EditRecord { my $err = ''; Err: { my $sa = $::FORM{'sa'} || ''; if ($sa eq 'save_all') { print <<"EOM";$::str[96] / $::str[327] / $::str[99] / Persist All Metadata / $::str[362]
EOM unless ($::Rules{'use dbm routines'}) { $err = $::str[328]; next Err; } eval { my %custom = (); dbmopen( %custom, 'custom_metadata', 0666 ) || die &pstr( 43, 'custom_metadata', $! ); my $p_realm; foreach $p_realm ($::realms->listrealms('has_index_data')) { my $count = 0; print "Status: opening realm $p_realm->{'html_name'}.
\n"; open(FILE, "<$p_realm->{'file'}") || die $!; binmode(FILE); while (defined($_ =Status: finished with $count records.
\n"; } dbmclose( %custom ); }; if ($@) { $err = &pstr(20, &he($@), "$::const{'help_file'}1169.html" ); } else { print "Success: saved all metadata as persistent customizations.
\n"; } last Err; } if ($sa eq 'delete_all') { print <<"EOM";$::str[96] / $::str[327] / $::str[99] / Delete All Customizations / $::str[362]
EOM unless ($::Rules{'use dbm routines'}) { $err = $::str[328]; next Err; } eval { my %custom = (); dbmopen( %custom, 'custom_metadata', 0666 ) || die &pstr( 43, 'custom_metadata', $! ); %custom = (); dbmclose( %custom ); }; if ($@) { $err = &pstr(20, &he($@), "$::const{'help_file'}1169.html" ); } else { print "Success: deleted all persistent customizations.
\n"; } last Err; } if ($sa eq 'write') { my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); next Err if ($err); my ($old_url,$new_url) = ('', ''); ($err,$old_url) = &uri_parse($::FORM{'EditURL'}); next Err if ($err); ($err,$new_url) = &uri_parse($::FORM{'url'}); next Err if ($err); my $uurl = &ue($new_url); print <<"EOM";$::str[96] / $::str[327] / $::str[99] / $::str[324] / $::str[362]
EOM foreach ('title','description','keywords') { $::FORM{$_} = '' unless (defined($::FORM{$_})); $::FORM{$_} =~ s!\r|\n|\=!!sg; $::FORM{$_} =~ s!\!>!sg; $::FORM{$_} =~ s!\"!"!sg; } my %crawler_results = (); my %pagedata; if ($old_url ne $new_url) { # Okay, they're doing a rename. well this is a little more tricky # lookup a full %pagedata hash on the old record # build a new insert %pagedata hash with the new meta-info # build a 'is_error' %pagedata hash forl the old url $err = &query_realm( $$p_realm_data{'name'}, quotemeta($old_url), 0, 1, \%crawler_results ); next Err if ($err); unless ($crawler_results{$old_url}) { $err = &pstr(249,&he($old_url),$$p_realm_data{'html_name'} ); next Err; } #end changes # updated record: my $p_pagedata = $crawler_results{$old_url}; %pagedata = %$p_pagedata; $pagedata{'is_error'} = 0; $pagedata{'url'} = $new_url; $pagedata{'title'} = $::FORM{'title'}; $pagedata{'description'} = $::FORM{'description'}; $pagedata{'keywords'} = $::FORM{'keywords'}; # kill record: my %kill = ( 'is_error' => 1, 'url' => $old_url, ); $crawler_results{ $old_url } = \%kill; } else { %pagedata = ( 'is_error' => 0, 'is_update' => 1, 'url' => $new_url, 'new_url' => $new_url, 'title' => $::FORM{'title'}, 'description' => $::FORM{'description'}, 'keywords' => $::FORM{'keywords'}, ); } $pagedata{'size'} = $::FORM{'size'}; unless ($pagedata{'size'} =~ m!^\d+$!) { $err = &pstr(69,'size',0,999999); next Err; } $pagedata{'promote'} = $::FORM{'promote'}; unless ($pagedata{'promote'} =~ m!^\d+$!) { $err = &pstr(69,'promote',1,99); next Err; } $crawler_results{$new_url} = \%pagedata; my ($total_records, $new_records, $updated_records, $deleted_records) = (0, 0, 0, 0); ($err, $total_records, $new_records, $updated_records, $deleted_records) = &update_realm( $$p_realm_data{'name'}, \%crawler_results ); next Err if ($err); &pppstr(289, $total_records, $$p_realm_data{'html_name'}, $new_records, $updated_records, $deleted_records ); my $html_code = &he(<<"EOM");$::str[96] / $::str[327] / $::str[99] / $::str[263]
EOM $err = &check_regex($query_pattern); next Err if ($err); $query_pattern = ".*$query_pattern.*" unless ($query_pattern =~ m!\.\*!); my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); next Err if ($err); my %crawler_results = (); $err = &query_realm( $$p_realm_data{'name'}, $query_pattern, 0, 1000000, \%crawler_results ); # changed 0072 next Err if ($err); my @match_urls = sort (keys %crawler_results); my $query_count = scalar @match_urls; &pppstr(273, &he($query_pattern), $query_count ); last Err if ($query_count == 0); my $x = 0; foreach (@match_urls) { $x++; print &AdminVersion( 'rank' => $x, %{ $crawler_results{$_} }, ); } last Err; } if ($sa eq 'delete') { print <<"EOM";$::str[96] / $::str[327] / $::str[99] / $::str[323] / $::str[95]
EOM unless ($::Rules{'use dbm routines'}) { $err = $::str[328]; next Err; } local $_; foreach (keys %::FORM) { next unless (m!^del:(.+)$!); my $url = $1; $err = &save_custom_metadata( $url ); next Err if ($err); } &ppstr(174,$::str[267]); print $::str[322]; last Err; } if ($sa eq 'list') { print <<"EOM";$::str[96] / $::str[327] / $::str[99] / $::str[323] / $::str[152]
EOM unless ($::Rules{'use dbm routines'}) { $err = $::str[328]; next Err; } eval { my %data_by_url = (); my %realm_by_url = (); dbmopen( %data_by_url, 'custom_metadata', 0666 ) || die &pstr( 43, 'custom_metadata', $! ); my $count = scalar keys %data_by_url; my ($obj, $p_rhandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle) = $obj->Read('search.pending.txt'); next Err if ($err); while (defined($_ = readline($$p_rhandle))) { next unless (m!^(\S+) (\S+) (\d+)(\r|\n|\015|\012)$!s); if ((defined($data_by_url{$1})) and ($3 > 2)) { $realm_by_url{$1} = $2; } } $err = $obj->Close(); next Err if ($err); unless ($count) { print "$::str[266]
\n"; } else { print <<"EOM"; $::const{'AdminForm'}$::str[74] | Actions | ||
---|---|---|---|
$hurl |
$::str[411] | ||
$hurl |
$::str[265] | ||
$attrib: | $value |
||
$::str[96] / $::str[327] / $::str[99] / $::str[324] / $::str[152]
EOM my $file = $$p_realm_data{'file'}; if ($$p_realm_data{'is_runtime'}) { $err = $::str[248]; next Err; } my $pattern = quotemeta($EditURL); my %crawler_results = (); $err = &query_realm( $$p_realm_data{'name'}, $pattern, 0, 1, \%crawler_results ); next Err if ($err); unless (%crawler_results) { $err = &pstr(249,&he($EditURL),$$p_realm_data{'html_name'} ); next Err; } my $r_pagedata = $crawler_results{$EditURL}; # this is just to set the checkbox defaults properly in the edit form... my %metadata = (); $err = &load_custom_metadata($EditURL, \%metadata); next Err if ($err); foreach ('title','description','keywords') { next unless (defined($metadata{$_})); $$r_pagedata{"persist_$_"} = 1; } print ''; print &AdminVersion('rank' => 1, %$r_pagedata); print ''; print &SetDefaults(<<"EOM",$r_pagedata); $::const{'AdminForm'}
$::str[250]: | |
$::str[74]: | |
$::str[153]: | bytes |
$::str[251]: | |
$::str[45]: | |
$::str[151]: | |
$::str[253]
$::str[250] | |
$::str[45] | |
$::str[151] |
$::str[254]
$::str[255]
$::str[256]
EOM } else { print <<"EOM";$::str[96] / $::str[327] / $::str[99] / $::str[152]
EOM my ($count, $html_hidden, $html_tr) = $::realms->html_select_ex('has_index_data', '', 'fdtan', 120); unless ($count) { $err = $::str[257]; next Err; } print <<"EOM";$::str[258]
$::str[259] | |
---|---|
$::str[161]: |
EOM
my $p_temp_data = ();
foreach $p_temp_data ($::realms->listrealms('has_index_data')) {
print "" . &he( $$p_temp_data{'name'} ) . " ($$p_temp_data{'pagecount'}) \n"; } print <<"EOM"; |
$::str[260] | |
---|---|
$::str[261]: |
$::str[264]
EOM &pppstr(247, "$::const{'admin_url'}&Action=Edit&sa=list") if ($::Rules{'use dbm routines'}); } last Err; } return $err; } sub DeleteFromPending { my ($realm, $p_urls) = @_; my $delcount = 0; my $err = ''; Err: { local $_; my $pattern = '^('; if (($p_urls) and ('ARRAY' eq ref($p_urls))) { foreach (@$p_urls) { $pattern .= quotemeta($_) . '|'; } $pattern =~ s!\|$!!o; $pattern .= ') '; } else { $pattern .= '.*) '; } if ($realm) { $pattern .= quotemeta(&ue($realm)); } else { $pattern .= '(\S+)'; } $pattern .= ' \d+$'; my ($obj, $p_rhandle, $p_whandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite('search.pending.txt'); next Err if ($err); while (defined($_ = readline($$p_rhandle))) { if (m!$pattern!o) { $delcount++; next; } print { $$p_whandle } $_; } $err = $obj->Merge(); next Err if ($err); } return ($err, $delcount); } sub ui_DeleteRecord { my $err = ''; Err: { unless ($::FORM{'Realm'}) { print <<"EOM";$::str[96] / $::str[327] / $::str[95] / $::str[152]
EOM my ($count, $html_hidden, $html_tr) = $::realms->html_select_ex('has_index_data', '', 'fdtan', 120); unless ($count) { $err = $::str[257]; next Err; } print <<"EOM";$::str[258]
$::str[259] | |
---|---|
$::str[161]: |
EOM
my $p_temp_data = ();
foreach $p_temp_data ($::realms->listrealms('has_index_data')) {
print qq!$$p_temp_data{'html_name'} ($$p_temp_data{'pagecount'}) \n!; } print <<"EOM"; |
$::str[260] | |
---|---|
$::str[261]: |
$::str[264]
EOM last Err; } my @urls_to_delete = (); while (defined($_ = each %::FORM)) { next unless (m!^URL\d*$!); push(@urls_to_delete, $::FORM{$_}); } my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); next Err if ($err); my $query_pattern = $::FORM{'query_pattern'}; $query_pattern = defined($query_pattern) ? $query_pattern : ''; my $html_query_pattern = &he( $query_pattern ); my %pagedata = (); my %crawler_results = (); if (@urls_to_delete) { print <<"EOM";$::str[96] / $::str[327] / $::str[95] / $::str[430]
EOM my $URL = ''; foreach $URL (@urls_to_delete) { my %pagedata = ( 'url' => $URL, 'is_error' => 1, ); $crawler_results{$URL} = \%pagedata; } my ($total_records, $new_records, $updated_records, $deleted_records) = (0, 0, 0, 0); ($err, $total_records, $new_records, $updated_records, $deleted_records) = &update_realm( $$p_realm_data{'name'}, \%crawler_results ); next Err if ($err); my $delcount = 0; ($err, $delcount) = &DeleteFromPending( $$p_realm_data{'name'}, \@urls_to_delete ); next Err if ($err); &ppstr(174, &pstr(178,$delcount,'search.pending.txt')); print "\n"; foreach $URL (sort keys %crawler_results) { my $r_pagedata = $crawler_results{$URL}; if ($$r_pagedata{'sub status msg'}) { print "URL '" . &he($URL) . "' - $$r_pagedata{'sub status msg'}\n"; &pppstr(289, $total_records, $$p_realm_data{'html_name'}, $new_records, $updated_records, $deleted_records ); my $default_forbid_url = $urls_to_delete[0]; if ($query_pattern) { $default_forbid_url = $query_pattern; } $default_forbid_url = &he($default_forbid_url); &ppstr(269, '
\n"; } else { print "$::str[73]: "; &ppstr(249, &he($URL), $$p_realm_data{'html_name'} ); print ".
\n"; } } print "
<meta name="robots" content="none" />', <<"EOM"); $::const{'AdminForm'}
$::str[261]: | |
$::str[270]
\n"; my $x = 0; while (1) { $x++; last if ($x > 10); if ($temp_url =~ m!^http://(.*)/!) { $temp_url = "http://$1"; print "$::str[271] " . &he($temp_url) . "/.*.
\n"; next; } last; } } last Err; } if ($query_pattern) { print <<"EOM";$::str[96] / $::str[327] / $::str[95] / $::str[263]
$::str[272]
EOM $err = &check_regex($query_pattern); next Err if ($err); $query_pattern = ".*$query_pattern.*" unless ($query_pattern =~ m!\.\*!); my %crawler_results = (); $err = &query_realm( $$p_realm_data{'name'}, $query_pattern, 0, 1000000, \%crawler_results ); next Err if ($err); my @kill_us = sort (keys %crawler_results); my $query_count = scalar @kill_us; &pppstr(273, &he($query_pattern), $query_count ); last Err if ($query_count == 0); print <<"EOM"; $::const{'AdminForm'}$::str[274]
EOM my $x = 0; foreach (@kill_us) { $x++; my $hurl = &he($_); print qq! $hurl$::str[275]
EOM last Err; } last Err; } continue { &ppstr(29, $err ); } } sub s_CrawlEntireSite { local $_; my ($Realm) = @_; my @ReIndex = (); my ($Count, $Limit) = (0, 2 * $::Rules{'crawler: max pages per batch'}); # limit is 2*; we send extra since URL's kicked out by first-pass filter rules aren't counted against total my $is_complete = 0; my $err = ''; Err: { $::FORM{'LimitFailed'} = $::FORM{'LimitIndexed'} = $::FORM{'LimitPending'} = 0; my ($obj, $p_rhandle) = (); $obj = &LockFile_new(); ($err, $p_rhandle) = $obj->Read('search.pending.txt'); next Err if ($err); my $matchRealm = quotemeta( &ue($Realm) ); my $cutTime = $::FORM{'StartTime'}; if ($::FORM{'DaysPast'}) { $cutTime -= (86400 * $::FORM{'DaysPast'}); } my $qm_limit = $::FORM{'LimitPattern'}; while (defined($_ = readline($$p_rhandle))) { next unless (m!^(.*?) $matchRealm (\d+)!); my ($URL, $time) = ($1, $2); next unless ($URL =~ m!$qm_limit!i); if ($time == 2) { $::FORM{'LimitFailed'}++; } elsif ($time >= $cutTime) { $::FORM{'LimitIndexed'}++; } else { $::FORM{'LimitPending'}++; push(@ReIndex,$URL) unless ($Count > $Limit); $Count++; } } $err = $obj->Close(); next Err if ($err); unless (@ReIndex) { &print_AddURL_nav_header( 0, $::FORM{'Action'} || '' ); &ppstr(174, $::str[276] ); $is_complete = 1; last Err; } $err = &s_AddURL(0, $Realm, @ReIndex); next Err if ($err); last Err; } continue { &ppstr(29, $err ); } return ($err, $is_complete); } sub ui_Rebuild { my $realm = $::FORM{'Realm'} || ''; my ($err, $is_complete) = ('', 0); my $b_clear_err = 1; if ($::const{'is_cmd'}) { $::Rules{'timeout'} = 0; # no timeout in command-line rebuilds delete $::FORM{'LimitPattern'}; # changed 0062 while (1) { ($err, $is_complete) = &rebuild_realm( $realm, $b_clear_err ); last if ($is_complete); $b_clear_err = 0; # don't rebuild on subsequent iterations last if ($err); } } else { # don't clear err if this looks like a secondary request in a multi-request rebuild... $b_clear_err = ((exists($::FORM{'PagesDone'})) or (exists($::FORM{'StartFile'}))) ? 0 : 1; &rebuild_realm( $realm, $b_clear_err ); } } sub rebuild_realm { my ($realm, $b_clear_err) = @_; my $is_complete = 0; my $err = ''; Err: { local $_; $::FORM{'LimitFailed'} = $::FORM{'LimitIndexed'} = $::FORM{'LimitPending'} = 0; # Initialize and validate FORM-based integers: foreach ('StartTime') { $::FORM{$_} = 0 unless exists $::FORM{$_}; next if ($::FORM{$_} =~ m!^\d+$!); $err = "parameter '$_' not numeric"; next Err; } foreach ('DaysPast') { $::FORM{$_} = 0 unless exists $::FORM{$_}; next if (($::FORM{$_} =~ m!^\d*\.?\d*$!) and ($::FORM{$_} ne '.')); $err = "parameter '$_' not numeric"; next Err; } if ($::const{'is_cmd'}) { &pppstr(185, $realm ); } else { print qq!$::str[96] / !; &ppstr(185, &he($realm) ); print "
\n"; } my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($realm); next Err if ($err); if ($b_clear_err) { # clear the error cache: my $error_lines = 0; ($err, $error_lines) = &clear_error_cache(); next Err if ($err); } # What does "rebuild" mean? Well, it depends on the type of realm we're dealing with: my $type = $$p_realm_data{'type'}; if ($type == 5) { # runtime realm; all dynamic data, no index; cannot rebuild $err = &pstr(277, $$p_realm_data{'html_name'} ); $is_complete = 1; next Err; } elsif ($type == 4) { # website realm w/ file system if ($::FORM{'DaysPast'}) { ($err, $is_complete) = &UpdateIndex( $p_realm_data ); } else { ($err, $is_complete) = &BuildIndex( $p_realm_data ); } next Err if ($err); last Err; } # Logic is different is we're rebuilding *all* pages or re-indexing old pages. # For website realms and filefed realms, a "rebuild" includes the full discovery process. A "re-index" only consists of re-indexing known pages that haven't been visited lately. # For "open" realms, the rebuild/re-index is essentially the same except for the time, since there is no discovery process for open realms. unless ($::FORM{'DaysPast'}) { # Okay this is a "rebuild": if ($type == 3) { # a website-realm which is handled via the crawler: unless ($::FORM{'LimitPattern'}) { unless ($::FORM{'StartTime'}) { $::FORM{'StartTime'} = $::private{'script_start_time'} - 5; } if ($$p_realm_data{'limit_pattern'}) { $::FORM{'LimitPattern'} = $$p_realm_data{'limit_pattern'}; } else { $::FORM{'LimitPattern'} = '^' . quotemeta(&get_web_folder($$p_realm_data{'base_url'})); } $err = &s_AddURL(0, $$p_realm_data{'name'}, $$p_realm_data{'base_url'}); next Err if ($err); last Err; } ($err, $is_complete) = &s_CrawlEntireSite($$p_realm_data{'name'}); next Err if ($err); last Err; } } if ($type == 2) { # ahh, a filefed realm # 4 steps: # 1. request start file and extract all links # 2. delete all entries from search.pending.txt; replace them with new links array, using code "10" # 3. delete all index data # 4. initiate normal "index-all-old-pages" process for this realm unless ($::FORM{'StartTime'}) { &pppstr(278, $$p_realm_data{'base_url'} ); my @fresh_links = (); my $crawler = &Crawler_new(); my @saved = ($::Rules{'crawler: follow query strings'}, $::Rules{'crawler: follow offsite links'}, $::Rules{'max characters: file'}, $::Rules{'crawler: rogue'}); ($::Rules{'crawler: follow query strings'}, $::Rules{'crawler: follow offsite links'}, $::Rules{'max characters: file'}, $::Rules{'crawler: rogue'}) = (1, 1, &max($::Rules{'max characters: file'},16777216),1); my %Response = $crawler->webrequest( 'page' => $$p_realm_data{'base_url'} ); if ($Response{'err'}) { $err = $Response{'err'}; next Err; } my %pagedata = (); &parse_html_ex( $Response{'text'}, $Response{'final_url'}, 1, \@fresh_links, \%pagedata); ($::Rules{'crawler: follow query strings'}, $::Rules{'crawler: follow offsite links'}, $::Rules{'max characters: file'}, $::Rules{'crawler: rogue'}) = @saved; my %fresh_uniq_links = (); foreach (@fresh_links) { $fresh_uniq_links{$_}++; } @fresh_links = sort (keys %fresh_uniq_links); my $count = scalar @fresh_links; my %expired_urls = (); &pppstr(279, $count ); # delete all entries: my ($obj, $p_rhandle, $p_whandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); my %orig_times = (); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite('search.pending.txt'); next Err if ($err); my $i = 0; my $get_next = 1; my $file_done = 0; my ($u,$r,$c) = (); while (($file_done == 0) or ($fresh_links[$i])) { if (($get_next) and ($file_done == 0)) { if (defined($_ = readline( $$p_rhandle ))) { next unless (m!^(.*?) (\S+) (\d+)$!); ($u,$r,$c) = ($1, $2, $3); if ($r eq $$p_realm_data{'url_name'}) { if ($fresh_uniq_links{$u}) { # still valid if ($::FORM{'DaysPast'}) { # preserve original index times $orig_times{$u} = $c; } } else { $expired_urls{$u} = 1; } next; } } else { $file_done = 1; $_ = ''; $u = 'z'; } } $get_next = 1; if (($fresh_links[$i]) and ("$u $r" gt "$fresh_links[$i] $$p_realm_data{'url_name'}")) { my $timecode = defined($orig_times{$fresh_links[$i]}) ? $orig_times{$fresh_links[$i]} : 0; unless (print { $$p_whandle } "$fresh_links[$i] $$p_realm_data{'url_name'} $timecode\n") { $err = &pstr( 43, $obj->{'wname'}, $! ); $obj->Cancel(); next Err; } $i++; $get_next = 0; next; } unless (print { $$p_whandle } $_) { $err = &pstr( 43, $obj->{'wname'}, $! ); $obj->Cancel(); next Err; } } $err = $obj->Merge(); next Err if ($err); # step 3 -- kill expired URL's # delete all expired entries: $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite( $$p_realm_data{'file'} ); next Err if ($err); while (defined($_ = readline( $$p_rhandle ))) { next unless (m!^.*? u= (.*?) t=!); my $url = $1; next if ($expired_urls{$url}); unless (print { $$p_whandle } $_) { $err = &pstr( 43, $obj->{'wname'}, $! ); $obj->Cancel(); next Err; } } $err = $obj->Merge(); next Err if ($err); } } unless ($::FORM{'StartTime'}) { $::FORM{'StartTime'} = $::private{'script_start_time'} - 5; } my @list = (); my $count = 0; my $age = $::FORM{'StartTime'}; if ($::FORM{'DaysPast'}) { $age -= (86400 * $::FORM{'DaysPast'}); } $err = &GetCrawlList( $$p_realm_data{'name'}, $age, 2 * $::Rules{'crawler: max pages per batch'}, \@list, \$count ); next Err if ($err); unless (@list) { # Well, we're done print "$::str[280]
\n"; $is_complete = 1; last Err; } $err = &s_AddURL(0, $$p_realm_data{'name'}, @list ); next Err if ($err); last Err; } continue { &ppstr(29, $err ); } return ($err, $is_complete); } sub GetCrawlList { my ( $realm, $age, $max_list_size, $p_list, $p_count) = @_; my $err = ''; Err: { local $_; #&Assert( 'ARRAY' eq ref( $p_list ) ); #&Assert( 'SCALAR' eq ref( $p_count ) ); my ($obj, $p_rhandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle) = $obj->Read('search.pending.txt'); next Err if ($err); my $pattern = quotemeta( &ue( $realm ) ); $$p_count = 0; while (defined($_ = readline($$p_rhandle))) { next unless (m!^(.*?) $pattern (\d+)!); my ($URL, $time) = ($1, $2); if ($time == 2) { $::FORM{'LimitFailed'}++; } elsif ($time > $age) { $::FORM{'LimitIndexed'}++; } else { $::FORM{'LimitPending'}++; push(@$p_list, $URL) if ($$p_count < $max_list_size); $$p_count++; } } $err = $obj->Close(); next Err if ($err); } return $err; } sub Authenticate { my ($crypt_pass) = @_; my ($is_auth, $form_password, $url_password) = (1, '', ''); my $sn = &query_env('SCRIPT_NAME'); my $seed = 'sX'; my $test_cookie = '0'; my $session_lifetime = 60 * $::Rules{'security: session timeout'}; my $grace_period = int($session_lifetime / 6); my %auth_tokens = (); my ($status_msg, $public_token) = ('',''); my $pri_token = exists($::FORM{'CP'}) ? $::FORM{'CP'} : ''; my $is_cookies_aware = 0; my $clear_cookie = 0; if (&query_env('HTTP_COOKIE') =~ m!fdse_cp=([^\;]+)!) { $is_cookies_aware = 1; my $auth_cookie = &ud($1); if ($auth_cookie ne $test_cookie) { $pri_token = $auth_cookie; } } my $b_is_api = ((exists($ENV{'FDSE_NO_EXEC'})) and (not exists($ENV{'SERVER_SOFTWARE'})) and (not exists($ENV{'SCRIPT_NAME'})) and (not exists($ENV{'HTTP_HOST'}))) ? 1 : 0; if ($b_is_api) { $::const{'is_cmd'} = 1; } my $b_print_status_only = 0; Auth: { # next for auth failure: # changed 0063 if (($b_is_api) and ($::private{'trust_api'})) { last Auth; } if ((exists($::FORM{'Action'})) and ($::FORM{'Action'} eq 'LogOut')) { $status_msg = &pstr(174,$::str[102]); if ($pri_token) { my $cpass = crypt($pri_token, $seed); if ($cpass eq '0') { my $temp_err_msg = "Perl crypt() function returned literal '0' - you have an incomplete Perl crypt installation. If you are running Lunix 2.2.16 with Perl 5.6.1, please upgrade with latest patches or downgrade to Perl 5.6.0"; $status_msg = &pstr(29, "$::str[282] - '$temp_err_msg'" ); next Auth; } delete $auth_tokens{$cpass}; &write_tokens(%auth_tokens); # no error check } next Auth; } # Is the user setting a new password? - they will still return AUTH_FAIL, but this will set the text message to an appropriate value: unless ($crypt_pass) { if (($::FORM{'new_pass_1'}) or ($::FORM{'new_pass_2'})) { $::FORM{'new_pass_1'} = $::FORM{'new_pass_1'} || ''; $::FORM{'new_pass_2'} = $::FORM{'new_pass_2'} || ''; $crypt_pass = 1; if ($::FORM{'new_pass_1'} ne $::FORM{'new_pass_2'}) { $status_msg = &pstr(29,$::str[285]); $b_print_status_only = 1; next Auth; } my $cpass = crypt($::FORM{'new_pass_1'}, $seed); if ($cpass eq '0') { my $temp_err_msg = "Perl crypt() function returned literal '0' - you have an incomplete Perl crypt installation. If you are running Linux 2.2.16 with Perl 5.6.1, please upgrade with latest patches or downgrade to Perl 5.6.0"; $status_msg = &pstr(29, "$::str[282] - '$temp_err_msg'" ); $b_print_status_only = 1; next Auth; } my ($temp_err_msg) = &WriteRule('password', $cpass); if ($temp_err_msg) { $status_msg = &pstr(29, "$::str[282] - '$temp_err_msg'" ); $b_print_status_only = 1; } else { $status_msg = &pstr(174, $::str[283] ); } } next Auth; } #changed 0054 - let 'Password' override 'CP' if ((exists $::FORM{'Password'}) and (length($::FORM{'Password'}))) { if (crypt($::FORM{'Password'}, $seed) ne $crypt_pass) { $status_msg = &pstr(29,$::str[181]); next Auth; } # the user provided a valid password; give that man a token! $pri_token = ''; foreach (1..8) { $pri_token .= chr(ord('a') + int(rand(26))); } my $cpass = crypt($pri_token, $seed); if ($cpass eq '0') { my $temp_err_msg = "Perl crypt() function returned literal '0' - you have an incomplete Perl crypt installation. If you are running Lunix 2.2.16 with Perl 5.6.1, please upgrade with latest patches or downgrade to Perl 5.6.0"; $status_msg = &pstr(29, "$::str[282] - '$temp_err_msg'" ); next Auth; } $public_token = $cpass; ($status_msg, %auth_tokens) = &read_tokens(); if ($status_msg) { $status_msg = &pstr(29, $status_msg); next Auth; } $auth_tokens{$public_token} = time() + $session_lifetime; $status_msg = &write_tokens(%auth_tokens); if ($status_msg) { $status_msg = &pstr(29, $status_msg); next Auth; } last Auth; } if ($pri_token) { ($status_msg, %auth_tokens) = &read_tokens(); if ($status_msg) { $status_msg = &pstr(29, $status_msg); next Auth; } my $cpass = crypt($pri_token, $seed); if ($cpass eq '0') { my $temp_err_msg = "Perl crypt() function returned literal '0' - you have an incomplete Perl crypt installation. If you are running Lunix 2.2.16 with Perl 5.6.1, please upgrade with latest patches or downgrade to Perl 5.6.0"; $status_msg = &pstr(29, "$::str[282] - '$temp_err_msg'" ); next Auth; } $public_token = $cpass; unless ($auth_tokens{$public_token}) { $status_msg = &pstr(29, $::str[281]); next Auth; } my $expire_time = $auth_tokens{$public_token}; if ($expire_time < time) { $status_msg = '' . $::str[284] . '
'; $clear_cookie = 1 if ($is_cookies_aware); next Auth; } elsif (($expire_time - $grace_period) < time) { # this token is about to expire; set a fresh one: $pri_token = ''; foreach (1..8) { $pri_token .= chr(ord('a') + int(rand(26))); } my $cpass = crypt($pri_token, $seed); if ($cpass eq '0') { my $temp_err_msg = "Perl crypt() function returned literal '0' - you have an incomplete Perl crypt installation. If you are running Lunix 2.2.16 with Perl 5.6.1, please upgrade with latest patches or downgrade to Perl 5.6.0"; $status_msg = &pstr(29, "$::str[282] - '$temp_err_msg'" ); next Auth; } $public_token = $cpass; $auth_tokens{$public_token} = time() + $session_lifetime; $status_msg = &write_tokens(%auth_tokens); if ($status_msg) { $status_msg = &pstr(29, $status_msg); next Auth; } } last Auth; } } continue { # AUTH_FAIL unless ($::const{'is_cmd'}) { &header_add( "Set-Cookie: fdse_cp=; path=$sn" ) if ($clear_cookie); &header_print( "Set-Cookie: fdse_cp=$test_cookie; path=$sn" ); print <<"EOM";FOOTER $is_auth = 0; } if ($is_auth) { if ($is_cookies_aware) { &header_add( "Set-Cookie: fdse_cp=" . &ue( $pri_token ) . "; path=$sn" ); } else { $url_password = "&CP=" . &ue($pri_token); $form_password = ''; } } return ($is_auth, $form_password, $url_password); } sub read_tokens { my %tokens = (); my $err = ''; Err: { local $_; my $text = ''; if (-e 'auth_tokens.txt') { ($err, $text) = &ReadFile('auth_tokens.txt'); next Err if ($err); } foreach (split(m!\015\012!s, $text)) { next unless (m!Token: (\S+); Expires: (\d+)!); $tokens{$1} = $2; } } return ($err,%tokens); } sub write_tokens { my %tokens = @_; my $text = ''; my ($token, $expires) = (); while (($token, $expires) = each %tokens) { next if ($expires < time()); $text .= "Token: $token; Expires: $expires\015\012"; } return &WriteFile('auth_tokens.txt', $text); } sub WriteRule { my $name = $_[0]; my $value = defined($_[1]) ? $_[1] : 0; my $err = ''; Err: { last Err if ($::Rules{$name} eq $value); my $FDR = &FD_Rules_new(); my ($is_valid, $valid_value) = $FDR->_fdr_validate($name, $value); unless ($is_valid) { $err = &pstr(170,&he($name,$value)); next Err; } $valid_value =~ s!(\r|\n|\015|\012)! !sg; # all line breaks become spaces my $default_value = $FDR->{'r_defaults'}->{$name}->[0]; # changed 0068 - strip defaults my $b_strip = ($valid_value eq $default_value) ? 1 : 0; my $text = ''; my $text_new = ''; if (-e $FDR->{'file'}) { ($err, $text) = &ReadFileL( $FDR->{'file'} ); next Err if ($err); } my $qm_name = quotemeta($name); my $blank_line_count = 0;#changed 0068 - prevent blank-line buildup local $_; foreach (split(m!\n!s, $text)) { next if (m!^\s*$qm_name\s*=!i); if (m!^\s*$!s) { $blank_line_count++; next if ($blank_line_count > 2); } else { $blank_line_count = 0; } $text_new .= "$_\n"; } unless ($b_strip) { $text_new .= "$name=$valid_value\n"; } $err = &WriteFile( $FDR->{'file'}, $text_new ); next Err if ($err); $::Rules{$name} = $valid_value; } return $err; } sub clear_error_cache { my $error_lines = 0; my $err = ''; Err: { my ($obj, $p_rhandle, $p_whandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite('search.pending.txt'); next Err if ($err); while (defined($_ = readline($$p_rhandle))) { if (m! 2$!) { $error_lines++; next; } unless (print { $$p_whandle } $_) { $err = &pstr(43,$obj->get_wname(),$!); $obj->Cancel(); next Err; } } $err = $obj->Merge(); next Err if ($err); last Err; } return ($err, $error_lines); } sub ui_DataStorage { my $err = ''; Err: { print <<"EOM";Fluid Dynamics Search Engine v$::VERSION
$::str[96] / $::str[292] EOM my $status_msg = ''; my $is_error = 0; my $subaction = $::FORM{'subaction'} || ''; if ($subaction eq 'VAO') { print " / Verify Alphabetic Order
\n"; my $p_realm; ($err, $p_realm) = $::realms->hashref( $::FORM{'Realm'} ); next Err if ($err); if ($p_realm->{'type'} != 4) { $err = "subaction $subaction is only available for realms of type $::str[366]"; next Err; } #require my $lib = 'common_test.pl'; delete $INC{$lib}; require $lib; if (&version_test() ne $::VERSION) { $err = "the library '$lib' is not version $::VERSION"; next Err; } #/require $err = &test_file_based_index( $p_realm->{'file'}, 1 ); next Err if ($err); last Err; } if ($subaction eq 'ClearError') { print " / $::str[332]\n"; my $error_lines = 0; ($err, $error_lines) = &clear_error_cache(); next Err if ($err); &ppstr(174, &pstr(178,$error_lines,'search.pending.txt')); last Err; } if ($subaction eq 'ReviewPending') { print " / $::str[294]\n"; my %valid_realms = (); my %true_count = (); my %err_count = (); my $error_count = 0; my $total_count = 0; my %kill_waiting = (); my %wait_count = (); my $p_realm_data = (); foreach $p_realm_data ($::realms->listrealms('all')) { my ($count, $url_name, $html_name) = ($$p_realm_data{'pagecount'}, &ue($$p_realm_data{'name'}), &he($$p_realm_data{'name'})); $valid_realms{$url_name} = $count; $true_count{$url_name} = 0; $wait_count{$url_name} = 0; $err_count{$url_name} = 0; $kill_waiting{$url_name} = ($$p_realm_data{'type'} == 3) ? 0 : 1; } my ($obj, $p_rhandle, $p_whandle) = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite('search.pending.txt'); next Err if ($err); my $invalid_lines = 0; my $old_realms = 0; my $prev_url = ''; local $_; while (defined($_ = readline($$p_rhandle))) { unless (m!^http://(.*) (\S+) (\d+)\r?$!) { next; } my ($url, $realm, $time) = ("http://$1", $2, $3); if ($time == 2) { $error_count++; $err_count{$realm}++; } else { unless ($valid_realms{$realm}) { $realm = &he( &ud( $realm ) ); &ppstr(53, &pstr(295, $realm ) ); $old_realms++; next; } elsif (($time == 0) and ($kill_waiting{$realm})) { next; } if ($url lt $prev_url) { &ppstr(53, $::str[296] ); &pppstr(297, $url, $prev_url ); next; } $true_count{$realm}++ if ($time > 10); $wait_count{$realm}++ if ($time == 0); } $total_count++; $prev_url = $url; print { $$p_whandle } $_; } $err = $obj->Merge(); next Err if ($err); if ($invalid_lines) { &pppstr(298, $invalid_lines ); } if ($old_realms) { &pppstr(299, $old_realms ); } &ppstr(174, $::str[355] ); &pppstr(301, $total_count, $error_count ); print <<"EOM";$::str[428] | $::str[146] | $::str[302] | $::str[303] | $::str[304] | $::str[305] |
---|---|---|---|---|---|
$display_name | $::str[306] | $::str[307] $action_VAO | $truecount | $pagecount | $wait_count{$name} | $err_count{$name} |
$::str[45]:
$::str[308] EOM last Err; } if ($subaction eq 'rmdupe') { print " / $::str[294] / $::str[307]\n"; my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); next Err if ($err); if ($$p_realm_data{'is_runtime'}) { $err = &pstr(277, $$p_realm_data{'html_name'} ); next Err; } # Get a list of all pages in the realm - import them into the pending file my %crawler_results = (); my $count = 0; my $dupes = 0; my ($obj, $p_rhandle, $p_whandle) = (); my %pages = (); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle, $p_whandle) = $obj->ReadWrite( $$p_realm_data{'file'} ); next Err if ($err); while (defined($_ = readline( $$p_rhandle ))) { next unless (m! u= (.+?) t=!); if ($pages{$1}) { &pppstr(310, $1 ); $dupes++; } else { $count++; print { $$p_whandle } $_; } $pages{$1}++; } $err = $obj->Merge(); next Err if ($err); &pppstr(311, $dupes ); &pppstr(313, $$p_realm_data{'html_name'}, $count ); $err = $::realms->setpagecount( $$p_realm_data{'name'}, $count, 1); next Err if ($err); last Err; } if ($subaction eq 'sync') { print " / $::str[294] / $::str[306]\n"; my $p_realm_data = (); ($err, $p_realm_data) = $::realms->hashref($::FORM{'Realm'}); next Err if ($err); my $url_realm = $$p_realm_data{'url_name'}; if ($$p_realm_data{'is_runtime'}) { $err = &pstr(277, $$p_realm_data{'html_name'} ); next Err; } # Get a list of all pages in the realm - import them into the pending file my %crawler_results = (); my $count = 0; my ($obj, $p_rhandle, $p_whandle) = (); my %valid = ( 'is_error' => 0 ); $obj = &LockFile_new( 'create_if_needed' => 1, ); ($err, $p_rhandle) = $obj->Read( $$p_realm_data{'file'} ); next Err if ($err); while (defined($_ = readline( $$p_rhandle ))) { next unless (m! u= (.+?) t=!); if ($crawler_results{$1}) { &ppstr(53, $::str[317] ); &pppstr(318); } else { $crawler_results{$1} = \%valid; } $count++; } $err = $obj->Close(); next Err if ($err); print "$::str[319]
\n"; foreach (sort keys %crawler_results) { print &he($_) . "$::str[333]
Warning: this registration key is for 'Genesis' instead of FDSE.
\n"; } return 0; } unless ($pri =~ s!Prod: FDSE!!sg) { if (($pri =~ m!Prod: (\w+)!s) and ($1 ne 'FDSE')) { print "Warning: this registration key is for '$1' instead of FDSE.
\n"; } return 0; } $pri =~ s!\r|\n!!sg; $pub =~ s!\r|\n!!sg; if (&Trim($pub) eq &Trim($pri)) { $is_valid = 1; } } return $is_valid; } sub html_select_ex { my ($self, $attrib, $default, $class, $width1) = @_; my ($count, $html_hidden, $html_tr) = (0, '', ''); $count = $self->realm_count($attrib); my $p_list = $self->{'realms'}; my $p_hash; if ($count == 1) { foreach $p_hash (@$p_list) { next unless ($$p_hash{$attrib}); $html_hidden = ''; last; } } elsif ($count > 1) { $default = '' unless (defined($default)); my $options = ''; foreach $p_hash ($self->listrealms($attrib)) { if ($default eq $$p_hash{'name'}) { $options .= qq!!; } else { $options .= qq!!; } } if ($class) { $html_tr = qq!Status: applying API_GET_WEBROOT correction for netfirms.com; appending '/www' to the path.
\n" if ($b_verbose); $path .= '/www'; } #virtualave corr: elsif (($sa eq 'webmaster@virtualave.net') and ($path eq '/home') and (-d '/home/public_html')) { print "Status: applying API_GET_WEBROOT correction for virtualave; appending '/public_html' to the path.
\n" if ($b_verbose); $path = '/home/public_html'; } #portland.co.uk elsif (($sa eq 'support@portland.co.uk') and (&query_env('SCRIPT_FILENAME') =~ m!^/host/(.*)/([\w\-]+).portland.co.uk/!)) { print "Status: applying API_GET_WEBROOT correction /home/$2 for portland.co.uk.
\n" if ($b_verbose); $path = "/home/$2"; } } elsif (defined($ENV{'SCRIPT_NAME'})) { # this approach will fail on multi-homed {x}/cgi-bin, {x}/public_html # that option usually only happens with Apache which always tends to have DOCUMENT_ROOT though my $forwardpath = $0; $forwardpath =~ s!\\!/!g; my $qmsn = quotemeta($ENV{'SCRIPT_NAME'}); if ($forwardpath =~ m!^(.*)$qmsn!i) { $path = $1; } } if ($path) { if (not -e $path) { $path = ''; print "Status: best-case match of '$path' is invalid because it failed the -e existence test.
\n" if ($b_verbose); } elsif (not -d $path) { $path = ''; print "Status: best-case match of '$path' is invalid because it failed the -d is-directory test.
\n" if ($b_verbose); } elsif ($path =~ m!^(.+)$!) { $path = $1; # untaint } } return $path; } sub max { my $max = $_[0]; local $_; foreach (@_) { $max = $_ if ($_ > $max); } return $max; } sub network_error_msg { my ($reason) = @_; return '' if ($::FORM{'Mode'} eq 'AnonAdd'); return qq~ ($::str[167])~; } 1;