#[BOFN]###############################################################################
#
#
#Pagenews - a free script to publish news on websites
#Copyright (C) 2004,2005,2006,2007,2008 Philipp Kindt
#
#This file is part of Pagenews.
#
# 	 This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#[EOFN]###############################################################################


#this is the don CGI tool package

package doncgitools;
$allready = 0;			#browser Data allready read?
########################################################################################################
#escapes whitespaces and special chars (planned) in a way a normal browser can read it
#(Url encoding)
#text in $_[1], escaped string as return
sub escape_url{
	my $txt = $_[0];
	$txt =~ s/\ /\%20/g;
	return $txt;
}
########################################################################################################
#reads in browser cgi parameters - decodes it and returns it as a hash
#Version: 19. 12. 2004 - Philipp Kindt
#the standard function for CGIs
#you can force a request method by giving "GET" or "POST" in $_[1]
sub get_browser_data{
	#1. read data from Browser data. This might be either Post or get.
	#THIS ROUTINE DOES NOT SUPPORT COMBINED METHOD DECODING. CHOOSE EITHER "GET" OR "POST". "HEAD" (and anything else) is interpreted as a "POST"
	my $raw_data;					#Buffer for data as delivered by HTTP
	my %in;
	my %qs_in;
	


	#which method has been requested is written in $ENV{'REQUEST_METHOD'}
	my $rm = $ENV{'REQUEST_METHOD'};
	if($_[1] eq "GET"){
		$rm = "GET";
	}elsif($_[1] eq "POST"){
		$rm = "POST";
	}

	#we add this to %in in every case:
	$raw_data = $ENV{'QUERY_STRING'};
	%qs_in = decode_browser_data($raw_data);
	if($rm eq "GET"){
		#in this case, the data is in the "QUERY_STRING" environment variable
		return %qs_in;
	}else{	
		#if it is "POST", we have to read from STANDARD IN. The number of bytes is in the "CONTENT_LENGTH" environment Variable
		read(STDIN, $raw_data,$ENV{'CONTENT_LENGTH'});	
		
		#is it a multipart request?
		(my @parts) = split(/\;\ /,$ENV{'CONTENT_TYPE'});
		if($parts[0] =~ m/^multipart\/form-data$/i){
			#multipart request!!!
			(my $waste, my $boundary) = split(/\=/,$parts[1]);
			return decode_multipart_data($raw_data,$boundary);
		}else{
			%in = decode_browser_data($raw_data);
					
		}
	}
	#copy query string which has higher priority than post in this lib
	foreach my $key (keys(%qs_in)){
		$in{$key} = $qs_in{$key};		
	}
	return %in;		
}
########################################################################################################
#deoces multipart request. raw_data in $_[0], boundary string in $_[1]
#returns a data structure $rv containing from:
#$rv->{NAME}->{FIELDS}
#where name is the <input name = "NAME"> field, and FIELDS are the other fields.
#the value of the form element is in
#$rv->{NAME}->{NAME}
sub decode_multipart_data{
	my $rv = {};
	$boundary = $_[1];	
	$boundary = "--".$boundary;	
	(my @multiparts)= split(/$boundary/,$_[0]);
	for(my $cnt = 1; $cnt < @multiparts - 1; $cnt = $cnt +1){

	
		#cut off first and last newline
		chomp($multiparts[$cnt]);
		$multiparts[$cnt] = substr($multiparts[$cnt],2);
		
		(my $mpheader, my @mpdata) = split(/\n\r\n/,$multiparts[$cnt]); 	
		$mpheader =~ s/\r//g;		

		my $mpdata = donstdlib::array_to_string(\@mpdata,"\n\r\n");		
		chomp($mpdata);
		(my @mpheaderlines) = split(/\n/,$mpheader);

		#we're only interessted in the first line
		(my @pairs) = split(/\;\ /,$mpheaderlines[0]);
		my $mpart_hash = {};		
		foreach my $pair(@pairs){
			(my $key, my $value) = split(/\=/,$pair);
			$value =~ s/\"(.*)\"/$1/g;
			$mpart_hash->{$key} = $value;
		}
		if($mpart_hash->{'name'} eq ""){
			$mpart_hash->{'name'} = "undef_$cnt";
		}
		$rv->{$mpart_hash->{"name"}}=$mpart_hash;
		$/ = "\r";
				
		chomp($mpdata);
		$/ = "\n";

		$rv->{$mpart_hash->{"name"}}->{$mpart_hash->{"name"}}=$mpdata;

	}
	return $rv;
}

########################################################################################################
#creates an output compatible to decode_browser_data() (%in - hash) from a data structure created by decode_multipart_data()
#this works by ignoring additional fields, just building key value-pairs from the <input name = "NAME">-NAME field and its
#value
#needs multipart data structure in $_[0]
sub multipart_to_pseudo_browser_data{
	my %in;
	my $mpdata = $_[0];
	foreach my $key (keys(%{$mpdata})){
		$in{$key} = $mpdata->{$key}->{$key};
	}
	return %in;
}

########################################################################################################
#decodes Key-Value-keys from $_[0] in non-multipart requests
sub decode_browser_data{
	my @pairs = split(/\&/,$_[0]);
	my %browser_data;
	
	#time to decode each pair:
	foreach my $pair (@pairs){
		#split it up in key and value which are separated from each other by "="
		(my $key, my $value) = split(/\=/,$pair);
		
		#decode both key and values.
		
		$key = unescape_value($key);
		$value = unescape_value($value);

				 
		#Done - just include it to the data hash
		$browser_data{$key} = $value;

	};
	
	return %browser_data;
};
########################################################################################################
#unescapes $_[0], returns unescaped value	
sub unescape_value{		
		my $value = $_[0];
		#first, "+" must be replacted to an emty space
		
		$value =~ s/\+/\ /g;
		
		#all special characters are encoded by the browser with their values.
		#A "%" is added to the beginning
		#I could not make sure it is ASCII or UTF-8, but this does not matter
		#as UTF-8 includes ASCI - values (till #127)
		#so the "C" - Option of the pack command should do it
		
		#what we do is replacing all two-character long values that have a % in front of them
		#by looking up their UTF-8 Character according to the decimal value we get when interpeting the
		#two values as an hex...
		#whe have to use the single line option ("s")to make sure newlines aren't ignored
		#and the evaluate option ("e") - to enable the execution of hex() and pack()

		$value =~ s/\%(.{2})/pack(C,hex($1))/ges;
		return $value;
}
########################################################################################################
#unescapes data within html tags

sub unescape_tags{		
		my $document = $_[0];
		$document =~ s/\<(.*)\>/_unescape_value_for_tags($1)/ge;
		return $document;
}
########################################################################################################
#unescapes value to be used by unescape_tags()
sub _unescape_value_for_tags{
	return "<".unescape_value($_[0]).">";
}
########################################################################################################
#converts a text to html (this means it keeps the text like the orignal when interpreted as html)
#$_[0] is the plain text; returns the html source
#it is not complete yet, but should remove anything harmful.
#if $_[1] is "soft",  things like whitespaces are not html-ed
#if $_[2] is non-empty, newlines are html-ed
#rudimentary replacement of all hypertext markup tags is done by
#calling this function just by $_[1] = "soft" and $_[2] emtpy!
#if $_[3] is a non-empty string, we will not convert the chars "&
#because Xinha might not like them and does not require conversion 
sub txt_to_html{
	my $txt = $_[0];
	

	if($_[3] eq ""){
		$txt =~ s/\&/\&amp\;/gm;
		$txt =~ s/\"/\&quot\;/gm;
		$txt =~ s/\?/\&\#63\;/gm;
		$txt =~ s/\~/\&\#126\;/gm;
    	$txt =~ s/\[/\&\#91\;/gm;
		$txt =~ s/\]/\&\#93\;/gm;

   
	}

	#replace html brackets


	$txt =~ s/\</\&lt\;/gm;
	$txt =~ s/\>/\&gt\;/gm;
	
	if($_[1] ne "soft"){
		#replace special characters
   	$txt = convert_special_characters($txt);
	}
	if($_[2] ne ""){
		$txt = txt_newlines_to_html($txt);	
	};
	return $txt;
			
};
########################################################################################################
#&lt -> "<"  and so on
#does not translate quotes and special chars, just tags, if $_[1] is empty
sub html_to_txt{
	my $txt = $_[0];	
	$txt =~ s/\&lt\;/\</gmi;
	$txt =~ s/\&gt\;/\>/gmi;
	if($_[1] ne ""){
		$txt =~ s/\&amp\;/\&/gmi;
 		$txt =~ s/\#91\;/\[/gmi;
 		$txt =~ s/\#93\;/\]/gmi;
		$txt =~ s/\&quot\;/\"/gmi;
	}
	return $txt;
}

########################################################################################################
#the WYSIWYG-Editor produces output that gives just <br>s as a newline.
#this is incompatible with the rest of the system
#-> this funktion replaces <br> with <br>\n
#does not translate quotes and special chars, just tags
#in addition, it converts \n to <br>\n

sub html_br_to_br_and_newline{
	my $txt = $_[0];	
	$txt =~ s/\r//gi;
	$txt =~ s/\<br\>\n/\n/gi;
	$txt =~ s/\<br\>/\n/gi;
	$txt =~ s/\n/\<br\>\n/gi;	
	return $txt;
}

########################################################################################################
#converts <br>s to \ns - with all fancy possibilities that might happen.
#also cares about extra \ns and \rs
sub html_newlines_to_txt{
	my $txt = $_[0];
	$txt =~ s/\r//gi;
	$txt =~ s/\<br\>\n/<br>/gi;
	$txt =~ s/\<br\>/\r\n/gi;
	return $txt;
};

########################################################################################################
#converts \ns to <br>\ns; 
#if $_[1] is "hard", both \ns and <br>\ns are converted to <br>s
#removes carriage returns (\r)
sub txt_newlines_to_html{
	my $txt = $_[0];
	if($_[1] eq "hard"){
		$txt =~ s/\<br\>\n/\<br\>/gi;		
		$txt =~ s/\n/\<br\>/gi;		
	}else{
		$txt =~ s/\n/\<br\>\n/gi;			
	}	
	$txt =~ s/\r//gi;

	return $txt;
};
########################################################################################################
#converts \ns to <br>s and removes \rs - with all fancy possibilities that might happen.
#also cares about extra \ns and \rs

sub eliminate_newlines{
	my $txt = $_[0];
	$txt =~ s/\r//gi;
	$txt =~ s/\<br\>\n/\<br\>/gi;
	$txt =~ s/\n//gi;
	
	return $txt;
};

########################################################################################################
#converts quotes (") to unicode-quotes (\")
#in addition, it is necessary to encode slashes (\)
#this is used, for example, to put html-stuff in a javascript form parameter.
sub unicode_quotes{
	my $txt = $_[0];
	$txt =~ s/\\[\"\'nr]{0}/\\\\/gi;
	$txt =~ s/\"/\\\"/gi;
	$txt =~ s/\'/\\\'/gi;
	
		
	return $txt;
};

########################################################################################################
#strips all html-tags out of $_[0] and returnes the new string.
#this should work with almost all html tags except extreme things.
#please keep in mind that it just destroys TAGS, nothing between tags
#(for example Javascript) or does not care about things like &nbsp; if $_[1] is non-empty
#does not allways work -> handle with care!

sub strip_html_tags{
	if($_[1] eq ""){
		$_[0] = html_newlines_to_txt($_[0]);
	};

	while($_[0] =~ s/\<[^\>\<]*\>//g){
		;
	};
	if($_[1] eq ""){
		$_[0] = html_newlines_to_txt($_[0]);
		$_[0] = reconvert_html_chars($_[0]);
	};
	return $_[0];
};#########################################################################################################
#reconverts special html-chars like &nbsp; back to plain text. does not care about <br>s
sub reconvert_html_chars{
	my $txt = $_[0];
	$txt =~ s/\&nbsp\;/\ /gi;
	$txt =~ s/\&quot\;/\"/gi;
	$txt =~ s/\&amp\;/\&/gi;

	$txt =~ s/\&uuml\;/ü/g;
	$txt =~ s/\&auml\;/ä/g;
	$txt =~ s/\&ouml\;/Ü/g;
	
	$txt =~ s/\&Uuml\;/Ü/g;
	$txt =~ s/\&Auml\;/Ä/g;
	$txt =~ s/\&Ouml\;/Ö/g;

	$txt =~ s/\&szlig;\;/ß/g;
	return $txt;

}
#########################################################################################################
#converts special chars from txt to html like umlaute 
#(and other purposes)
sub convert_special_characters{
	my $trp = $_[0];
	$trp =~ s/\ä/sss\&auml\;/g;
	$trp =~ s/\ü/sss\&uuml\;/g;
	$trp =~ s/\ö/sss\&ouml\;/g;

	$trp =~ s/\Ä/sss\&Auml\;/g;
	$trp =~ s/\Ü/sss\&Uuml\;/g;
	$trp =~ s/\Ö/sss\&Ouml\;/g;

	$trp =~ s/\ß/\&szlig\;/g;


	return $trp;	
}
########################################################################################################
#makes text $_[0] ready for putting it into a javascript-function parameter.
#$_[1] specifies how to handle newlines. by default, they are converted.
#if $_[1] is "eliminate", they are just removed
sub html_to_js_function_param{
	my $txt = $_[0];
	if($_[1] eq "eliminate"){
		$txt = eliminate_newlines($txt);
	}else{
		$txt = txt_newlines_to_html($txt,"hard");

	}	
	$txt = unicode_quotes($txt);
			
	return $txt;

};

########################################################################################################
#creates a valid http header and returns it as a scalar string
#$_[0] can contain a content-type. default is text/html 
#$_[1] can contain a hash reference with additional header parameters (key - value => before : - after :)

sub create_http_header{
	my $ctype;	
	if($_[0] eq ""){
		$ctype = "text/html";
	}else{
		$ctype = $_[0];
	 }
	 
my $header = <<HEADERTXT
Content-Type: $ctype
HEADERTXT
;
	#additional header params
	if (ref($_[1]) ne ""){
		foreach my $key (keys(%{$_[1]})){
			$header .= $key.": ".$_[1]->{$key}."\n";		
		};	
	};
	chomp($header);
	$header .= "\n\n";
	return $header;
};






#########################################################################################################
#this function can read and return the names and values of browserparams
#having a certain name format 
#it is used to get a list of checkboxes for example, where the value can just have two possibilites
#but additional params are necessary.
#
#$_[0] must be a reference on the browserparams (%in)
#$_[1] must be the beginning of the name (identifier)
#$_[2] can be emty. if set, only params having the value $_[1] are considered.
#
#the form element name must be the identifier, separated by a dot, then additional params separated by dots 
#example of a form element name which can be handled by this function:
#my_identifier.myparam1.myparam2
#
#every name may only appear once
#
#returns a hash - see code

sub proced_multiple_elements{
	my $retvalue = {};
	foreach my $name (keys (%{$_[0]})){
		my @parts = split(/\./,$name);
		if($_[1] eq $parts[0]){										#check identifier
			if(($_[2] eq "")||($_[2] eq  $_[0]->{$name})){	#check value (if requested)
				push(@{$retvalue->{'names'}},$name);
				$retvalue->{'values'}->{$name} = $_[0]->{$name};
				for(my $cnt = 1; $cnt < @parts; $cnt++){
					push(@{$retvalue->{'params'}->{$name}}, $parts[$cnt]);
				};
			};	
		};
	};
	return $retvalue; 
};

#########################################################################################################
#this funktion uses proced_multiple_elements just with one param
#it returns an array containing only the parts[1] (part after first dot which is the param only)
#$_[0] must be a reference on %in
#$_[1] must be the identifier
#$_[2] can be emty. if set, only params having the value $_[2] are considered.
sub get_multiple_elements_one_param{
	my $elements = proced_multiple_elements($_[0], $_[1], $_[2]);
	my @retvalue;
	foreach my $name (@{$elements->{'names'}}){
		push(@retvalue, $elements->{'params'}->{$name}->[0]);	
	};
	return @retvalue;
};

#########################################################################################################
#returns an RFC-822-Conform time (built of localtime).
#$_[0] can be a time zone, GMT default
#if $_[1] is non-empty, it may contain "weekday","day","month", year","time" or "$timezone" and just the corresponding value
#will be returned (non_RFC_CONFORM in this case)
#$_[2] can be a timestamp which is used insted of the local time.
sub get_RFC_conform_time{
	
	my $now;
	if($_[2] ne ""){
		$now = $_[2];
	}else{
		$now = localtime;
	}
	
	#preparse
	$now =~ s/(\ ){2,}/\ /g;
	my $timezone;	
	if($_[0] eq ""){
		$timezone = "GMT";
	}else{
		$timezone = $_[0];
	}	
	(my $weekday, my $month, my $day, my $time, my $year) = split(/\ /,$now);

	if($month eq "Jan"){
		$month = "1";
	}elsif($month eq "Feb"){
		$month = "2";
	}elsif($month eq "Mar"){
		$month = "3";
	}elsif($month eq "Apr"){
		$month = "4";
	}elsif($month eq "May"){
		$month = "5";
	}elsif($month eq "Jun"){
		$month = "6";
	}elsif($month eq "Jul"){
		$month = "7";
	}elsif($month eq "Aug"){
		$month = "8";
	}elsif($month eq "Sep"){
		$month = "9";
	}elsif($month eq "Oct"){
		$month = "10";
	}elsif($month eq "Nov"){
		$month = "11";
	}elsif($month eq "Dec"){
		$month = "12";
	}
	
	if($_[1] eq "weekday"){
		return $weekday;
	}elsif($_[1] eq "day"){
		return $day;
	}elsif($_[1] eq "month"){
		return $month;
	}elsif($_[1] eq "year"){
		return $year;
	}elsif($_[1] eq "time"){
		return $time;
	}elsif($_[1] eq "timezone"){

		return $timezone;
	}else{
		#rfc - conform timestamp
		if(length($day) == 1){
			$day = "0".$day;	
		}	
		if(length($month) == 1){
			$month = "0".$month;	
		}	

		return "$weekday, $day $month $year $time $timezone";
	}
}

1;

