# Wrapper for the CGI module, which converts form input to UTF-8.
# Please see http://dysphoria.net/code/perl-utf8/ for documentation.
# Written Andrew Forrest, Feb 2006
# Some bugs fixed by Andrew Scheller, Jun 2006

use strict;
use utf8;

use CGI;
use Encode;
use Encode::Guess qw(cp1252); # Windows Code page 1252. Fairly common. (Default guess.)

package UTF8CGI;

# Package globals are declared with `our` (the `use vars` pragma is obsolete).
our @ISA     = qw(CGI);    # inherit everything from CGI
our $VERSION = 1.0;

# Tell CGI.pm to use this class wherever it consults $CGI::DefaultClass
# (presumably when it builds its implicit default query object -- confirm
# against the installed CGI.pm).
$CGI::DefaultClass = __PACKAGE__;

# NOTE(review): looks like a hook read by CGI.pm's autoloading machinery so
# that inherited methods resolve in CGI -- confirm it is still honoured.
our $AutoloadClass = 'CGI';

# Constructor: builds a regular CGI object from @args, re-blesses it into the
# requested class, and decodes its parameters via decode_CGI().
#   $class - class name, or an existing object whose class should be reused
#   @args  - passed through unchanged to CGI->new
# Returns the new query object.
sub new {
	my ($class, @args) = @_;

	# Direct method call instead of the indirect-object form `new CGI(...)`,
	# which perl can parse ambiguously.
	my $self = CGI->new(@args);

	# Support $obj->new(...) as well as Class->new(...): bless() cannot take
	# an object as the class name.
	bless $self, ref($class) || $class;

	decode_CGI($self);
	return $self;
}

# Decode every non-upload parameter value of a query object, in place, into a
# Perl character string.
#
# The source encoding is chosen as follows:
#   1. the charset parameter of the request's Content-Type, if Encode knows it;
#   2. otherwise whatever Encode::Guess makes of the flattened form data,
#      picking utf8 when the guess fails with a message that mentions utf8;
#   3. otherwise latin1.
#
#   $query - query object providing content_type, param, param_fetch and upload
# Returns nothing; the values held by $query are modified.
sub decode_CGI {
	my ($query) = @_;

	my $content_type = ($query->content_type() or '');
	my $decoder;
	# The parameter name is case-insensitive, its value may be quoted, and
	# charset names are not limited to alphanumerics and '-' (e.g. Shift_JIS).
	if ($content_type =~ /charset\s*=\s*["']?([A-Za-z0-9_.:+\-]+)/i) {
		$decoder = Encode::find_encoding($1);
	}
	else {
		$decoder = Encode::Guess->guess(all_form_data($query));
		# On failure guess() hands back a message string instead of an
		# encoding object; if that message mentions utf8, go with utf8.
		if (defined $decoder && !ref($decoder) && $decoder =~ /utf8/) {
			$decoder = Encode::find_encoding('utf8');
		}
	}
	# If all else fails, assume latin1:
	$decoder = Encode::find_encoding('latin1')
		unless ref($decoder);

	my $encname = $decoder->name;

	# Decode all post|get parameters. Encode::decode() is called without a
	# CHECK argument, so malformed input is substituted rather than fatal
	# (this is user input, we don't want to barf if we can help it).
	foreach my $key ($query->param) {
		# Upload fields hold filehandles rather than text, so leave them
		# untouched. The answer depends only on the key, so ask once per key.
		next if $query->upload($key);

		# $val aliases the stored element, so assigning to it updates the
		# query object's own copy of the value.
		foreach my $val (@{ $query->param_fetch($key) }) {
			next unless defined $val;
			$val = Encode::decode($encname, $val);
		}
	}
	return;
}

# Return the query's complete query string as one string, with every %XX
# escape sequence replaced by the single byte it encodes.
sub all_form_data {
	my $query = shift;

	(my $flattened = $query->query_string()) =~
		s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

	return $flattened;
}
