#! /usr/bin/perl

#
# Copyright  2006 Luciano Montanaro
#

#   This file is free software; you can redistribute it and/or
#   modify it under the terms of the GNU Library General Public
#   License as published by the Free Software Foundation; either
#   version 2 of the License, or (at your option) any later version.

#   This library is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   Library General Public License for more details.

#   You should have received a copy of the GNU Library General Public License
#   along with this library; see the file COPYING.LIB.  If not, write to
#   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
#   Boston, MA 02110-1301, USA.

use Getopt::Std;

my $verbose = 0;
my $check = 0;
my $line = 0;
my $lasterrorline = -1;
my $text;

if (!getopts('wp')) {
	print STDERR ("Sintassi sconosciuta sulla riga di comando.\n");
	printUsage();
	exit;
} 

my $source = $ARGV[0];
if (defined($source)) {
	open STDIN, "$source" or die "Cannot open input file";
}

my %camelword = (
	'ASpell' => 't',
	'CImg' => 't',
	'CMake' => 't',
	'CTags' => 't',
	'CPU' => 't',
	'GBit' => 't',
	'GByte' => 't',
	'GPhoto' => 't',
	'GStreamer' => 't',
	'ICal' => 't',
	'ICard' => 't',
	'ICCCM' => 't',
	'IFrame' => 't',
	'ISpell' => 't',
	'MBit' => 't',
	'MByte' => 't',
	'MHz' => 't',
	'MPlayer' => 't',
	'NEdit' => 't',
	'OClock' => 't',
	'QApplication' => 't',
	'QBrush' => 't',
	'QColor' => 't',
	'QMake' => 't',
	'QPainter' => 't',
	'QPicture' => 't',
	'QRegExp' => 't',
	'QString' => 't',
	'QTextEdit' => 't',
	# 'QTopia' => 't', Qtopia should be written with only a capital Q
	'QWidget' => 't',
	'TByte' => 't',
	'VCal' => 't',
	'VCard' => 't',
	'XFree' => 't',
	'ZAngband' => 't',
	'ZLib' => 't',
	'ZModem' => 't',
);

my %abbreviation = (
	'etc' => 't',
	'es' => 't',
	'Inc' => 't',
	'incr' => 't',
);

print STDERR ("Checking $source...\n") if ($verbose);

while (<STDIN>) {
	$line++;
	
	if (defined($opt_p)) {
		$check = 1;
	} else {
		if (/^#.*$/) {
			# discard comments
			next;
		} elsif (/^msgid/) {
			# discard message ids
			$check = 0;
			print STDERR ("msgid, not checking at line $line\n") if ($verbose > 1);
		} elsif (/^msgstr/) {
			# check msgstr
			$check = 1;
			print STDERR ("msgstr, checking at line $line\n") if ($verbose > 1);
		}
	}
	if ($check) {
		$text = $_;

		my $ft = filter_extra_chars($text);

		# Accumulate text in one string
		if (defined($opt_w)) {
			while (<IN>) {
				$line++;
				last if (/^$/);
				last if (/^#.*$/);
				$text .= $_;
				$ft .= filter_extra_chars($_);
			}
		}
		$ft =~ s/\n//g;
		
		check_triple($ft);
		check_commas($ft);
		check_spaces($ft);
		check_repeated($ft);
		check_double_uppercase($ft);
		check_conflict_marker($ft);
		check_its($ft);
		if (defined($opt_w)) {
			# Do checks that work only on whole messages.
			# For example matching parenthesis, matching quotes etc.
		}
	} else {
		next;
	}
}

exit 0;

sub error($)
{
	$message = shift;
	print "\n$text" if ($lasterrorline != $line);
	$lasterrorline = $line;
	print ("$source:$line - $message\n");
}

sub filter_extra_chars($)
{
	# Generic rules
	
	# Remove leding and trailing '"'
	s/^msgstr\s*"//;
	s/^\s*"//;
	s/"\s*$//;
	
	# Change '\"' to simply '"'
	s/\\"/"/g;

	# Change "\n" to a space
	s/\\n/ /g;
	
	# Rules useful to strip KDE markup.
	
	# Convert common html/xml entities
	s/&lt;/</g;
	s/&gt;/>/g;
	s/&amp;/&/g;
	s/&aacute;/á/g;
	s/&agrave;/à/g;
	s/&Auml;/Ä/g;
	s/&auml;/ä/g;
	s/&ccedil;/ç/g;
	s/&eacute;/é/g;
	s/&egrave;/è/g;
	s/&iacute;/í/g;
	s/&igrave;/ì/g;
	s/&ograve;/ò/g;
	s/&ouml;/ö/g;
	s/&Ouml;/Ö/g;
	s/&szlig;/ß/g;
	s/&uacute;/ú/g;
	s/&ugrave;/ù/g;
	s/&uuml;/ü/g;
	s/&Uuml;/Ü/g;
	s/&lowbar;/_/g;
	s/&quot;/"/g;
	s/&nbsp;/ /g;
	s/&konqueror;/Konqueror/g; # Silence lots of warnings in the doc.
	s/&amarok;/Amarok/g; # Silence lots of warnings in the doc.

	# Remove KDE accelerator indicator
	s/([a-z])&([a-z])/\1\2/ig;
	# Remove GNOME accelerator indicator
	s/([a-z])_([a-z])/\1\2/ig;

	return $_;
}

sub check_triple($)
{
	my $message = shift;
	print STDERR ("$source:$line checking triple: $message\n") if ($verbose > 1);
	# Sequenze di tre caratteri uguali. Ignora la linea se ci sono numeri esadecimali 0xffff ecc.
	if (($message =~ /([bcdfglmnprstvz])\1\1/i) and 
		!(
			($message =~ /(pppd?|kppp\w*)\b/i) or 
			($message =~ /0x/) or 
			($message =~/glossseealso/) or
			($message =~/\b([BCDFGLMNPRSTVZbcdfglmnprstvz])\1\1\1?\1?\1?\1?\1?\1?\1?\1?\b/)
		)
	) {
		error("sequence of three or more identical letters");
	}
}

sub check_commas($)
{
	my $message = shift;
	print STDERR ("$source:$line checking: $message\n") if ($verbose > 1);
	# Sequenze di tre caratteri uguali. Ignora la linea se ci sono numeri esadecimali 0xffff ecc.
	if ($message =~ /,,/) {
		error("repeated commas");
	}
}

sub check_spaces($)
{
	my $message = shift;
	print STDERR ("$source:$line checking spaces in: $message\n") if ($verbose > 1);
	if (($message =~ / ([.,;:!?])/) and
		!(
			($message =~ / !=/) or 
			($message =~ / .[a-z]{1,3}/i)
		)
	) {
		error("space before $1");
	}
	if (($message =~ /[!?,;]([a-z][^\s]*)/i) 
	) {
		if (
			!(
				($message =~ /^Keywords=/) or
				($message =~ /^Query=/)
			)
		) {
			error("missing space before $1");
		}
	}
	if (($message =~ /\(\s/i) 
	) {
		error("space after an open parenthesis");
	}
	if (($message =~ /\s\)/i) 
	) {
		error("space before a closed parenthesis");
	}
	# For KDE docbooks: extra spaces between selected tags
	if (($message =~ /<(application|email|keycap|menuitem|guibutton|guiicon|guilabel|guimenu|quote)>[^<]+\s<\/\1>/i)) {
		error("space before closing tag <$1>");
	}
	if (($message =~ /<(keycombo)\s?.*>[^<]+\s<\/\1>/i)) {
		error("space before closing tag <$1>");
	}
	if (($message =~ /<(application|email|keycap|menuitem|guibutton|guiicon|guilabel|guimenu|quote)\s?.*>\s[^<]+<\/\1>/i)) {
		error("space after opening tag <$1>");
	}
	if (($message =~ /<(keycombo)\s?.*>\s[^<]+<\/\1>/i)) {
		error("space after opening tag <$1>");
	}
}

sub check_repeated($)
{
	my $message = shift;
	print STDERR ("$source:$line checking for repeated words: $message\n") if ($verbose > 1);
	if ($message =~ /(\b([a-z]{2,}) \1\b)/i) {
		my $match = $&;
		if (!$message =~ /($match)[àèìòùáéíóú]/) {
			error("repeated words");
		}
	}
}

sub check_double_uppercase($)
{
	# DUe maiuscole all'inizio di una parola (di solito sono un
	# 	errore, ma ci sono eccezioni)
	my $message = shift;
	print STDERR ("$source:$line checking for double capitals: $message\n") if ($verbose > 1);
	# Special case for "K", since it's used in many KDE programs.
	if (($message =~ /\b([A-JL-WYZ]{2}[a-z]+)/)) {
		if (!($camelword{$1} eq 't')) {
			error("Two capital letters at the beginning of word $1");
		}
	}
}

sub check_missing_uppercase($)
{
	my $message = shift;
	print STDERR ("$source:$line checking for missing capitals: $message\n") if ($verbose > 1);
	# Special case for "K", since it's used in many KDE programs.
	if (($message =~ /([a-z]+)\.\s+[a-z]/)) {
		if (!($abbreviation{$1} eq 't')) {
			error("lowercase after a full stop");
		}
	}
}

sub check_conflict_marker($)
{
	my $message = shift;
	print STDERR ("$source:$line checking for conflict marker: $message\n") if ($verbose > 1);
	# Special case for "K", since it's used in many KDE programs.
	if (($message =~ />>>>>>/)) {
		my $matvh = $1;
		error("CVS/SVN conflict marker");
	}
}

sub check_its($)
{
	my $message = shift;
	print STDERR ("$source:$line checking for conflict marker: $message\n") if ($verbose > 1);
	# Special case for "K", since it's used in many KDE programs.
	$prepositions="aboard|about|above|across|after|against|along|amid|among|around|at|" .
		"before|behind|below|beneath|beside|between|beyond|by|despite|down|" .
		"during|except|excepting|excluding|following|for|from|in|inside|into|" .
		"like|near|of|off|on|onto|opposite|outside|over|past|per|plus|" .
		"round|save|since|than|through|to|toward|towards|under|underneath|" . 
		"unlike|until|up|upon|versus|via|with|within|without";
	if (($message =~ /\b($prepositions)\s+it's/i)) {
		error("Looks like an \"its\" is required here, not an \"it's\" after $1");
	}
}

sub check_bad_spelling($)
{
	my $message = shift;
	print STDERR ("$source:$line checking for fancy capitalization: $message\n") if ($verbose > 1);
	if ($message =~ /\bpostscript\b/i) {
		if (!($message =~ /\bPostScript\b/)) {
			error("Usually, PostScript should be written with uppercase 'P' and 'S'");
		}
	}
	if ($message =~ /\bamarok\b/i) {
		if (!($message =~ /\bAmarok\b/)) {
			error("Amarok should have the first letter (only) in upper case");
		}
	}
	if ($message =~ /\bkonqueror\b/i) {
		if (!($message =~ /\bKonqueror\b/)) {
			error("Konqueror should be written with an uppercase K");
		}
	}
	if ($message =~ /\bdeprecia/i) {
		error("deprecation, not depreciation");
	}
}

sub printUsage()
{
	print STDERR ("Usage: $0 {-wt} file.po\n");
	print STDERR ("\n");
	print STDERR ("-w esegui i controlli sui messaggi completi invece che riga per riga\n");
	print STDERR ("-p tratta il file come un file di puro testo (non un file .po)\n");
	
}