#! /usr/bin/perl # # Copyright 2006 Luciano Montanaro # # This file is free software; you can redistribute it and/or # modify it under the terms of the GNU Library General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Library General Public License for more details. # You should have received a copy of the GNU Library General Public License # along with this library; see the file COPYING.LIB. If not, write to # the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301, USA. use Getopt::Std; my $verbose = 0; my $check = 0; my $line = 0; my $lasterrorline = -1; my $text; if (!getopts('wp')) { print STDERR ("Sintassi sconosciuta sulla riga di comando.\n"); printUsage(); exit; } my $source = $ARGV[0]; if (defined($source)) { open STDIN, "$source" or die "Cannot open input file"; } my %camelword = ( 'ASpell' => 't', 'CImg' => 't', 'CMake' => 't', 'CTags' => 't', 'CPU' => 't', 'GBit' => 't', 'GByte' => 't', 'GPhoto' => 't', 'GStreamer' => 't', 'ICal' => 't', 'ICard' => 't', 'ICCCM' => 't', 'IFrame' => 't', 'ISpell' => 't', 'MBit' => 't', 'MByte' => 't', 'MHz' => 't', 'MPlayer' => 't', 'NEdit' => 't', 'OClock' => 't', 'QApplication' => 't', 'QBrush' => 't', 'QColor' => 't', 'QMake' => 't', 'QPainter' => 't', 'QPicture' => 't', 'QRegExp' => 't', 'QString' => 't', 'QTextEdit' => 't', # 'QTopia' => 't', Qtopia should be written with only a capital Q 'QWidget' => 't', 'TByte' => 't', 'VCal' => 't', 'VCard' => 't', 'XFree' => 't', 'ZAngband' => 't', 'ZLib' => 't', 'ZModem' => 't', ); my %abbreviation = ( 'etc' => 't', 'es' => 't', 'Inc' => 't', 'incr' => 't', ); print STDERR ("Checking $source...\n") if ($verbose); while () { $line++; if (defined($opt_p)) { $check = 1; } else { if (/^#.*$/) { # discard comments next; } elsif (/^msgid/) { # discard message ids $check = 0; print STDERR ("msgid, not checking at line $line\n") if ($verbose > 1); } elsif (/^msgstr/) { # check msgstr $check = 1; print STDERR ("msgstr, checking at line $line\n") if ($verbose > 1); } } if ($check) { $text = $_; my $ft = filter_extra_chars($text); # Accumulate text in one string if (defined($opt_w)) { while () { $line++; last if (/^$/); last if (/^#.*$/); $text .= $_; $ft .= filter_extra_chars($_); } } $ft =~ s/\n//g; check_triple($ft); check_commas($ft); check_spaces($ft); check_repeated($ft); check_double_uppercase($ft); check_conflict_marker($ft); check_its($ft); if (defined($opt_w)) { # Do checks that work only on whole messages. # For example matching parenthesis, matching quotes etc. } } else { next; } } exit 0; sub error($) { $message = shift; print "\n$text" if ($lasterrorline != $line); $lasterrorline = $line; print ("$source:$line - $message\n"); } sub filter_extra_chars($) { # Generic rules # Remove leding and trailing '"' s/^msgstr\s*"//; s/^\s*"//; s/"\s*$//; # Change '\"' to simply '"' s/\\"/"/g; # Change "\n" to a space s/\\n/ /g; # Rules useful to strip KDE markup. # Convert common html/xml entities s/<//g; s/&/&/g; s/á/á/g; s/à/à/g; s/Ä/Ä/g; s/ä/ä/g; s/ç/ç/g; s/é/é/g; s/è/è/g; s/í/í/g; s/ì/ì/g; s/ò/ò/g; s/ö/ö/g; s/Ö/Ö/g; s/ß/ß/g; s/ú/ú/g; s/ù/ù/g; s/ü/ü/g; s/Ü/Ü/g; s/_/_/g; s/"/"/g; s/ / /g; s/&konqueror;/Konqueror/g; # Silence lots of warnings in the doc. s/&amarok;/Amarok/g; # Silence lots of warnings in the doc. # Remove KDE accelerator indicator s/([a-z])&([a-z])/\1\2/ig; # Remove GNOME accelerator indicator s/([a-z])_([a-z])/\1\2/ig; return $_; } sub check_triple($) { my $message = shift; print STDERR ("$source:$line checking triple: $message\n") if ($verbose > 1); # Sequenze di tre caratteri uguali. Ignora la linea se ci sono numeri esadecimali 0xffff ecc. if (($message =~ /([bcdfglmnprstvz])\1\1/i) and !( ($message =~ /(pppd?|kppp\w*)\b/i) or ($message =~ /0x/) or ($message =~/glossseealso/) or ($message =~/\b([BCDFGLMNPRSTVZbcdfglmnprstvz])\1\1\1?\1?\1?\1?\1?\1?\1?\1?\b/) ) ) { error("sequence of three or more identical letters"); } } sub check_commas($) { my $message = shift; print STDERR ("$source:$line checking: $message\n") if ($verbose > 1); # Sequenze di tre caratteri uguali. Ignora la linea se ci sono numeri esadecimali 0xffff ecc. if ($message =~ /,,/) { error("repeated commas"); } } sub check_spaces($) { my $message = shift; print STDERR ("$source:$line checking spaces in: $message\n") if ($verbose > 1); if (($message =~ / ([.,;:!?])/) and !( ($message =~ / !=/) or ($message =~ / .[a-z]{1,3}/i) ) ) { error("space before $1"); } if (($message =~ /[!?,;]([a-z][^\s]*)/i) ) { if ( !( ($message =~ /^Keywords=/) or ($message =~ /^Query=/) ) ) { error("missing space before $1"); } } if (($message =~ /\(\s/i) ) { error("space after an open parenthesis"); } if (($message =~ /\s\)/i) ) { error("space before a closed parenthesis"); } # For KDE docbooks: extra spaces between selected tags if (($message =~ /<(application|email|keycap|menuitem|guibutton|guiicon|guilabel|guimenu|quote)>[^<]+\s<\/\1>/i)) { error("space before closing tag <$1>"); } if (($message =~ /<(keycombo)\s?.*>[^<]+\s<\/\1>/i)) { error("space before closing tag <$1>"); } if (($message =~ /<(application|email|keycap|menuitem|guibutton|guiicon|guilabel|guimenu|quote)\s?.*>\s[^<]+<\/\1>/i)) { error("space after opening tag <$1>"); } if (($message =~ /<(keycombo)\s?.*>\s[^<]+<\/\1>/i)) { error("space after opening tag <$1>"); } } sub check_repeated($) { my $message = shift; print STDERR ("$source:$line checking for repeated words: $message\n") if ($verbose > 1); if ($message =~ /(\b([a-z]{2,}) \1\b)/i) { my $match = $&; if (!$message =~ /($match)[àèìòùáéíóú]/) { error("repeated words"); } } } sub check_double_uppercase($) { # DUe maiuscole all'inizio di una parola (di solito sono un # errore, ma ci sono eccezioni) my $message = shift; print STDERR ("$source:$line checking for double capitals: $message\n") if ($verbose > 1); # Special case for "K", since it's used in many KDE programs. if (($message =~ /\b([A-JL-WYZ]{2}[a-z]+)/)) { if (!($camelword{$1} eq 't')) { error("Two capital letters at the beginning of word $1"); } } } sub check_missing_uppercase($) { my $message = shift; print STDERR ("$source:$line checking for missing capitals: $message\n") if ($verbose > 1); # Special case for "K", since it's used in many KDE programs. if (($message =~ /([a-z]+)\.\s+[a-z]/)) { if (!($abbreviation{$1} eq 't')) { error("lowercase after a full stop"); } } } sub check_conflict_marker($) { my $message = shift; print STDERR ("$source:$line checking for conflict marker: $message\n") if ($verbose > 1); # Special case for "K", since it's used in many KDE programs. if (($message =~ />>>>>>/)) { my $matvh = $1; error("CVS/SVN conflict marker"); } } sub check_its($) { my $message = shift; print STDERR ("$source:$line checking for conflict marker: $message\n") if ($verbose > 1); # Special case for "K", since it's used in many KDE programs. $prepositions="aboard|about|above|across|after|against|along|amid|among|around|at|" . "before|behind|below|beneath|beside|between|beyond|by|despite|down|" . "during|except|excepting|excluding|following|for|from|in|inside|into|" . "like|near|of|off|on|onto|opposite|outside|over|past|per|plus|" . "round|save|since|than|through|to|toward|towards|under|underneath|" . "unlike|until|up|upon|versus|via|with|within|without"; if (($message =~ /\b($prepositions)\s+it's/i)) { error("Looks like an \"its\" is required here, not an \"it's\" after $1"); } } sub check_bad_spelling($) { my $message = shift; print STDERR ("$source:$line checking for fancy capitalization: $message\n") if ($verbose > 1); if ($message =~ /\bpostscript\b/i) { if (!($message =~ /\bPostScript\b/)) { error("Usually, PostScript should be written with uppercase 'P' and 'S'"); } } if ($message =~ /\bamarok\b/i) { if (!($message =~ /\bAmarok\b/)) { error("Amarok should have the first letter (only) in upper case"); } } if ($message =~ /\bkonqueror\b/i) { if (!($message =~ /\bKonqueror\b/)) { error("Konqueror should be written with an uppercase K"); } } if ($message =~ /\bdeprecia/i) { error("deprecation, not depreciation"); } } sub printUsage() { print STDERR ("Usage: $0 {-wt} file.po\n"); print STDERR ("\n"); print STDERR ("-w esegui i controlli sui messaggi completi invece che riga per riga\n"); print STDERR ("-p tratta il file come un file di puro testo (non un file .po)\n"); }