problems changing character encoding of files

Max Pyziur pyz at brama.com
Thu Aug 20 19:41:08 UTC 2009


On Thu, 20 Aug 2009, Hiisi wrote:

>> > I tried
>> > $ iconv -f utf8 -t koi8 one_of_files
>> 
>> Convert from utf8 to koi8?  Isn't that the opposite of what you want?
>> The default encoding for Fedora is usually UTF-8.  Usually, you'd
>> convert foreign files to the local one, so you can edit them without
>> hassles.
>> 
>> > Is there a way to change encoding for every file in the folder?
>> 
>> Using the iconv command in a script would appear to be the way.
>> 
>> 
> You've caught me! Feeling like an idiot! Am I?
> $ whoami
> IDIOT!
> Damn! It's just not my day today!
> 'iconv -f koi8 -t utf8 kant6112.out' does the task.
> Thanks, Tim!

The appended script may be helpful, a quick combo of Perl and iconv; it 
makes conversions between UTF-8 and the main Cyrillic encodings. Hack 
it to your convenience; I have no pride in authorship.

Advice from Perl gearheads accepted w/o question.

Max Pyziur
pyz at brama.com

> --
> Hiisi.
> Registered Linux User #487982. Be counted at: http://counter.li.org/
>

###### Begin script here #########################

#!/usr/bin/perl -w
# 51to8 - convert text files between CP1251, KOI8 and UTF-8 via iconv
#
# Usage: <program> Code SourceFile [TargetFile]

# Check the command line first; Validate exits on any problem it finds.
Validate();
# Run the conversion selected by the Code argument.
Convert();

# Validate - check the command line before any conversion is attempted.
#
# Expects @ARGV to hold: Code SourceFile [TargetFile]
#   Code        one of wk, kw, wu, uw, ku, uk
#   SourceFile  must exist
#   TargetFile  optional; must differ from SourceFile, and if it already
#               exists the user is asked whether to overwrite it
#
# On any failure an explanation and the usage text are printed and the
# script exits with a non-zero status. On success the source file name is
# stored in the global $IF, which Convert reads.
sub Validate {
    my %is_valid_code = map { $_ => 1 } qw(wk kw wu uw ku uk);

    if (@ARGV > 3)  { _ValidateFail("\n\n\tToo many arguments !\n"); }
    if (@ARGV == 0) { _ValidateFail("\n\n\tNo arguments !\n"); }
    if (@ARGV < 2)  { _ValidateFail("\n\n\tToo few arguments !\n"); }

    my ($code, $source, $target) = @ARGV;

    if (!$is_valid_code{$code}) {
        my $message = "\n\n\tCode can only be a choice of \"wk\", \"kw\", \"wu\", \"uw\", \n\t\"ku\", or \"uk\"!\n";
        # Codes starting with "t" are presumably a leftover latynka request;
        # keep the dedicated hint for them.
        if (substr($code, 0, 1) eq "t") {
            $message .= "\n\t(sorry latynka to cyr conversion is not available at this time)\n\n";
        }
        _ValidateFail($message);
    }

    if (!-e $source) { _ValidateFail("\n\n\tSourceFile does not exist!\n\n"); }

    if (defined $target) {
        # Reject an identical name before prompting, so the user is never
        # asked whether to overwrite the source file itself.
        if ($source eq $target) {
            _ValidateFail("\n\n\tTargetFile name should be different from SourceFile\n\n");
        }

        if (-e $target) {
            print "\n\n\tTargetFile exists!\n\n";
            print "\t(O)verwrite or (E)xit?  ";

            # A lexical keeps the caller's $_ intact; EOF on STDIN counts
            # as "do not overwrite".
            my $answer = <STDIN>;
            $answer = '' unless defined $answer;
            chomp $answer;

            if (uc($answer) ne "O") { _ValidateFail("\n\n"); }
        }
    }

    $IF = $source;
    return;
}

# _ValidateFail - print a message followed by the usage text, then exit 1.
sub _ValidateFail {
    my ($message) = @_;

    print $message;
    Usage();
    exit 1;
}

# Usage - print a short description of the command-line syntax to STDOUT.
#
# The program name shown is the final path component of $0.
sub Usage {
    # File::Basename is a core module; loading it here keeps the sub
    # self-contained and avoids passing $0 through a shell.
    require File::Basename;
    my $prog = File::Basename::basename($0);

    print "\tThe program is run in the following way: \n\n";
    print "\t$prog Code SourceFile TargetFile \n\n";
    print "\twhere \"Code\" can be a choice of any two of the following:\n";
    # The continuation lines below are part of the string literal; their
    # leading spaces are printed as-is.
    print "\tw for windows-1251 coding
         k for koi8 coding
         u for unicode utf8

         and where Targetfile is optional

         Example:
         $prog wk foo bar (will convert 1251 text to koi8 coding)
         $prog wu foo bar (will convert 1251 text to utf8 coding)


" ;
    return;
}


# Convert - run iconv for the conversion selected by $ARGV[0].
#
# Inputs:
#   $ARGV[0]  two-letter code: source encoding then target encoding, each
#             one of k (KOI8), w (CP1251) or u (UTF8); the two must differ
#   $IF       global source file name; falls back to $ARGV[1] when unset
#   $ARGV[2]  optional target file name; defaults to "$IF.<suffix>" where
#             the suffix names the target encoding
#
# The chosen output name is stored in the global $OF. Any other code is
# silently ignored. If iconv cannot be started or reports failure, a
# message is written to STDERR and the script exits with status 1 instead
# of claiming success.
sub Convert {
    # letter => [ encoding name passed to iconv, default output suffix ]
    my %encoding_for = (
        k => [ 'KOI8',   'koi8'   ],
        w => [ 'CP1251', 'cp1251' ],
        u => [ 'UTF8',   'utf8'   ],
    );

    my $code = defined $ARGV[0] ? $ARGV[0] : '';
    my ($src_key, $dst_key) = $code =~ /\A([kwu])([kwu])\z/;
    return unless defined $src_key && $src_key ne $dst_key;

    my $src_name = $encoding_for{$src_key}[0];
    my ($dst_name, $dst_suffix) = @{ $encoding_for{$dst_key} };

    # Normally set by Validate; fall back so Convert also works on its own.
    $IF = $ARGV[1] unless defined $IF;
    $OF = defined $ARGV[2] ? $ARGV[2] : "$IF.$dst_suffix";

    print "Converting $src_name $IF to $dst_name $OF ...\n";

    # List-form system() bypasses the shell, so file names containing
    # spaces or shell metacharacters reach iconv unchanged.
    my $status = system('iconv', '-f', $src_name, '-t', $dst_name, '-o', $OF, $IF);
    if ($status == -1) {
        print STDERR "\n...could not run iconv: $!\n";
        exit 1;
    }
    if ($status != 0) {
        print STDERR "\n...iconv failed (exit status ", $status >> 8, ")\n";
        exit 1;
    }

    print "\n...Done!\n";
    return;
}




More information about the fedora-list mailing list