# Encode the code point in $uv as a multi-byte UTF-8 sequence, building the
# bytes up in $d and returning it as soon as the matching range is found.
# Each lead byte carries a length marker (0xc0, 0xe0, 0xf0) plus the top bits
# of the code point; every following byte is 0x80 plus the next six bits.
# NOTE(review): nothing visible here handles $uv < 0x80 (which would need a
# single byte) or $uv >= 0x200000 -- presumably dealt with outside this
# fragment; confirm against the enclosing sub.
$d = "";
if ($uv < 0x800) {
    # Two-byte form: lead byte, then one continuation byte.
    $d .= chr(( $uv >> 6)   | 0xc0);
    $d .= chr(( $uv & 0x3f) | 0x80);
    return $d;
}
if ($uv < 0x10000) {
    # Three-byte form: lead byte, then two continuation bytes.
    $d .= chr(( $uv >> 12)         | 0xe0);
    $d .= chr((($uv >>  6) & 0x3f) | 0x80);
    $d .= chr(( $uv        & 0x3f) | 0x80);
    return $d;
}
if ($uv < 0x200000) {
    # Four-byte form: lead byte, then three continuation bytes.
    $d .= chr(( $uv >> 18)         | 0xf0);
    $d .= chr((($uv >> 12) & 0x3f) | 0x80);
    $d .= chr((($uv >>  6) & 0x3f) | 0x80);
    $d .= chr(( $uv        & 0x3f) | 0x80);
    return $d;
}
---------------------------------
C3 9C C3 B1 C3 AE C3 A7 C3 B6 C3 B0 C3 A8 0A
---------------------------------
% perl -e 'open IN, "foo.utf8"; $a = <IN>; print length ($a)'
15
---------------------------------
% perl -e 'open IN, "<:utf8", "foo.utf8"; $a = <IN>; print length ($a)'
8
---------------------------------
# Emit a fixed sequence of characters, given by code point, as UTF-8.
binmode STDOUT, ":utf8";

my @code_points = (
    0x30b8, 0x30a7, 0x30c3, 0x30cb, 0x306f,
    0x5927, 0x597d, 0x304d, 0xff01,
);

# One print call per character, exactly as many as there are code points.
foreach my $code_point (@code_points) {
    print chr $code_point;
}
---------------------------------
# Same idea using \x{...} escapes: each escape names a character by its
# hexadecimal code point directly inside the string literal.
binmode(STDOUT, ":utf8");
print "\x{30B8}\x{30A7}...";
---------------------------------
# ":full" makes the complete official Unicode character names available
# to the \N{...} escape.
use charnames ":full";
binmode(STDOUT, ":utf8");

# \N{NAME} is replaced by the character carrying that Unicode name.
print "I \N{HEAVY BLACK HEART} Unicode\n";
---------------------------------
# ":short" enables the abbreviated \N{script:name} form of character names.
use charnames ":short";
binmode(STDOUT, ":utf8");

# One Hebrew and one Greek letter, each selected as script:letter.
print "\N{hebrew:alef} \N{greek:omega}\n";
---------------------------------
# Spell a whole word with ":short" names; every letter has to repeat the
# "greek:" script prefix. Letter case in the name selects the letter's case.
use charnames ":short";
binmode(STDOUT, ":utf8");

print "\N{greek:Sigma}\N{greek:iota}\N{greek:mu}\N{greek:omicron}\N{greek:nu}";
---------------------------------
# Listing script names on the "use charnames" line lets \N{...} look up bare
# letter names within those scripts, so the script prefix can be dropped.
use charnames qw(greek hebrew);
binmode(STDOUT, ":utf8");

print "\N{Sigma}\N{iota}\N{mu}\N{omicron}\N{nu}\n";
print "\N{alef}\N{bet}\N{gimel}\n";
---------------------------------
# Build a two-character katakana string and show that "." in a regex matches
# one whole character of it.
use charnames qw(katakana);
binmode(STDOUT, ":utf8");
 
$x = "\N{sa}\N{i}";

# Capture the final character of the string and print it.
# NOTE(review): $1 is used without checking that the match succeeded.
$x =~ /(.)$/;
print $1;
---------------------------------
# Show \d applied to a string whose digits are Devanagari rather than ASCII.
use charnames qw(:full);
binmode(STDOUT, ":utf8");

$x = "Some numbers: \N{DEVANAGARI DIGIT TWO}\N{DEVANAGARI DIGIT SIX}";

# Prints the captured run of digits if \d+ matches anywhere in $x.
print "Found a number: $1" if $x =~ /(\d+)/;
---------------------------------
% perl -le 'my $x = chr(0x03c5).chr(0x0308).chr (0x0301); $x=~/(\X)/ and print length $1' 

3
---------------------------------
L
o
i

s
---------------------------------
L
o
?
s
---------------------------------
use Encode;
# Convert the "shiftjis"-encoded bytes in $text into Perl's internal
# character-string form.
my $intern = decode("shiftjis", $text);
---------------------------------
# Write the character string $intern to STDOUT encoded as UTF-8.
binmode(STDOUT, ":utf8");
print $intern;
---------------------------------
% perl -C2 -MEncode -MFile::Slurp\
	-e 'print decode("shiftjis", read_file("japanese.sjis"));'
---------------------------------
#!/usr/bin/perl -0777 -n -MEncode
# transcode FROM TO: read input, decode it from encoding FROM and print it
# re-encoded as TO.
#
# -0777 puts Perl in slurp mode so each input file reaches the loop body as one
# record. A bare -0 would instead split the input at every NUL byte, which cuts
# characters in half for encodings whose byte stream contains 0x00 (UTF-16,
# UTF-32).

# Pull the two encoding names off the command line before the implicit -n loop
# starts treating the remaining arguments as input files.
BEGIN{($from, $to) = splice @ARGV,0,2};

print encode($to, decode($from, $_));
---------------------------------
% transcode shiftjis euc-jp < japanese.sjis > japanese.euc
---------------------------------
% perl -MEncode -le 'print for Encode->encodings(":all")'

7bit-jis
AdobeStandardEncoding
AdobeSymbol
AdobeZdingbat
ascii
...
---------------------------------
# Transcode data.jis (Shift-JIS) into data.euc (EUC-JP) by letting the
# PerlIO encoding layers on the two handles do the conversion.
use Encode;
open(IN,  "<:encoding(shiftjis)", "data.jis") || die $!;
open(OUT, ">:encoding(euc-jp)",   "data.euc") || die $!;

# Pull in every line through the input layer, then push the whole list out
# through the output layer in a single print.
my @lines = <IN>;
print OUT @lines;
---------------------------------
# Produce the same one-character string three ways and print each result.
my $acute = chr(193);
print $acute;

# Append a character below 256, then remove it again.
$identity = $acute . chr(194); chop $identity;
print $identity;

# Append a character above 255, then remove it again. The same variable must
# be assigned, chopped and printed: otherwise the string that gets printed
# still ends in chr(257).
$identity = $acute . chr(257); chop $identity;
print $identity;
---------------------------------
# Read the first line of foo.utf8 through the :utf8 layer, so $a holds
# characters rather than raw bytes, and print it.
open IN, "<:utf8", "foo.utf8" or die $!;
$a = <IN>;
print $a;
---------------------------------
use Encode;

# Read the first line of foo.utf8 as characters ...
open IN, "<:utf8", "foo.utf8" or die $!;
$a = <IN>;
# ... then explicitly turn those characters back into UTF-8 bytes before
# printing.
$b = encode("utf8", $a);
print $b;
---------------------------------
# Read a length-prefixed payload from SOCKET and flag it as UTF-8.
use Encode qw(_utf8_on);
my ($length, $data);
# First a 2-unit length field, then that many units of payload.
# NOTE(review): the return values of read() are not checked, and $length is
# used as a number exactly as read -- confirm the wire format sends it as
# decimal text rather than as a packed binary integer.
read(SOCKET, $length, 2);
read(SOCKET, $data, $length);
# _utf8_on only switches on the internal UTF-8 flag; it does not verify that
# $data actually holds well-formed UTF-8.
_utf8_on($data);
---------------------------------
use Encode qw(is_utf8);

# Create the same one-character string three different ways.
$s1 = chr(70);
$s2 = pack("C", 70);
$s3 = pack("U", 70);

# For each string, work out whether its internal UTF-8 flag is set and
# report the answer.
my $negation1 = is_utf8($s1) ? "" : "not ";
print "String 1 is ", $negation1, "UTF-8 encoded\n";

my $negation2 = is_utf8($s2) ? "" : "not ";
print "String 2 is ", $negation2, "UTF-8 encoded\n";

my $negation3 = is_utf8($s3) ? "" : "not ";
print "String 3 is ", $negation3, "UTF-8 encoded\n";
---------------------------------
String 1 is not UTF-8 encoded
String 2 is not UTF-8 encoded
String 3 is UTF-8 encoded
---------------------------------
# Unpack $string into its individual values with "C*" and re-pack them under
# a "U0" template.
# NOTE(review): this looks like the old idiom for making Perl treat existing
# bytes as UTF-8; the meaning of U0 in pack differs between Perl versions --
# verify on the Perl version in use.
$string = pack("U0C*", unpack("C*", $string));
---------------------------------
# Compare the length of the same string in characters and in bytes.
open IN, "<:utf8", "foo.utf8" or die $!;
$a = <IN>;
chomp $a;

# Default semantics: length counts characters.
print length $a; # 8

{ 
  # Inside this block "use bytes" makes length count the bytes of the
  # internal representation instead.
  use bytes;
  print length $a; # 15
}
---------------------------------
/* Walk the string one byte at a time until a NUL byte is read. Note that s
   has already been advanced past the current byte inside the loop body. */
while (*s++) {
   /* Perform the appropriate operation */
}
---------------------------------
/* Walk a UTF-8 string one character at a time, printing each code point and
   the number of bytes it occupies. */
STRLEN len;
while (*s) {
     /* Decode the character starting at s; len receives its byte length. */
     UV c = utf8_to_uvchr(s, &len);
     /* UV and STRLEN are not int, so passing them to %d is a format/argument
        mismatch. Cast to unsigned long and print with %lu instead. */
     printf("Znaleziono znak o punkcie kodowym %lu, dugoci %lu\n",
            (unsigned long)c, (unsigned long)len);
     /* NOTE(review): no handling of malformed input here -- confirm what
        utf8_to_uvchr stores in len in that case before relying on it. */
     s += len;
}
---------------------------------
/* Walk a UTF-8 string, treating invariant characters as plain single bytes
   and decoding everything else into a code point. */
while (*s) {
     if (UTF8_IS_INVARIANT(*s)) {
        /* Use *s just as in the good old days of ASCII */
        s++;
     } else {
        STRLEN len;
        UV c = utf8_to_uvchr(s, &len);
        /* And here we deal with the Unicode code point. */
        s += len;
     }
}
---------------------------------
/* Walk a UTF-8 string, processing invariant characters as plain single bytes
   and stepping over every multi-byte character without decoding it. */
while (*s) {
     if (UTF8_IS_INVARIANT(*s)) {
        /* Use *s just as in the good old days of ASCII */
        s++;
     } else {
        /* Ignore these strange, long characters. UTF8SKIP takes a pointer to
           the first byte of the character (not the byte value itself) and
           yields the number of bytes that character occupies. */
        s += UTF8SKIP(s);
     }
}
---------------------------------
 /* Konwertujemy tablic liczb na acuch Unicode */
I32 len, i;
STRLEN strlen = 0;
SV* sv;
char* s;

len = av_len(av) + 1;

for (i = 0; i < len; i++) {
    SV** sav = av_fetch(av, i, 0);
    if (! sav) continue;
    strlen += UNISKIP(SvUV(*sav));
}

/* Alokujemy pami dla potrzeb acucha */
sv = newSV(strlen);
s = SvPVX(sv);

for (i = 0; i < len; i++) {
    SV** sav = av_fetch(av, i, 0);
    if (! sav) continue;
    s = uvchr_to_utf8(s, SvUV(*sav));
}

/* Perl wewntrznie oczekuje bajtu NUL po kadym buforze, wic go wpisujemy */

s = '\0';

/* Informujemy Perl, jak dugi jest skalar oraz e jest buforem acuchowym, 
a bufor ten zawiera znaki zakodowane w formacie UTF-8 */

SvCUR_set(sv, strlen);
SvPOK_on(sv);
SvUTF8_on(sv);
---------------------------------
/* Read a line from fp into sv. */
sv_gets(sv, fp, 0);
/* We want to mark this string as Unicode */
/* NOTE(review): the flag is set without checking that the bytes read are
   valid UTF-8. */
SvUTF8_on(sv);
---------------------------------
/* Read a line from fp into sv and flag it as UTF-8 only if its bytes pass
   the is_utf8_string check. */
STRLEN len;
char *s;

sv_gets(sv, fp, 0);
s = SvPV(sv, len);
if (is_utf8_string(s, len)) {
   SvUTF8_on(sv);
} else {
   /* This is not UTF-8 -- what is going on? */
}
---------------------------------
/* Call Encode::find_encoding("euc-jp") from C and keep the value it
   returns in encoding_obj. */
/* NOTE(review): no matching FREETMPS/LEAVE is visible in this fragment --
   presumably it follows later; confirm. */
ENTER;
SAVETMPS;

/* Push the single argument: the 6-byte encoding name. */
PUSHMARK(sp);
XPUSHp("euc-jp", 6);
PUTBACK;
/* NOTE(review): the number of values returned by call_pv is not checked
   before popping. */
call_pv("Encode::find_encoding", G_SCALAR);
SPAGAIN;
encoding_obj = POPs;
PUTBACK;
---------------------------------
/* Invoke the "decode" method on encoding_obj with euc_data and 0 as
   arguments, and keep the single returned value in uni. */
PUSHMARK(sp);
XPUSHs(encoding_obj);
XPUSHs(euc_data);
XPUSHi(0);
PUTBACK;
/* In scalar context exactly one return value is expected; anything else is
   treated as fatal. */
if (call_method("decode", G_SCALAR) != 1) {
    Perl_die(aTHX_ "panic: dekodowanie nie zwrcio wartoci");
}
SPAGAIN;
uni = POPs;
PUTBACK;
---------------------------------

