Change 18252 by jhi@lyta on 2002/12/06 22:19:15

    Subject: Re: [perl #18107] lc(), uc() and ucfirst() broken inside utf8 regex
    From: Abhijit Menon-Sen <ams@wiw.org>
    To: perl5-porters@perl.org
    Date: Wed, 6 Nov 2002 19:38:11 +0530
    Message-ID: <20021106193811.E20858@lustre.dyn.wiw.org>

Affected files ...

.... //depot/maint-5.8/perl/regcomp.c#5 edit
.... //depot/maint-5.8/perl/t/op/lc.t#2 edit

Differences ...

==== //depot/maint-5.8/perl/regcomp.c#5 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#4~18197~ Wed Nov 27 20:14:27 2002
+++ perl/regcomp.c Fri Dec 6 14:19:15 2002
@@ -5072,6 +5072,23 @@
     SAVEVPTR(PL_reg_curpm); /* from regexec.c */
     SAVEI32(PL_regnpar); /* () count. */
     SAVEI32(PL_regsize); /* from regexec.c */
+
+    {
+        /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
+        int i;
+        GV *mgv;
+        REGEXP *rx;
+        char digits[16];
+
+        if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
+            for (i = 1; i <= rx->nparens; i++) {
+                sprintf(digits, "%lu", (unsigned long)i); /* %lu takes unsigned long, not int */
+                if ((mgv = gv_fetchpv(digits, FALSE, SVt_PV)))
+                    save_scalar(mgv);
+            }
+        }
+    }
+
 #ifdef DEBUGGING
     SAVEPPTR(PL_reg_starttry); /* from regexec.c */
 #endif
==== //depot/maint-5.8/perl/t/op/lc.t#2 (text) ====
Index: perl/t/op/lc.t
--- perl/t/op/lc.t#1~17645~ Fri Jul 19 12:29:57 2002
+++ perl/t/op/lc.t Fri Dec 6 14:19:15 2002
@@ -1,6 +1,6 @@
 #!./perl
 
-print "1..51\n";
+print "1..55\n";
 
 my $test = 1;
 
@@ -136,3 +136,18 @@
 ok(uc("\x{1C5}") eq "\x{1C4}", "U+01C5 uc is U+01C4");
 ok(uc("\x{1C6}") eq "\x{1C4}", "U+01C6 uc is U+01C4, too");
 
+# #18107: A host of bugs involving [ul]c{,first}. AMS 20021106
+$a = "\x{3c3}foo.bar"; # \x{3c3} == GREEK SMALL LETTER SIGMA.
+$b = "\x{3a3}FOO.BAR"; # \x{3a3} == GREEK CAPITAL LETTER SIGMA.
+
+($c = $b) =~ s/(\w+)/lc($1)/ge;
+ok($c eq $a, "Using s///e to change case.");
+
+($c = $a) =~ s/(\w+)/uc($1)/ge;
+ok($c eq $b, "Using s///e to change case.");
+
+($c = $b) =~ s/(\w+)/lcfirst($1)/ge;
+ok($c eq "\x{3c3}FOO.bAR", "Using s///e to change case.");
+
+($c = $a) =~ s/(\w+)/ucfirst($1)/ge;
+ok($c eq "\x{3a3}foo.Bar", "Using s///e to change case.");
End of Patch.