New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PATCH] Data::Dumper: useqq implementation for xs #10357
Comments
From @eserteThe patch basically misuses the existing esc_q_utf8 function which Regards, |
From @eserte0001-Data-Dumper-useqq-implementation-for-xs.patchFrom b642e10dcde67659983ca267108434b0a4b9cf07 Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Thu, 29 Apr 2010 18:02:20 +0200
Subject: [PATCH] Data::Dumper: useqq implementation for xs
---
dist/Data-Dumper/Dumper.pm | 1 -
dist/Data-Dumper/Dumper.xs | 67 ++++++++++++++++++++++++++++++-------------
dist/Data-Dumper/t/dumper.t | 31 ++++++++++---------
3 files changed, 63 insertions(+), 36 deletions(-)
mode change 100644 => 100755 dist/Data-Dumper/t/dumper.t
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 5967642..1947b3f 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -189,7 +189,6 @@ sub DESTROY {}
sub Dump {
return &Dumpxs
unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
- $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) ||
$Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
return &Dumpperl;
}
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index f2c1821..7f0235a 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -14,7 +14,7 @@
static I32 num_q (const char *s, STRLEN slen);
static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 useqq);
static I32 needs_quote(register const char *s);
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
@@ -22,7 +22,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
SV *freezer, SV *toaster,
I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
- I32 maxdepth, SV *sortkeys);
+ I32 maxdepth, SV *sortkeys, I32 useqq);
#ifndef HvNAME_get
#define HvNAME_get HvNAME
@@ -127,8 +127,10 @@ esc_q(register char *d, register const char *s, register STRLEN slen)
return ret;
}
+/* this function is also misused for implementing $Useqq */
static I32
-esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen,
+ register I32 useqq)
{
char *r, *rstart;
const char *s = src;
@@ -160,6 +162,12 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
#endif
);
+#ifndef EBCDIC
+ } else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+ grow += 2;
+ } else if (useqq && k <= 31) {
+ grow += 3;
+#endif
} else if (k == '\\') {
backslashes++;
} else if (k == '\'') {
@@ -170,7 +178,7 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
normal++;
}
}
- if (grow) {
+ if (grow || useqq) {
/* We have something needing hex. 3 is ""\0 */
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+ 2*qq_escapables + normal);
@@ -189,6 +197,26 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
#ifdef EBCDIC
if (isprint(k) && k < 256)
#else
+ if (useqq && k <= 31) {
+ *r++ = '\\';
+ switch (k) {
+ case 7: *r++ = 'a'; break;
+ case 8: *r++ = 'b'; break;
+ case 9: *r++ = 't'; break;
+ case 10: *r++ = 'n'; break;
+ case 12: *r++ = 'f'; break;
+ case 13: *r++ = 'r'; break;
+ case 27: *r++ = 'e'; break;
+ default:
+ if (k <= 7) {
+ *r++ = (char)k + '0';
+ } else {
+ *r++ = (char)(k/8) + '0';
+ *r++ = (char)(k%8) + '0';
+ }
+ }
+ }
+ else
if (k < 0x80)
#endif
*r++ = (char)k;
@@ -262,7 +290,8 @@ static I32
DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
- I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
+ I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
+ I32 useqq)
{
char tmpbuf[128];
U32 i;
@@ -485,7 +514,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys);
+ maxdepth, sortkeys, useqq);
sv_catpvn(retval, ")}", 2);
} /* plain */
else {
@@ -493,7 +522,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys);
+ maxdepth, sortkeys, useqq);
}
SvREFCNT_dec(namesv);
}
@@ -505,7 +534,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys);
+ maxdepth, sortkeys, useqq);
SvREFCNT_dec(namesv);
}
else if (realtype == SVt_PVAV) {
@@ -578,7 +607,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys);
+ maxdepth, sortkeys, useqq);
if (ix < ixmax)
sv_catpvn(retval, ",", 1);
}
@@ -738,9 +767,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
The code is also smaller (22044 vs 22260) because I've been
able to pull the common logic out to both sides. */
if (quotekeys || needs_quote(key)) {
- if (do_utf8) {
+ if (do_utf8 || useqq) {
STRLEN ocur = SvCUR(retval);
- nlen = esc_q_utf8(aTHX_ retval, key, klen);
+ nlen = esc_q_utf8(aTHX_ retval, key, klen, useqq);
nkey = SvPVX(retval) + ocur;
}
else {
@@ -785,7 +814,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
postav, levelp, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys);
+ maxdepth, sortkeys, useqq);
SvREFCNT_dec(sname);
Safefree(nkey_buffer);
if (indent >= 2)
@@ -965,7 +994,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
seenhv, postav, &nlevel, indent, pad, xpad,
newapad, sep, pair, freezer, toaster, purity,
deepcopy, quotekeys, bless, maxdepth,
- sortkeys);
+ sortkeys, useqq);
SvREFCNT_dec(e);
}
}
@@ -980,8 +1009,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
else {
integer_came_from_string:
c = SvPV(val, i);
- if (DO_UTF8(val))
- i += esc_q_utf8(aTHX_ retval, c, i);
+ if (DO_UTF8(val) || useqq)
+ i += esc_q_utf8(aTHX_ retval, c, i, useqq);
else {
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
r = SvPVX(retval) + SvCUR(retval);
@@ -1026,7 +1055,7 @@ Data_Dumper_Dumpxs(href, ...)
HV *seenhv = NULL;
AV *postav, *todumpav, *namesav;
I32 level = 0;
- I32 indent, terse, i, imax, postlen;
+ I32 indent, terse, useqq, i, imax, postlen;
SV **svp;
SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
SV *freezer, *toaster, *bless, *sortkeys;
@@ -1065,7 +1094,7 @@ Data_Dumper_Dumpxs(href, ...)
= freezer = toaster = bless = sortkeys = &PL_sv_undef;
name = sv_newmortal();
indent = 2;
- terse = purity = deepcopy = 0;
+ terse = purity = deepcopy = useqq = 0;
quotekeys = 1;
retval = newSVpvn("", 0);
@@ -1085,10 +1114,8 @@ Data_Dumper_Dumpxs(href, ...)
purity = SvIV(*svp);
if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
useqq = SvTRUE(*svp);
-#endif
if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
pad = *svp;
if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1191,7 +1218,7 @@ Data_Dumper_Dumpxs(href, ...)
DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
postav, &level, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys,
- bless, maxdepth, sortkeys);
+ bless, maxdepth, sortkeys, useqq);
if (indent >= 2 && !terse)
SvREFCNT_dec(newapad);
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
old mode 100644
new mode 100755
index de5e87c..0b4c6f9
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -83,11 +83,11 @@ sub SKIP_TEST {
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
print "### XS extension loaded, will run XS tests\n";
- $TMAX = 363; $XS = 1;
+ $TMAX = 369; $XS = 1;
}
else {
print "### XS extensions not loaded, will NOT run XS tests\n";
- $TMAX = 183; $XS = 0;
+ $TMAX = 186; $XS = 0;
}
print "1..$TMAX\n";
@@ -296,21 +296,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
{
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo));
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
- $WANT = <<"EOT";
-#\$VAR1 = {
-# 'abc\0\\'\efg' => 'mno\0',
-# 'reftest' => \\\\1
-#};
-EOT
-
- {
- local $Data::Dumper::Useqq = 1;
- TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat
- }
-
-
#############
#############
@@ -1430,3 +1418,16 @@ EOT
}
+############# 363
+{
+ $WANT = <<'EOT';
+#$VAR1 = [
+# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37"
+#];
+EOT
+
+ $foo = [ "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37" ];
+ local $Data::Dumper::Useqq = 1;
+ TEST q(Dumper($foo));
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
+}
--
1.7.0.3
|
From @eserteThe attachment named "work" went into the bug report by accident and may Regards, |
From @cpansproutOn Fri Apr 30 01:52:00 2010, srezic@iconmobile.com wrote:
Thank you for the patch. It looks nice, but there’s one problem: $ ./perl -Ilib -MData::Dumper -le '++$Data::Dumper::Useqq; print Dumper This is the output I get with the pure-Perl version: $ perl -MData::Dumper -le '++$Data::Dumper::Useqq; print Dumper join "", |
The RT System itself - Status changed from 'new' to 'open' |
From @eserteFather Chrysostomos via RT wrote:
This suggests that there are some tests missing. So please accept first Regards, |
From @eserte0001-test-Data-Dumper-with-all-latin1-characters.patchFrom 295088638affff387e10d4fcc186be6b65b07d14 Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Mon, 27 Sep 2010 12:36:58 +0200
Subject: [PATCH] test Data::Dumper with all latin1 characters
---
dist/Data-Dumper/t/dumper.t | 31 +++++++++++++++++++++++++++++--
1 files changed, 29 insertions(+), 2 deletions(-)
mode change 100644 => 100755 dist/Data-Dumper/t/dumper.t
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
old mode 100644
new mode 100755
index 17a7466..915d46d
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -83,11 +83,11 @@ sub SKIP_TEST {
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
print "### XS extension loaded, will run XS tests\n";
- $TMAX = 366; $XS = 1;
+ $TMAX = 378; $XS = 1;
}
else {
print "### XS extensions not loaded, will NOT run XS tests\n";
- $TMAX = 183; $XS = 0;
+ $TMAX = 189; $XS = 0;
}
print "1..$TMAX\n";
@@ -1439,3 +1439,30 @@ TEST q(join " ", new Data::Dumper [[]],[] =>->Dumpxs),
'$obj->Dumpxs in list context'
if $XS;
+############# 366
+{
+ $WANT = <<'EOT';
+#$VAR1 = [
+# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
+#];
+EOT
+
+ $foo = [ join "", map chr, 0..255 ];
+ local $Data::Dumper::Useqq = 1;
+ TEST q(Dumper($foo)), 'All latin1 characters';
+ for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+}
+
+############# 372
+{
+ $WANT = <<'EOT';
+#$VAR1 = [
+# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}"
+#];
+EOT
+
+ $foo = [ join "", map chr, 0..255, 0x20ac ];
+ local $Data::Dumper::Useqq = 1;
+ TEST q(Dumper($foo)), 'All latin1 characters with utf8 flag including a wide character';
+ for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+}
--
1.7.0.3
|
From @cpansproutOn Mon Sep 27 04:53:05 2010, srezic@iconmobile.com wrote:
Thank you. I’ve applied it as 45e462c. |
From @tonycozOn Mon Sep 27 04:53:05 2010, srezic@iconmobile.com wrote:
Hi Slaven, Did you ever have a chance to look further into this? Tony |
From @demerphqits obvious to me his patch treated raw binary as utf8. dd only uses octal On Tuesday, 25 June 2013, Tony Cook via RT wrote:
-- |
From @tonycozI may look at it once I run out of patch tickets to look at[1], but if Tony On Tue, Jun 25, 2013 at 08:15:45AM +0200, demerphq wrote:
[1] or run out of sanity <cackle> |
From @pjcjOn Tue, Jun 25, 2013 at 03:57:04PM +0200, Slaven Rezic wrote:
Pretty good, thanks to some recent work by Jim Keenan: http://cpancover.com/latest/Data-Dumper-2.145/index.html -- |
From @tonycozOn Tue, Jun 25, 2013 at 03:57:04PM +0200, Slaven Rezic wrote:
Somehow this message didn't make it into RT. There's one bug I can see with this change: $ ./perl -Ilib -MData::Dumper -e '++$Data::Dumper::Useqq; print Dumper("\x011")' This isn't present in the perl version: $ ./perl -Ilib -MData::Dumper -e '++$Data::Dumper::Useqq; ++$Data::Dumper::Useperl; print Dumper("\x011")' Tony |
From @tonycozOn Mon Jul 01 17:35:57 2013, tonyc wrote:
Attached the patch so people looking at the ticket don't get confused. Tony |
From @tonycoz0001-Data-Dumper-useqq-implementation-for-xs.patch>From 3bd39fb5d85722f06dd1741127eb52f3ed4068e1 Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Mon, 27 Sep 2010 12:53:58 +0200
Subject: [PATCH] Data::Dumper: useqq implementation for xs
Tests are mainly unchanged, just a "cheat" and a couple of TODOs were
removed.
---
dist/Data-Dumper/Dumper.pm | 1 -
dist/Data-Dumper/Dumper.xs | 85 ++++++++++++++++++++++++++++++-------------
dist/Data-Dumper/t/dumper.t | 17 ++-------
3 files changed, 63 insertions(+), 40 deletions(-)
mode change 100644 => 100755 dist/Data-Dumper/t/dumper.t
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 7c778dc..e11323a 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -221,7 +221,6 @@ sub DESTROY {}
sub Dump {
return &Dumpxs
unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
- $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) ||
$Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
return &Dumpperl;
}
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index b74650a..2a19097 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -18,7 +18,7 @@
static I32 num_q (const char *s, STRLEN slen);
static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
static I32 needs_quote(const char *s, STRLEN len);
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
@@ -26,7 +26,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
SV *freezer, SV *toaster,
I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
- I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);
+ I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq);
#ifndef HvNAME_get
#define HvNAME_get HvNAME
@@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen)
return ret;
}
+/* this function is also misused for implementing $Useqq */
static I32
-esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
{
char *r, *rstart;
const char *s = src;
@@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
int increment;
/* this will need EBCDICification */
- for (s = src; s < send; s += increment) {
- const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+ for (s = src; s < send; do_utf8 ? s += increment : s++) {
+ const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
/* check for invalid utf8 */
increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
@@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
#endif
);
+#ifndef EBCDIC
+ } else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+ grow += 2;
+ } else if (useqq && k <= 31) {
+ grow += 3;
+ } else if (useqq && k >= 127) {
+ grow += 4;
+#endif
} else if (k == '\\') {
backslashes++;
} else if (k == '\'') {
@@ -205,7 +214,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
normal++;
}
}
- if (grow) {
+ if (grow || useqq) {
/* We have something needing hex. 3 is ""\0 */
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+ 2*qq_escapables + normal);
@@ -213,8 +222,9 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
*r++ = '"';
- for (s = src; s < send; s += UTF8SKIP(s)) {
- const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+ for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
+ const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
+
if (k == '"' || k == '\\' || k == '$' || k == '@') {
*r++ = '\\';
@@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
#ifdef EBCDIC
if (isprint(k) && k < 256)
#else
+ if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+ *r++ = '\\';
+ switch (k) {
+ case 7: *r++ = 'a'; break;
+ case 8: *r++ = 'b'; break;
+ case 9: *r++ = 't'; break;
+ case 10: *r++ = 'n'; break;
+ case 12: *r++ = 'f'; break;
+ case 13: *r++ = 'r'; break;
+ case 27: *r++ = 'e'; break;
+ default:
+ /* faster than
+ * r = r + my_sprintf(r, "%o", k);
+ */
+ if (k <= 7) {
+ *r++ = (char)k + '0';
+ } else if (k <= 63) {
+ *r++ = (char)(k>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ } else {
+ *r++ = (char)(k>>6) + '0';
+ *r++ = (char)((k&63)>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ }
+ }
+ }
+ else
if (k < 0x80)
#endif
*r++ = (char)k;
@@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
- int use_sparse_seen_hash)
+ int use_sparse_seen_hash, I32 useqq)
{
char tmpbuf[128];
U32 i;
@@ -524,7 +561,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
sv_catpvn(retval, ")}", 2);
} /* plain */
else {
@@ -532,7 +569,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
}
SvREFCNT_dec(namesv);
}
@@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(namesv);
}
else if (realtype == SVt_PVAV) {
@@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
if (ix < ixmax)
sv_catpvn(retval, ",", 1);
}
@@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
The code is also smaller (22044 vs 22260) because I've been
able to pull the common logic out to both sides. */
if (quotekeys || needs_quote(key,keylen)) {
- if (do_utf8) {
+ if (do_utf8 || useqq) {
STRLEN ocur = SvCUR(retval);
- nlen = esc_q_utf8(aTHX_ retval, key, klen);
+ nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
nkey = SvPVX(retval) + ocur;
}
else {
@@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
postav, levelp, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(sname);
Safefree(nkey_buffer);
if (indent >= 2)
@@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
r = SvPVX(retval)+SvCUR(retval);
r[0] = '*'; r[1] = '{';
SvCUR_set(retval, SvCUR(retval)+2);
- esc_q_utf8(aTHX_ retval, c, i);
+ esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
sv_grow(retval, SvCUR(retval)+2);
r = SvPVX(retval)+SvCUR(retval);
r[0] = '}'; r[1] = '\0';
@@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
seenhv, postav, &nlevel, indent, pad, xpad,
newapad, sep, pair, freezer, toaster, purity,
deepcopy, quotekeys, bless, maxdepth,
- sortkeys, use_sparse_seen_hash);
+ sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(e);
}
}
@@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
else {
integer_came_from_string:
c = SvPV(val, i);
- if (DO_UTF8(val))
- i += esc_q_utf8(aTHX_ retval, c, i);
+ if (DO_UTF8(val) || useqq)
+ i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
else {
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
r = SvPVX(retval) + SvCUR(retval);
@@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...)
HV *seenhv = NULL;
AV *postav, *todumpav, *namesav;
I32 level = 0;
- I32 indent, terse, i, imax, postlen;
+ I32 indent, terse, useqq, i, imax, postlen;
SV **svp;
SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
SV *freezer, *toaster, *bless, *sortkeys;
@@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...)
= freezer = toaster = bless = sortkeys = &PL_sv_undef;
name = sv_newmortal();
indent = 2;
- terse = purity = deepcopy = 0;
+ terse = purity = deepcopy = useqq = 0;
quotekeys = 1;
retval = newSVpvn("", 0);
@@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...)
purity = SvIV(*svp);
if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
useqq = SvTRUE(*svp);
-#endif
if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
pad = *svp;
if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...)
DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
postav, &level, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys,
- bless, maxdepth, sortkeys, use_sparse_seen_hash);
+ bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SPAGAIN;
if (indent >= 2 && !terse)
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
old mode 100644
new mode 100755
index c1e5fe6..5ae287e
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
{
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo));
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
- $WANT = <<"EOT";
-#\$VAR1 = {
-# 'abc\0\\'\efg' => 'mno\0',
-# 'reftest' => \\\\1
-#};
-EOT
-
- {
- local $Data::Dumper::Useqq = 1;
- TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat
- }
-
#############
@@ -1461,7 +1450,7 @@ EOT
$foo = [ join "", map chr, 0..255 ];
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo)), 'All latin1 characters';
- for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
############# 372
@@ -1481,7 +1470,7 @@ EOT
TEST q(Dumper($foo)),
'All latin1 characters with utf8 flag including a wide character';
}
- for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
############# 378
--
1.7.2.5
|
From @tonycozOn Mon Jul 01 17:35:57 2013, tonyc wrote:
I've attached a new series of patches: 0001 - Slaven's patch, but don't make dumper.t +x Tony |
From @tonycoz0003-handle-xs-Useqq-dumping-of-strings-with-an-escape-fo.patchFrom a4c0e5ce766c5f361af161e4bb6b42f71b2aecd0 Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Wed, 10 Jul 2013 14:54:20 +1000
Subject: [PATCH 3/3] handle xs Useqq dumping of strings with an escape
followed by a digit
The original patch didn't handle a string like "\x001" correctly, encoding
it as "\01" rather than "\0001".
Added tests for this case and some possible corner cases
---
dist/Data-Dumper/Dumper.pm | 2 +-
dist/Data-Dumper/Dumper.xs | 32 ++++++++++++++++++++++++++------
dist/Data-Dumper/t/dumper.t | 34 ++++++++++++++++++++++++++++++++--
3 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index e11323a..fca6ab1 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -10,7 +10,7 @@
package Data::Dumper;
BEGIN {
- $VERSION = '2.146'; # Don't forget to set version and release
+ $VERSION = '2.147'; # Don't forget to set version and release
} # date in POD below!
#$| = 1;
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 60fe404..0194a2c 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -175,6 +175,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
STRLEN normal = 0;
int increment;
+ UV next;
/* this will need EBCDICification */
for (s = src; s < send; do_utf8 ? s += increment : s++) {
@@ -183,6 +184,12 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
/* check for invalid utf8 */
increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+ /* this is only used to check if the next character is an
+ * ASCII digit, which are invariant, so if the following collects
+ * a UTF-8 start byte it does no harm
+ */
+ next = (s + increment >= send ) ? 0 : *(U8*)(s+increment);
+
#ifdef EBCDIC
if (!isprint(k) || k > 256) {
#else
@@ -197,11 +204,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
#endif
);
#ifndef EBCDIC
- } else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+ } else if (useqq &&
+ /* we can't use the short form like '\0' if followed by a digit */
+ ((k >= 7 && k <= 10 || k == 12 || k == 13 || k == 27)
+ || (k < 8 && (next < '0' || next > '9')))) {
grow += 2;
- } else if (useqq && k <= 31) {
+ } else if (useqq && k <= 31 && (next < '0' || next > '9')) {
grow += 3;
- } else if (useqq && k >= 127) {
+ } else if (useqq && (k <= 31 || k >= 127)) {
grow += 4;
#endif
} else if (k == '\\') {
@@ -225,7 +235,6 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
-
if (k == '"' || k == '\\' || k == '$' || k == '@') {
*r++ = '\\';
*r++ = (char)k;
@@ -235,6 +244,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
if (isprint(k) && k < 256)
#else
if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+ bool next_is_digit;
+
*r++ = '\\';
switch (k) {
case 7: *r++ = 'a'; break;
@@ -245,12 +256,21 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
case 13: *r++ = 'r'; break;
case 27: *r++ = 'e'; break;
default:
+ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+ /* only ASCII digits matter here, which are invariant,
+ * since we only encode characters \377 and under, or
+ * \x177 and under for a unicode string
+ */
+ next = (s+increment < send) ? *(U8*)(s+increment) : 0;
+ next_is_digit = next >= '0' && next <= '9';
+
/* faster than
* r = r + my_sprintf(r, "%o", k);
*/
- if (k <= 7) {
+ if (k <= 7 && !next_is_digit) {
*r++ = (char)k + '0';
- } else if (k <= 63) {
+ } else if (k <= 63 && !next_is_digit) {
*r++ = (char)(k>>3) + '0';
*r++ = (char)(k&7) + '0';
} else {
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index 5ae287e..0bc4746 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -83,11 +83,11 @@ sub SKIP_TEST {
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
print "### XS extension loaded, will run XS tests\n";
- $TMAX = 402; $XS = 1;
+ $TMAX = 420; $XS = 1;
}
else {
print "### XS extensions not loaded, will NOT run XS tests\n";
- $TMAX = 201; $XS = 0;
+ $TMAX = 213; $XS = 0;
}
print "1..$TMAX\n";
@@ -1526,3 +1526,33 @@ EOW
TEST q(Data::Dumper->Dumpxs([\*finkle])), 'blessed overloaded globs (xs)'
if $XS;
}
+############# 390
+{
+ # [perl #74798] uncovered behaviour
+ $WANT = <<'EOW';
+#$VAR1 = "\0000";
+EOW
+ local $Data::Dumper::Useqq = 1;
+ TEST q(Data::Dumper->Dump(["\x000"])),
+ "\\ octal followed by digit";
+ TEST q(Data::Dumper->Dumpxs(["\x000"])), '\\ octal followed by digit (xs)'
+ if $XS;
+
+ $WANT = <<'EOW';
+#$VAR1 = "\x{100}\0000";
+EOW
+ local $Data::Dumper::Useqq = 1;
+ TEST q(Data::Dumper->Dump(["\x{100}\x000"])),
+ "\\ octal followed by digit unicode";
+ TEST q(Data::Dumper->Dumpxs(["\x{100}\x000"])), '\\ octal followed by digit unicode (xs)'
+ if $XS;
+
+
+ $WANT = <<'EOW';
+#$VAR1 = "\0\x{660}";
+EOW
+ TEST q(Data::Dumper->Dump(["\\x00\\x{0660}"])),
+ "\\ octal followed by unicode digit";
+ TEST q(Data::Dumper->Dumpxs(["\\x00\\x{0660}"])), '\\ octal followed by unicode digit (xs)'
+ if $XS;
+}
--
1.7.10.4
|
From @tonycoz0001-Data-Dumper-useqq-implementation-for-xs.patchFrom 87058463731e54ccb3e1a9d7f3a3e605db56100b Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Wed, 10 Jul 2013 14:18:18 +1000
Subject: [PATCH 1/3] Data::Dumper: useqq implementation for xs
Tests are mainly unchanged, just a "cheat" and a couple of TODOs were
removed.
---
dist/Data-Dumper/Dumper.pm | 1 -
dist/Data-Dumper/Dumper.xs | 85 ++++++++++++++++++++++++++++++-------------
dist/Data-Dumper/t/dumper.t | 17 ++-------
3 files changed, 63 insertions(+), 40 deletions(-)
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 7c778dc..e11323a 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -221,7 +221,6 @@ sub DESTROY {}
sub Dump {
return &Dumpxs
unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
- $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) ||
$Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
return &Dumpperl;
}
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index b74650a..2a19097 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -18,7 +18,7 @@
static I32 num_q (const char *s, STRLEN slen);
static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
static I32 needs_quote(const char *s, STRLEN len);
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
@@ -26,7 +26,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
SV *freezer, SV *toaster,
I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
- I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);
+ I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq);
#ifndef HvNAME_get
#define HvNAME_get HvNAME
@@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen)
return ret;
}
+/* this function is also misused for implementing $Useqq */
static I32
-esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
{
char *r, *rstart;
const char *s = src;
@@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
int increment;
/* this will need EBCDICification */
- for (s = src; s < send; s += increment) {
- const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+ for (s = src; s < send; do_utf8 ? s += increment : s++) {
+ const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
/* check for invalid utf8 */
increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
@@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
#endif
);
+#ifndef EBCDIC
+ } else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+ grow += 2;
+ } else if (useqq && k <= 31) {
+ grow += 3;
+ } else if (useqq && k >= 127) {
+ grow += 4;
+#endif
} else if (k == '\\') {
backslashes++;
} else if (k == '\'') {
@@ -205,7 +214,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
normal++;
}
}
- if (grow) {
+ if (grow || useqq) {
/* We have something needing hex. 3 is ""\0 */
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+ 2*qq_escapables + normal);
@@ -213,8 +222,9 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
*r++ = '"';
- for (s = src; s < send; s += UTF8SKIP(s)) {
- const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+ for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
+ const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
+
if (k == '"' || k == '\\' || k == '$' || k == '@') {
*r++ = '\\';
@@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
#ifdef EBCDIC
if (isprint(k) && k < 256)
#else
+ if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+ *r++ = '\\';
+ switch (k) {
+ case 7: *r++ = 'a'; break;
+ case 8: *r++ = 'b'; break;
+ case 9: *r++ = 't'; break;
+ case 10: *r++ = 'n'; break;
+ case 12: *r++ = 'f'; break;
+ case 13: *r++ = 'r'; break;
+ case 27: *r++ = 'e'; break;
+ default:
+ /* faster than
+ * r = r + my_sprintf(r, "%o", k);
+ */
+ if (k <= 7) {
+ *r++ = (char)k + '0';
+ } else if (k <= 63) {
+ *r++ = (char)(k>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ } else {
+ *r++ = (char)(k>>6) + '0';
+ *r++ = (char)((k&63)>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ }
+ }
+ }
+ else
if (k < 0x80)
#endif
*r++ = (char)k;
@@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
- int use_sparse_seen_hash)
+ int use_sparse_seen_hash, I32 useqq)
{
char tmpbuf[128];
U32 i;
@@ -524,7 +561,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
sv_catpvn(retval, ")}", 2);
} /* plain */
else {
@@ -532,7 +569,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
}
SvREFCNT_dec(namesv);
}
@@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(namesv);
}
else if (realtype == SVt_PVAV) {
@@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
if (ix < ixmax)
sv_catpvn(retval, ",", 1);
}
@@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
The code is also smaller (22044 vs 22260) because I've been
able to pull the common logic out to both sides. */
if (quotekeys || needs_quote(key,keylen)) {
- if (do_utf8) {
+ if (do_utf8 || useqq) {
STRLEN ocur = SvCUR(retval);
- nlen = esc_q_utf8(aTHX_ retval, key, klen);
+ nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
nkey = SvPVX(retval) + ocur;
}
else {
@@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
postav, levelp, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
- maxdepth, sortkeys, use_sparse_seen_hash);
+ maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(sname);
Safefree(nkey_buffer);
if (indent >= 2)
@@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
r = SvPVX(retval)+SvCUR(retval);
r[0] = '*'; r[1] = '{';
SvCUR_set(retval, SvCUR(retval)+2);
- esc_q_utf8(aTHX_ retval, c, i);
+ esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
sv_grow(retval, SvCUR(retval)+2);
r = SvPVX(retval)+SvCUR(retval);
r[0] = '}'; r[1] = '\0';
@@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
seenhv, postav, &nlevel, indent, pad, xpad,
newapad, sep, pair, freezer, toaster, purity,
deepcopy, quotekeys, bless, maxdepth,
- sortkeys, use_sparse_seen_hash);
+ sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(e);
}
}
@@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
else {
integer_came_from_string:
c = SvPV(val, i);
- if (DO_UTF8(val))
- i += esc_q_utf8(aTHX_ retval, c, i);
+ if (DO_UTF8(val) || useqq)
+ i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
else {
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
r = SvPVX(retval) + SvCUR(retval);
@@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...)
HV *seenhv = NULL;
AV *postav, *todumpav, *namesav;
I32 level = 0;
- I32 indent, terse, i, imax, postlen;
+ I32 indent, terse, useqq, i, imax, postlen;
SV **svp;
SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
SV *freezer, *toaster, *bless, *sortkeys;
@@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...)
= freezer = toaster = bless = sortkeys = &PL_sv_undef;
name = sv_newmortal();
indent = 2;
- terse = purity = deepcopy = 0;
+ terse = purity = deepcopy = useqq = 0;
quotekeys = 1;
retval = newSVpvn("", 0);
@@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...)
purity = SvIV(*svp);
if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
useqq = SvTRUE(*svp);
-#endif
if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
pad = *svp;
if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...)
DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
postav, &level, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys,
- bless, maxdepth, sortkeys, use_sparse_seen_hash);
+ bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SPAGAIN;
if (indent >= 2 && !terse)
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index c1e5fe6..5ae287e 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
{
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo));
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
- $WANT = <<"EOT";
-#\$VAR1 = {
-# 'abc\0\\'\efg' => 'mno\0',
-# 'reftest' => \\\\1
-#};
-EOT
-
- {
- local $Data::Dumper::Useqq = 1;
- TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat
- }
-
#############
@@ -1461,7 +1450,7 @@ EOT
$foo = [ join "", map chr, 0..255 ];
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo)), 'All latin1 characters';
- for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
############# 372
@@ -1481,7 +1470,7 @@ EOT
TEST q(Dumper($foo)),
'All latin1 characters with utf8 flag including a wide character';
}
- for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+ TEST q(Data::Dumper::DumperX($foo)) if $XS;
}
############# 378
--
1.7.10.4
|
From @tonycoz0002-adjust-indentation-to-match-other-DD-code.patchFrom 155c29d345d666151ff3e56aa40a74cd737e9fab Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Wed, 10 Jul 2013 14:19:17 +1000
Subject: [PATCH 2/3] adjust indentation to match other DD code
---
dist/Data-Dumper/Dumper.xs | 47 ++++++++++++++++++++++----------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 2a19097..60fe404 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -237,31 +237,30 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
*r++ = '\\';
switch (k) {
- case 7: *r++ = 'a'; break;
- case 8: *r++ = 'b'; break;
- case 9: *r++ = 't'; break;
- case 10: *r++ = 'n'; break;
- case 12: *r++ = 'f'; break;
- case 13: *r++ = 'r'; break;
- case 27: *r++ = 'e'; break;
- default:
- /* faster than
- * r = r + my_sprintf(r, "%o", k);
- */
- if (k <= 7) {
- *r++ = (char)k + '0';
- } else if (k <= 63) {
- *r++ = (char)(k>>3) + '0';
- *r++ = (char)(k&7) + '0';
- } else {
- *r++ = (char)(k>>6) + '0';
- *r++ = (char)((k&63)>>3) + '0';
- *r++ = (char)(k&7) + '0';
- }
+ case 7: *r++ = 'a'; break;
+ case 8: *r++ = 'b'; break;
+ case 9: *r++ = 't'; break;
+ case 10: *r++ = 'n'; break;
+ case 12: *r++ = 'f'; break;
+ case 13: *r++ = 'r'; break;
+ case 27: *r++ = 'e'; break;
+ default:
+ /* faster than
+ * r = r + my_sprintf(r, "%o", k);
+ */
+ if (k <= 7) {
+ *r++ = (char)k + '0';
+ } else if (k <= 63) {
+ *r++ = (char)(k>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ } else {
+ *r++ = (char)(k>>6) + '0';
+ *r++ = (char)((k&63)>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ }
}
- }
- else
- if (k < 0x80)
+ }
+ else if (k < 0x80)
#endif
*r++ = (char)k;
else {
--
1.7.10.4
|
From @tonycozOn Tue Jul 09 22:22:37 2013, tonyc wrote:
Applied as: Tony |
@tonycoz - Status changed from 'open' to 'resolved' |
From @demerphqOn 17 July 2013 03:11, Tony Cook via RT <perlbug-followup@perl.org> wrote:
I dont think this patch can be right (I have not verified) and I dont #else I see nothing that deals with something like this: "\0000" That is, a string containing an octal null followed by a zero. Or similar cases. The pureperl implementation DOES deal with this: $ perl -MData::Dumper -e'$Data::Dumper::Useqq=1; print Dumper("\0000")' Now, I admit I havent *tried* the patch, but I did not see any code to Yves -- |
From @eserteyves orton via RT wrote:
Seems to work: 11:32 slavenr@dev08 blead ( Regards, |
From @cpansproutOn Fri Jul 19 12:18:01 2013, srezic@iconmobile.com wrote:
Actually, there is one problem. Would you be willing to look into -- Father Chrysostomos |
From @jkeenanOn Fri Jul 19 18:01:02 2013, sprout wrote:
Given the questions raised by demerphq and by Father C in this post, I Thank you very much. |
@jkeenan - Status changed from 'resolved' to 'open' |
From @tonycozOn Wed Jul 17 07:14:53 2013, demerphq wrote:
Slaven's original patch didn't handle this case, I wrote a follow-up That said, I need to look at the BBC failure to see what I've broken. Tony |
From @demerphqOn 22 July 2013 02:32, Tony Cook via RT <perlbug-followup@perl.org> wrote:
Yeah, I meant to reply to this thread thanking you for that. :-) Thanks man! Yves -- |
From @tonycozOn Fri Jul 19 19:16:12 2013, jkeenan wrote:
I've fixed the failure in autobox::dump, and opened a ticket against https://rt.cpan.org/Ticket/Display.html?id=87171 Tony |
From @tonycozOn Mon Jul 22 16:56:36 2013, tonyc wrote:
And closing this one (again) too. Tony |
@tonycoz - Status changed from 'open' to 'resolved' |
Migrated from rt.perl.org#74798 (status was 'resolved')
Searchable as RT74798$
The text was updated successfully, but these errors were encountered: