Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PATCH] Data::Dumper: useqq implementation for xs #10357

Closed
p5pRT opened this issue Apr 30, 2010 · 32 comments
Closed

[PATCH] Data::Dumper: useqq implementation for xs #10357

p5pRT opened this issue Apr 30, 2010 · 32 comments

Comments

@p5pRT
Copy link

p5pRT commented Apr 30, 2010

Migrated from rt.perl.org#74798 (status was 'resolved')

Searchable as RT74798$

@p5pRT
Copy link
Author

p5pRT commented Apr 30, 2010

From @eserte

The patch basically misuses the existing esc_q_utf8 function which
already does limited "..." escaping, and adds additional escaping for
characters < 0x20.

Regards,
  Slaven

@p5pRT
Copy link
Author

p5pRT commented Apr 30, 2010

From @eserte

work2

@p5pRT
Copy link
Author

p5pRT commented Apr 30, 2010

From @eserte

0001-Data-Dumper-useqq-implementation-for-xs.patch
From b642e10dcde67659983ca267108434b0a4b9cf07 Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Thu, 29 Apr 2010 18:02:20 +0200
Subject: [PATCH] Data::Dumper: useqq implementation for xs

---
 dist/Data-Dumper/Dumper.pm  |    1 -
 dist/Data-Dumper/Dumper.xs  |   67 ++++++++++++++++++++++++++++++-------------
 dist/Data-Dumper/t/dumper.t |   31 ++++++++++---------
 3 files changed, 63 insertions(+), 36 deletions(-)
 mode change 100644 => 100755 dist/Data-Dumper/t/dumper.t

diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 5967642..1947b3f 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -189,7 +189,6 @@ sub DESTROY {}
 sub Dump {
     return &Dumpxs
 	unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
-	       $Data::Dumper::Useqq   || (ref($_[0]) && $_[0]->{useqq}) ||
 	       $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
     return &Dumpperl;
 }
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index f2c1821..7f0235a 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -14,7 +14,7 @@
 
 static I32 num_q (const char *s, STRLEN slen);
 static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 useqq);
 static I32 needs_quote(register const char *s);
 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
@@ -22,7 +22,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
 		    SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
 		    SV *freezer, SV *toaster,
 		    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-		    I32 maxdepth, SV *sortkeys);
+		    I32 maxdepth, SV *sortkeys, I32 useqq);
 
 #ifndef HvNAME_get
 #define HvNAME_get HvNAME
@@ -127,8 +127,10 @@ esc_q(register char *d, register const char *s, register STRLEN slen)
     return ret;
 }
 
+/* this function is also misused for implementing $Useqq */
 static I32
-esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen,
+	   register I32 useqq)
 {
     char *r, *rstart;
     const char *s = src;
@@ -160,6 +162,12 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
 #endif
                 );
+#ifndef EBCDIC
+	} else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+	    grow += 2;
+	} else if (useqq && k <= 31) {
+	    grow += 3;
+#endif
         } else if (k == '\\') {
             backslashes++;
         } else if (k == '\'') {
@@ -170,7 +178,7 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
             normal++;
         }
     }
-    if (grow) {
+    if (grow || useqq) {
         /* We have something needing hex. 3 is ""\0 */
         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
 		+ 2*qq_escapables + normal);
@@ -189,6 +197,26 @@ esc_q_utf8(pTHX_ SV* sv, register const char *src, register STRLEN slen)
 #ifdef EBCDIC
 	      if (isprint(k) && k < 256)
 #else
+	      if (useqq && k <= 31) {
+		*r++ = '\\';
+		switch (k) {
+		    case 7:  *r++ = 'a'; break;
+		    case 8:  *r++ = 'b'; break;
+		    case 9:  *r++ = 't'; break;
+		    case 10: *r++ = 'n'; break;
+		    case 12: *r++ = 'f'; break;
+		    case 13: *r++ = 'r'; break;
+		    case 27: *r++ = 'e'; break;
+                    default:
+                      if (k <= 7) {
+                        *r++ = (char)k + '0';
+                      } else {
+                        *r++ = (char)(k/8) + '0';
+                        *r++ = (char)(k%8) + '0';
+                      }
+		}
+	      }
+	      else
 	      if (k < 0x80)
 #endif
                 *r++ = (char)k;
@@ -262,7 +290,8 @@ static I32
 DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
 	SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
-	I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys)
+	I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
+	I32 useqq)
 {
     char tmpbuf[128];
     U32 i;
@@ -485,7 +514,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys);
+			maxdepth, sortkeys, useqq);
 		sv_catpvn(retval, ")}", 2);
 	    }						     /* plain */
 	    else {
@@ -493,7 +522,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys);
+			maxdepth, sortkeys, useqq);
 	    }
 	    SvREFCNT_dec(namesv);
 	}
@@ -505,7 +534,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	    DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 		    postav, levelp,	indent, pad, xpad, apad, sep, pair,
 		    freezer, toaster, purity, deepcopy, quotekeys, bless,
-		    maxdepth, sortkeys);
+		    maxdepth, sortkeys, useqq);
 	    SvREFCNT_dec(namesv);
 	}
 	else if (realtype == SVt_PVAV) {
@@ -578,7 +607,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
 			levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys);
+			maxdepth, sortkeys, useqq);
 		if (ix < ixmax)
 		    sv_catpvn(retval, ",", 1);
 	    }
@@ -738,9 +767,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    The code is also smaller (22044 vs 22260) because I've been
                    able to pull the common logic out to both sides.  */
                 if (quotekeys || needs_quote(key)) {
-                    if (do_utf8) {
+                    if (do_utf8 || useqq) {
                         STRLEN ocur = SvCUR(retval);
-                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen, useqq);
                         nkey = SvPVX(retval) + ocur;
                     }
                     else {
@@ -785,7 +814,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, newapad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys);
+			maxdepth, sortkeys, useqq);
 		SvREFCNT_dec(sname);
 		Safefree(nkey_buffer);
 		if (indent >= 2)
@@ -965,7 +994,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 				seenhv, postav, &nlevel, indent, pad, xpad,
 				newapad, sep, pair, freezer, toaster, purity,
 				deepcopy, quotekeys, bless, maxdepth, 
-				sortkeys);
+				sortkeys, useqq);
 			SvREFCNT_dec(e);
 		    }
 		}
@@ -980,8 +1009,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	else {
         integer_came_from_string:
 	    c = SvPV(val, i);
-	    if (DO_UTF8(val))
-	        i += esc_q_utf8(aTHX_ retval, c, i);
+	    if (DO_UTF8(val) || useqq)
+	        i += esc_q_utf8(aTHX_ retval, c, i, useqq);
 	    else {
 		sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
 		r = SvPVX(retval) + SvCUR(retval);
@@ -1026,7 +1055,7 @@ Data_Dumper_Dumpxs(href, ...)
 	    HV *seenhv = NULL;
 	    AV *postav, *todumpav, *namesav;
 	    I32 level = 0;
-	    I32 indent, terse, i, imax, postlen;
+	    I32 indent, terse, useqq, i, imax, postlen;
 	    SV **svp;
 	    SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
 	    SV *freezer, *toaster, *bless, *sortkeys;
@@ -1065,7 +1094,7 @@ Data_Dumper_Dumpxs(href, ...)
 		= freezer = toaster = bless = sortkeys = &PL_sv_undef;
 	    name = sv_newmortal();
 	    indent = 2;
-	    terse = purity = deepcopy = 0;
+	    terse = purity = deepcopy = useqq = 0;
 	    quotekeys = 1;
 	
 	    retval = newSVpvn("", 0);
@@ -1085,10 +1114,8 @@ Data_Dumper_Dumpxs(href, ...)
 		    purity = SvIV(*svp);
 		if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
 		    terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
 		if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
 		    useqq = SvTRUE(*svp);
-#endif
 		if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
 		    pad = *svp;
 		if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1191,7 +1218,7 @@ Data_Dumper_Dumpxs(href, ...)
 		    DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
 			    postav, &level, indent, pad, xpad, newapad, sep, pair,
 			    freezer, toaster, purity, deepcopy, quotekeys,
-			    bless, maxdepth, sortkeys);
+			    bless, maxdepth, sortkeys, useqq);
 		
 		    if (indent >= 2 && !terse)
 			SvREFCNT_dec(newapad);
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
old mode 100644
new mode 100755
index de5e87c..0b4c6f9
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -83,11 +83,11 @@ sub SKIP_TEST {
 $Data::Dumper::Useperl = 1;
 if (defined &Data::Dumper::Dumpxs) {
   print "### XS extension loaded, will run XS tests\n";
-  $TMAX = 363; $XS = 1;
+  $TMAX = 369; $XS = 1;
 }
 else {
   print "### XS extensions not loaded, will NOT run XS tests\n";
-  $TMAX = 183; $XS = 0;
+  $TMAX = 186; $XS = 0;
 }
 
 print "1..$TMAX\n";
@@ -296,21 +296,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
 {
   local $Data::Dumper::Useqq = 1;
   TEST q(Dumper($foo));
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
-  $WANT = <<"EOT";
-#\$VAR1 = {
-#  'abc\0\\'\efg' => 'mno\0',
-#  'reftest' => \\\\1
-#};
-EOT
-
-  {
-    local $Data::Dumper::Useqq = 1;
-    TEST q(Data::Dumper::DumperX($foo)) if $XS;   # cheat
-  }
-
-
 
 #############
 #############
@@ -1430,3 +1418,16 @@ EOT
 }
 
 
+############# 363
+{
+  $WANT = <<'EOT';
+#$VAR1 = [
+#  "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37"
+#];
+EOT
+
+  $foo = [ "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37" ];
+  local $Data::Dumper::Useqq = 1;
+  TEST q(Dumper($foo));
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
+}
-- 
1.7.0.3

@p5pRT
Copy link
Author

p5pRT commented Apr 30, 2010

From @eserte

The attachment named "work" went into the bug report by accident and may
be deleted.

Regards,
  Slaven

@p5pRT
Copy link
Author

p5pRT commented Sep 27, 2010

From @cpansprout

On Fri Apr 30 01​:52​:00 2010, srezic@​iconmobile.com wrote​:

The patch basically misuses the existing esc_q_utf8 function which
already does limited "..." escaping, and adds additional escaping for
characters < 0x20.

Regards,
Slaven

Thank you for the patch. It looks nice, but there’s one problem​:

$ ./perl -Ilib -MData​::Dumper -le '++$Data​::Dumper​::Useqq; print Dumper
join "", map chr, 0..255'
Malformed UTF-8 character (overflow at 0xc41461c8, byte 0xc9, after
start byte 0xbf) in subroutine entry at lib/Data/Dumper.pm line 190.
Malformed UTF-8 character (overflow at 0xc41461c8, byte 0xc9, after
start byte 0xbf) in subroutine entry at lib/Data/Dumper.pm line 190.
$VAR1 =
"\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37
!\"#\$%&'()*+,-./0123456789​:;<=>?\@​ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\1B\x{83}\x{c4}\x{105}\x{146}\x{187}\x{1c8}\x{209}\x{24a}\x{28b}\x{2cc}\x{30d}\x{34e}\x{38f}\x{3d0}\x{411}\x{452}\x{493}\x{4d4}\x{515}\x{556}\x{597}\x{5d8}\x{619}\x{65a}\x{69b}\x{6dc}\x{71d}\x{75e}\x{79f}\x{7e0}\x{862}\x{18a3}\x{28e4}\x{3925}\x{4966}\x{59a7}\x{69e8}\x{7a29}\x{8a6a}\x{9aab}\x{aaec}\x{bb2d}\x{cb6e}\x{dbaf}\x{ebf0}\x{fc31}\x{31cb3}\x{72cf4}\x{b3d35}\x{f4d76}\x{135db7}\x{176df8}\x{1b7e39}\x{1f8e7a}\x{e7aefc}\x{1ebbf3d}\x{2efcf7e}\x{3f3dfbf}\x{3dfbf001}\x{7efc0042}\x{c00420c4}\0\1\x{83}\x{105}\x{187}\x{209}\x{28b}\x{30d}\x{38f}\x{411}\x{493}\x{515}\x{597}\x{619}\x{69b}\x{71d}\x{79f}\x{862}\x{3925}\x{69e8}\x{9aab}\x{cb6e}\x{fc31}\x{b3d35}\x{1b7e39}\x{2efcf7e}\0";

This is the output I get with the pure-Perl version​:

$ perl -MData​::Dumper -le '++$Data​::Dumper​::Useqq; print Dumper join "",
map chr, 0..255'
$VAR1 =
"\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37
!\"#\$%&'()*+,-./0123456789​:;<=>?\@​ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377";

@p5pRT
Copy link
Author

p5pRT commented Sep 27, 2010

The RT System itself - Status changed from 'new' to 'open'

@p5pRT
Copy link
Author

p5pRT commented Sep 27, 2010

From @eserte

Father Chrysostomos via RT wrote​:

On Fri Apr 30 01​:52​:00 2010, srezic@​iconmobile.com wrote​:

The patch basically misuses the existing esc_q_utf8 function which
already does limited "..." escaping, and adds additional escaping for
characters < 0x20.

Regards,
Slaven

Thank you for the patch. It looks nice, but there’s one problem​:

$ ./perl -Ilib -MData​::Dumper -le '++$Data​::Dumper​::Useqq; print Dumper
join "", map chr, 0..255'
Malformed UTF-8 character (overflow at 0xc41461c8, byte 0xc9, after
start byte 0xbf) in subroutine entry at lib/Data/Dumper.pm line 190.
Malformed UTF-8 character (overflow at 0xc41461c8, byte 0xc9, after
start byte 0xbf) in subroutine entry at lib/Data/Dumper.pm line 190.
$VAR1 =
"\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37
!\"#\$%&'()*+,-./0123456789​:;<=>?\@​ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\1B\x{83}\x{c4}\x{105}\x{146}\x{187}\x{1c8}\x{209}\x{24a}\x{28b}\x{2cc}\x{30d}\x{34e}\x{38f}\x{3d0}\x{411}\x{452}\x{493}\x{4d4}\x{515}\x{556}\x{597}\x{5d8}\x{619}\x{65a}\x{69b}\x{6dc}\x{71d}\x{75e}\x{79f}\x{7e0}\x{862}\x{18a3}\x{28e4}\x{3925}\x{4966}\x{59a7}\x{69e8}\x{7a29}\x{8a6a}\x{9aab}\x{aaec}\x{bb2d}\x{cb6e}\x{dbaf}\x{ebf0}\x{fc31}\x{31cb3}\x{72cf4}\x{b3d35}\x{f4d76}\x{135db7}\x{176df8}\x{1b7e39}\x{1f8e7a}\x{e7aefc}\x{1ebbf3d}\x{2efcf7e}\x{3f3dfbf}\x{3dfbf001}\x{7efc0042}\x{c00420c4}\0\1\x{83}\x{105}\x{187}\x{209}\x{28b}\x{30d}\x{38f}\x{411}\x{493}\x{515}\x{597}\x{619}\x{69b}\x{71d}\x{79f}\x{862}\x{3925}\x{69e8}\x{9aab}\x{cb6e}\x{fc31}\x{b3d35}\x{1b7e39}\x{2efcf7e}\0";

This is the output I get with the pure-Perl version​:

$ perl -MData​::Dumper -le '++$Data​::Dumper​::Useqq; print Dumper join "",
map chr, 0..255'
$VAR1 =
"\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37
!\"#\$%&'()*+,-./0123456789​:;<=>?\@​ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377";

This suggests that there are some tests missing. So please accept first
the attached patch which just add tests for all of 0..255. I'll look
into the problems later.

Regards,
  Slaven

@p5pRT
Copy link
Author

p5pRT commented Sep 27, 2010

From @eserte

0001-test-Data-Dumper-with-all-latin1-characters.patch
From 295088638affff387e10d4fcc186be6b65b07d14 Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Mon, 27 Sep 2010 12:36:58 +0200
Subject: [PATCH] test Data::Dumper with all latin1 characters

---
 dist/Data-Dumper/t/dumper.t |   31 +++++++++++++++++++++++++++++--
 1 files changed, 29 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 dist/Data-Dumper/t/dumper.t

diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
old mode 100644
new mode 100755
index 17a7466..915d46d
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -83,11 +83,11 @@ sub SKIP_TEST {
 $Data::Dumper::Useperl = 1;
 if (defined &Data::Dumper::Dumpxs) {
   print "### XS extension loaded, will run XS tests\n";
-  $TMAX = 366; $XS = 1;
+  $TMAX = 378; $XS = 1;
 }
 else {
   print "### XS extensions not loaded, will NOT run XS tests\n";
-  $TMAX = 183; $XS = 0;
+  $TMAX = 189; $XS = 0;
 }
 
 print "1..$TMAX\n";
@@ -1439,3 +1439,30 @@ TEST q(join " ", new Data::Dumper [[]],[] =>->Dumpxs),
     '$obj->Dumpxs in list context'
  if $XS;
 
+############# 366
+{
+  $WANT = <<'EOT';
+#$VAR1 = [
+#  "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
+#];
+EOT
+
+  $foo = [ join "", map chr, 0..255 ];
+  local $Data::Dumper::Useqq = 1;
+  TEST q(Dumper($foo)), 'All latin1 characters';
+  for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+}
+
+############# 372
+{
+  $WANT = <<'EOT';
+#$VAR1 = [
+#  "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}"
+#];
+EOT
+
+  $foo = [ join "", map chr, 0..255, 0x20ac ];
+  local $Data::Dumper::Useqq = 1;
+  TEST q(Dumper($foo)), 'All latin1 characters with utf8 flag including a wide character';
+  for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+}
-- 
1.7.0.3

@p5pRT
Copy link
Author

p5pRT commented Sep 28, 2010

From @cpansprout

On Mon Sep 27 04​:53​:05 2010, srezic@​iconmobile.com wrote​:

This suggests that there are some tests missing. So please accept
first
the attached patch which just add tests for all of 0..255. I'll look
into the problems later.

Thank you. I’ve applied it as 45e462c.

@p5pRT
Copy link
Author

p5pRT commented Jun 25, 2013

From @tonycoz

On Mon Sep 27 04​:53​:05 2010, srezic@​iconmobile.com wrote​:

This suggests that there are some tests missing. So please accept
first
the attached patch which just add tests for all of 0..255. I'll look
into the problems later.

Hi Slaven,

Did you ever have a chance to look further into this?

Tony

@p5pRT
Copy link
Author

p5pRT commented Jun 25, 2013

From @demerphq

its obvious to me his patch treated raw binary as utf8. dd only uses octal
when the string is non utf8

On Tuesday, 25 June 2013, Tony Cook via RT wrote​:

On Mon Sep 27 04​:53​:05 2010, srezic@​iconmobile.com <javascript​:;> wrote​:

This suggests that there are some tests missing. So please accept
first
the attached patch which just add tests for all of 0..255. I'll look
into the problems later.

Hi Slaven,

Did you ever have a chance to look further into this?

Tony

---
via perlbug​: queue​: perl5 status​: open
https://rt-archive.perl.org/perl5/Ticket/Display.html?id=74798

--
perl -Mre=debug -e "/just|another|perl|hacker/"

@p5pRT
Copy link
Author

p5pRT commented Jun 25, 2013

From @tonycoz

I may look at it once I run out of patch tickets to look at[1], but if
he has some of the work done, or simply never got around to sending it
in, it will save some effort.

Tony

On Tue, Jun 25, 2013 at 08​:15​:45AM +0200, demerphq wrote​:

its obvious to me his patch treated raw binary as utf8. dd only uses octal
when the string is non utf8

On Tuesday, 25 June 2013, Tony Cook via RT wrote​:

On Mon Sep 27 04​:53​:05 2010, srezic@​iconmobile.com <javascript​:;> wrote​:

This suggests that there are some tests missing. So please accept
first
the attached patch which just add tests for all of 0..255. I'll look
into the problems later.

Hi Slaven,

Did you ever have a chance to look further into this?

[1] or run out of sanity <cackle>

@p5pRT
Copy link
Author

p5pRT commented Jun 28, 2013

From @pjcj

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:

(Anyway, how good's test coverage of Data​::Dumper?)

Pretty good, thanks to some recent work by Jim Keenan​:

http​://cpancover.com/latest/Data-Dumper-2.145/index.html

--
Paul Johnson - paul@​pjcj.net
http​://www.pjcj.net

@p5pRT
Copy link
Author

p5pRT commented Jul 2, 2013

From @tonycoz

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:

Tony Cook via RT wrote​:

On Mon Sep 27 04​:53​:05 2010, srezic@​iconmobile.com wrote​:

This suggests that there are some tests missing. So please accept
first
the attached patch which just add tests for all of 0..255. I'll look
into the problems later.
Hi Slaven,

Did you ever have a chance to look further into this?

Tony

Hi Tony,

here's a revised patch.

(Anyway, how good's test coverage of Data​::Dumper?)

Somehow this message didn't make it into RT.

There's one bug I can see with this change​:

  $ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq; print Dumper("\x011")'
  $VAR1 = "\11";

This isn't present in the perl version​:

  $ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq; ++$Data​::Dumper​::Useperl; print Dumper("\x011")'
  $VAR1 = "\0011";

Tony

@p5pRT
Copy link
Author

p5pRT commented Jul 10, 2013

From @tonycoz

On Mon Jul 01 17​:35​:57 2013, tonyc wrote​:

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:

here's a revised patch.
Somehow this message didn't make it into RT.

Attached the patch so people looking at the ticket don't get confused.

Tony

@p5pRT
Copy link
Author

p5pRT commented Jul 10, 2013

From @tonycoz

0001-Data-Dumper-useqq-implementation-for-xs.patch
>From 3bd39fb5d85722f06dd1741127eb52f3ed4068e1 Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Mon, 27 Sep 2010 12:53:58 +0200
Subject: [PATCH] Data::Dumper: useqq implementation for xs

Tests are mainly unchanged, just a "cheat" and a couple of TODOs were
removed.
---
 dist/Data-Dumper/Dumper.pm  |    1 -
 dist/Data-Dumper/Dumper.xs  |   85 ++++++++++++++++++++++++++++++-------------
 dist/Data-Dumper/t/dumper.t |   17 ++-------
 3 files changed, 63 insertions(+), 40 deletions(-)
 mode change 100644 => 100755 dist/Data-Dumper/t/dumper.t

diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 7c778dc..e11323a 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -221,7 +221,6 @@ sub DESTROY {}
 sub Dump {
     return &Dumpxs
     unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
-           $Data::Dumper::Useqq   || (ref($_[0]) && $_[0]->{useqq}) ||
            $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
     return &Dumpperl;
 }
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index b74650a..2a19097 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -18,7 +18,7 @@
 
 static I32 num_q (const char *s, STRLEN slen);
 static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
 static I32 needs_quote(const char *s, STRLEN len);
 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
@@ -26,7 +26,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
 		    SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
 		    SV *freezer, SV *toaster,
 		    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-		    I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);
+		    I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq);
 
 #ifndef HvNAME_get
 #define HvNAME_get HvNAME
@@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen)
     return ret;
 }
 
+/* this function is also misused for implementing $Useqq */
 static I32
-esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 {
     char *r, *rstart;
     const char *s = src;
@@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
     int increment;
 
     /* this will need EBCDICification */
-    for (s = src; s < send; s += increment) {
-        const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+    for (s = src; s < send; do_utf8 ? s += increment : s++) {
+        const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
 
         /* check for invalid utf8 */
         increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
@@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
 #endif
                 );
+#ifndef EBCDIC
+	} else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+	    grow += 2;
+	} else if (useqq && k <= 31) {
+	    grow += 3;
+	} else if (useqq && k >= 127) {
+	    grow += 4;
+#endif
         } else if (k == '\\') {
             backslashes++;
         } else if (k == '\'') {
@@ -205,7 +214,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
             normal++;
         }
     }
-    if (grow) {
+    if (grow || useqq) {
         /* We have something needing hex. 3 is ""\0 */
         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
 		+ 2*qq_escapables + normal);
@@ -213,8 +222,9 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
 
         *r++ = '"';
 
-        for (s = src; s < send; s += UTF8SKIP(s)) {
-            const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+        for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
+            const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
+
 
             if (k == '"' || k == '\\' || k == '$' || k == '@') {
                 *r++ = '\\';
@@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
 #ifdef EBCDIC
 	      if (isprint(k) && k < 256)
 #else
+	      if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+		*r++ = '\\';
+		switch (k) {
+		    case 7:  *r++ = 'a'; break;
+		    case 8:  *r++ = 'b'; break;
+		    case 9:  *r++ = 't'; break;
+		    case 10: *r++ = 'n'; break;
+		    case 12: *r++ = 'f'; break;
+		    case 13: *r++ = 'r'; break;
+		    case 27: *r++ = 'e'; break;
+                    default:
+                      /* faster than
+                       * r = r + my_sprintf(r, "%o", k);
+                       */
+                      if (k <= 7) {
+                        *r++ = (char)k + '0';
+                      } else if (k <= 63) {
+                        *r++ = (char)(k>>3) + '0';
+                        *r++ = (char)(k&7) + '0';
+                      } else {
+                        *r++ = (char)(k>>6) + '0';
+                        *r++ = (char)((k&63)>>3) + '0';
+                        *r++ = (char)(k&7) + '0';
+		      }
+		}
+	      }
+	      else
 	      if (k < 0x80)
 #endif
                 *r++ = (char)k;
@@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
 	SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
 	I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
-        int use_sparse_seen_hash)
+        int use_sparse_seen_hash, I32 useqq)
 {
     char tmpbuf[128];
     U32 i;
@@ -524,7 +561,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		sv_catpvn(retval, ")}", 2);
 	    }						     /* plain */
 	    else {
@@ -532,7 +569,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 	    }
 	    SvREFCNT_dec(namesv);
 	}
@@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	    DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 		    postav, levelp,	indent, pad, xpad, apad, sep, pair,
 		    freezer, toaster, purity, deepcopy, quotekeys, bless,
-		    maxdepth, sortkeys, use_sparse_seen_hash);
+		    maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 	    SvREFCNT_dec(namesv);
 	}
 	else if (realtype == SVt_PVAV) {
@@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
 			levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		if (ix < ixmax)
 		    sv_catpvn(retval, ",", 1);
 	    }
@@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    The code is also smaller (22044 vs 22260) because I've been
                    able to pull the common logic out to both sides.  */
                 if (quotekeys || needs_quote(key,keylen)) {
-                    if (do_utf8) {
+                    if (do_utf8 || useqq) {
                         STRLEN ocur = SvCUR(retval);
-                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
                         nkey = SvPVX(retval) + ocur;
                     }
                     else {
@@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, newapad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		SvREFCNT_dec(sname);
 		Safefree(nkey_buffer);
 		if (indent >= 2)
@@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		r = SvPVX(retval)+SvCUR(retval);
 		r[0] = '*'; r[1] = '{';
 		SvCUR_set(retval, SvCUR(retval)+2);
-		esc_q_utf8(aTHX_ retval, c, i);
+		esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
 		sv_grow(retval, SvCUR(retval)+2);
 		r = SvPVX(retval)+SvCUR(retval);
 		r[0] = '}'; r[1] = '\0';
@@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 				seenhv, postav, &nlevel, indent, pad, xpad,
 				newapad, sep, pair, freezer, toaster, purity,
 				deepcopy, quotekeys, bless, maxdepth, 
-				sortkeys, use_sparse_seen_hash);
+				sortkeys, use_sparse_seen_hash, useqq);
 			SvREFCNT_dec(e);
 		    }
 		}
@@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	else {
         integer_came_from_string:
 	    c = SvPV(val, i);
-	    if (DO_UTF8(val))
-	        i += esc_q_utf8(aTHX_ retval, c, i);
+	    if (DO_UTF8(val) || useqq)
+	        i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
 	    else {
 		sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
 		r = SvPVX(retval) + SvCUR(retval);
@@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...)
 	    HV *seenhv = NULL;
 	    AV *postav, *todumpav, *namesav;
 	    I32 level = 0;
-	    I32 indent, terse, i, imax, postlen;
+	    I32 indent, terse, useqq, i, imax, postlen;
 	    SV **svp;
 	    SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
 	    SV *freezer, *toaster, *bless, *sortkeys;
@@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...)
 		= freezer = toaster = bless = sortkeys = &PL_sv_undef;
 	    name = sv_newmortal();
 	    indent = 2;
-	    terse = purity = deepcopy = 0;
+	    terse = purity = deepcopy = useqq = 0;
 	    quotekeys = 1;
 	
 	    retval = newSVpvn("", 0);
@@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...)
 		    purity = SvIV(*svp);
 		if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
 		    terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
 		if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
 		    useqq = SvTRUE(*svp);
-#endif
 		if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
 		    pad = *svp;
 		if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...)
 		    DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
 			    postav, &level, indent, pad, xpad, newapad, sep, pair,
 			    freezer, toaster, purity, deepcopy, quotekeys,
-			    bless, maxdepth, sortkeys, use_sparse_seen_hash);
+			    bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		    SPAGAIN;
 		
 		    if (indent >= 2 && !terse)
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
old mode 100644
new mode 100755
index c1e5fe6..5ae287e
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
 {
   local $Data::Dumper::Useqq = 1;
   TEST q(Dumper($foo));
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
-  $WANT = <<"EOT";
-#\$VAR1 = {
-#  'abc\0\\'\efg' => 'mno\0',
-#  'reftest' => \\\\1
-#};
-EOT
-
-  {
-    local $Data::Dumper::Useqq = 1;
-    TEST q(Data::Dumper::DumperX($foo)) if $XS;   # cheat
-  }
-
 
 
 #############
@@ -1461,7 +1450,7 @@ EOT
   $foo = [ join "", map chr, 0..255 ];
   local $Data::Dumper::Useqq = 1;
   TEST q(Dumper($foo)), 'All latin1 characters';
-  for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
 ############# 372
@@ -1481,7 +1470,7 @@ EOT
     TEST q(Dumper($foo)),
 	 'All latin1 characters with utf8 flag including a wide character';
   }
-  for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
 ############# 378
-- 
1.7.2.5

@p5pRT
Copy link
Author

p5pRT commented Jul 10, 2013

From @tonycoz

On Mon Jul 01 17​:35​:57 2013, tonyc wrote​:

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:
There's one bug I can see with this change​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq; print
Dumper("\x011")'
$VAR1 = "\11";

This isn't present in the perl version​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq;
++$Data​::Dumper​::Useperl; print Dumper("\x011")'
$VAR1 = "\0011";

I've attached a new series of patches​:

0001 - Slaven's patch, but don't make dumper.t +x
0002 - match whitespace to the original code
0003 - fix the problem described above

Tony

@p5pRT
Copy link
Author

p5pRT commented Jul 10, 2013

From @tonycoz

0003-handle-xs-Useqq-dumping-of-strings-with-an-escape-fo.patch
From a4c0e5ce766c5f361af161e4bb6b42f71b2aecd0 Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Wed, 10 Jul 2013 14:54:20 +1000
Subject: [PATCH 3/3] handle xs Useqq dumping of strings with an escape
 followed by a digit

The original patch didn't handle a string like "\x001" correctly, encoding
it as "\01" rather than "\0001".

Added tests for this case and some possible corner cases
---
 dist/Data-Dumper/Dumper.pm  |    2 +-
 dist/Data-Dumper/Dumper.xs  |   32 ++++++++++++++++++++++++++------
 dist/Data-Dumper/t/dumper.t |   34 ++++++++++++++++++++++++++++++++--
 3 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index e11323a..fca6ab1 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -10,7 +10,7 @@
 package Data::Dumper;
 
 BEGIN {
-    $VERSION = '2.146'; # Don't forget to set version and release
+    $VERSION = '2.147'; # Don't forget to set version and release
 }               # date in POD below!
 
 #$| = 1;
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 60fe404..0194a2c 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -175,6 +175,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
     STRLEN qq_escapables = 0;	/* " $ @ will need a \ in "" strings.  */
     STRLEN normal = 0;
     int increment;
+    UV next;
 
     /* this will need EBCDICification */
     for (s = src; s < send; do_utf8 ? s += increment : s++) {
@@ -183,6 +184,12 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
         /* check for invalid utf8 */
         increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
 
+	/* this is only used to check if the next character is an
+	 * ASCII digit, which are invariant, so if the following collects
+	 * a UTF-8 start byte it does no harm
+	 */
+	next = (s + increment >= send ) ? 0 : *(U8*)(s+increment);
+
 #ifdef EBCDIC
 	if (!isprint(k) || k > 256) {
 #else
@@ -197,11 +204,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 #endif
                 );
 #ifndef EBCDIC
-	} else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+	} else if (useqq &&
+	    /* we can't use the short form like '\0' if followed by a digit */
+                   ((k >= 7 && k <= 10 || k == 12 || k == 13 || k == 27)
+                 || (k < 8 && (next < '0' || next > '9')))) {
 	    grow += 2;
-	} else if (useqq && k <= 31) {
+	} else if (useqq && k <= 31 && (next < '0' || next > '9')) {
 	    grow += 3;
-	} else if (useqq && k >= 127) {
+	} else if (useqq && (k <= 31 || k >= 127)) {
 	    grow += 4;
 #endif
         } else if (k == '\\') {
@@ -225,7 +235,6 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
         for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
             const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
 
-
             if (k == '"' || k == '\\' || k == '$' || k == '@') {
                 *r++ = '\\';
                 *r++ = (char)k;
@@ -235,6 +244,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 	      if (isprint(k) && k < 256)
 #else
 	      if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+	        bool next_is_digit;
+
 		*r++ = '\\';
 		switch (k) {
 		case 7:  *r++ = 'a'; break;
@@ -245,12 +256,21 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 		case 13: *r++ = 'r'; break;
 		case 27: *r++ = 'e'; break;
 		default:
+		    increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+		    /* only ASCII digits matter here, which are invariant,
+		     * since we only encode characters \377 and under, or
+		     * \x177 and under for a unicode string
+		     */
+		    next = (s+increment < send) ? *(U8*)(s+increment) : 0;
+		    next_is_digit = next >= '0' && next <= '9';
+
 		    /* faster than
 		     * r = r + my_sprintf(r, "%o", k);
 		     */
-		    if (k <= 7) {
+		    if (k <= 7 && !next_is_digit) {
 			*r++ = (char)k + '0';
-		    } else if (k <= 63) {
+		    } else if (k <= 63 && !next_is_digit) {
 			*r++ = (char)(k>>3) + '0';
 			*r++ = (char)(k&7) + '0';
 		    } else {
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index 5ae287e..0bc4746 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -83,11 +83,11 @@ sub SKIP_TEST {
 $Data::Dumper::Useperl = 1;
 if (defined &Data::Dumper::Dumpxs) {
   print "### XS extension loaded, will run XS tests\n";
-  $TMAX = 402; $XS = 1;
+  $TMAX = 420; $XS = 1;
 }
 else {
   print "### XS extensions not loaded, will NOT run XS tests\n";
-  $TMAX = 201; $XS = 0;
+  $TMAX = 213; $XS = 0;
 }
 
 print "1..$TMAX\n";
@@ -1526,3 +1526,33 @@ EOW
   TEST q(Data::Dumper->Dumpxs([\*finkle])), 'blessed overloaded globs (xs)'
     if $XS;
 }
+############# 390
+{
+  # [perl #74798] uncovered behaviour
+  $WANT = <<'EOW';
+#$VAR1 = "\0000";
+EOW
+  local $Data::Dumper::Useqq = 1;
+  TEST q(Data::Dumper->Dump(["\x000"])),
+    "\\ octal followed by digit";
+  TEST q(Data::Dumper->Dumpxs(["\x000"])), '\\ octal followed by digit (xs)'
+    if $XS;
+
+  $WANT = <<'EOW';
+#$VAR1 = "\x{100}\0000";
+EOW
+  local $Data::Dumper::Useqq = 1;
+  TEST q(Data::Dumper->Dump(["\x{100}\x000"])),
+    "\\ octal followed by digit unicode";
+  TEST q(Data::Dumper->Dumpxs(["\x{100}\x000"])), '\\ octal followed by digit unicode (xs)'
+    if $XS;
+
+
+  $WANT = <<'EOW';
+#$VAR1 = "\0\x{660}";
+EOW
+  TEST q(Data::Dumper->Dump(["\\x00\\x{0660}"])),
+    "\\ octal followed by unicode digit";
+  TEST q(Data::Dumper->Dumpxs(["\\x00\\x{0660}"])), '\\ octal followed by unicode digit (xs)'
+    if $XS;
+}
-- 
1.7.10.4

@p5pRT
Copy link
Author

p5pRT commented Jul 10, 2013

From @tonycoz

0001-Data-Dumper-useqq-implementation-for-xs.patch
From 87058463731e54ccb3e1a9d7f3a3e605db56100b Mon Sep 17 00:00:00 2001
From: Slaven Rezic <srezic@iconmobile.com>
Date: Wed, 10 Jul 2013 14:18:18 +1000
Subject: [PATCH 1/3] Data::Dumper: useqq implementation for xs

Tests are mainly unchanged, just a "cheat" and a couple of TODOs were
removed.
---
 dist/Data-Dumper/Dumper.pm  |    1 -
 dist/Data-Dumper/Dumper.xs  |   85 ++++++++++++++++++++++++++++++-------------
 dist/Data-Dumper/t/dumper.t |   17 ++-------
 3 files changed, 63 insertions(+), 40 deletions(-)

diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 7c778dc..e11323a 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -221,7 +221,6 @@ sub DESTROY {}
 sub Dump {
     return &Dumpxs
     unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
-           $Data::Dumper::Useqq   || (ref($_[0]) && $_[0]->{useqq}) ||
            $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
     return &Dumpperl;
 }
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index b74650a..2a19097 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -18,7 +18,7 @@
 
 static I32 num_q (const char *s, STRLEN slen);
 static I32 esc_q (char *dest, const char *src, STRLEN slen);
-static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
+static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
 static I32 needs_quote(const char *s, STRLEN len);
 static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
 static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
@@ -26,7 +26,7 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
 		    SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
 		    SV *freezer, SV *toaster,
 		    I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
-		    I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);
+		    I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq);
 
 #ifndef HvNAME_get
 #define HvNAME_get HvNAME
@@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen)
     return ret;
 }
 
+/* this function is also misused for implementing $Useqq */
 static I32
-esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
+esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 {
     char *r, *rstart;
     const char *s = src;
@@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
     int increment;
 
     /* this will need EBCDICification */
-    for (s = src; s < send; s += increment) {
-        const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+    for (s = src; s < send; do_utf8 ? s += increment : s++) {
+        const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
 
         /* check for invalid utf8 */
         increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
@@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
                 k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
 #endif
                 );
+#ifndef EBCDIC
+	} else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
+	    grow += 2;
+	} else if (useqq && k <= 31) {
+	    grow += 3;
+	} else if (useqq && k >= 127) {
+	    grow += 4;
+#endif
         } else if (k == '\\') {
             backslashes++;
         } else if (k == '\'') {
@@ -205,7 +214,7 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
             normal++;
         }
     }
-    if (grow) {
+    if (grow || useqq) {
         /* We have something needing hex. 3 is ""\0 */
         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
 		+ 2*qq_escapables + normal);
@@ -213,8 +222,9 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
 
         *r++ = '"';
 
-        for (s = src; s < send; s += UTF8SKIP(s)) {
-            const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+        for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
+            const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
+
 
             if (k == '"' || k == '\\' || k == '$' || k == '@') {
                 *r++ = '\\';
@@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
 #ifdef EBCDIC
 	      if (isprint(k) && k < 256)
 #else
+	      if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+		*r++ = '\\';
+		switch (k) {
+		    case 7:  *r++ = 'a'; break;
+		    case 8:  *r++ = 'b'; break;
+		    case 9:  *r++ = 't'; break;
+		    case 10: *r++ = 'n'; break;
+		    case 12: *r++ = 'f'; break;
+		    case 13: *r++ = 'r'; break;
+		    case 27: *r++ = 'e'; break;
+                    default:
+                      /* faster than
+                       * r = r + my_sprintf(r, "%o", k);
+                       */
+                      if (k <= 7) {
+                        *r++ = (char)k + '0';
+                      } else if (k <= 63) {
+                        *r++ = (char)(k>>3) + '0';
+                        *r++ = (char)(k&7) + '0';
+                      } else {
+                        *r++ = (char)(k>>6) + '0';
+                        *r++ = (char)((k&63)>>3) + '0';
+                        *r++ = (char)(k&7) + '0';
+		      }
+		}
+	      }
+	      else
 	      if (k < 0x80)
 #endif
                 *r++ = (char)k;
@@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
 	SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
 	I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
-        int use_sparse_seen_hash)
+        int use_sparse_seen_hash, I32 useqq)
 {
     char tmpbuf[128];
     U32 i;
@@ -524,7 +561,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		sv_catpvn(retval, ")}", 2);
 	    }						     /* plain */
 	    else {
@@ -532,7 +569,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 	    }
 	    SvREFCNT_dec(namesv);
 	}
@@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	    DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
 		    postav, levelp,	indent, pad, xpad, apad, sep, pair,
 		    freezer, toaster, purity, deepcopy, quotekeys, bless,
-		    maxdepth, sortkeys, use_sparse_seen_hash);
+		    maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 	    SvREFCNT_dec(namesv);
 	}
 	else if (realtype == SVt_PVAV) {
@@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
 			levelp,	indent, pad, xpad, apad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		if (ix < ixmax)
 		    sv_catpvn(retval, ",", 1);
 	    }
@@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
                    The code is also smaller (22044 vs 22260) because I've been
                    able to pull the common logic out to both sides.  */
                 if (quotekeys || needs_quote(key,keylen)) {
-                    if (do_utf8) {
+                    if (do_utf8 || useqq) {
                         STRLEN ocur = SvCUR(retval);
-                        nlen = esc_q_utf8(aTHX_ retval, key, klen);
+                        nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
                         nkey = SvPVX(retval) + ocur;
                     }
                     else {
@@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
 			postav, levelp,	indent, pad, xpad, newapad, sep, pair,
 			freezer, toaster, purity, deepcopy, quotekeys, bless,
-			maxdepth, sortkeys, use_sparse_seen_hash);
+			maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		SvREFCNT_dec(sname);
 		Safefree(nkey_buffer);
 		if (indent >= 2)
@@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 		r = SvPVX(retval)+SvCUR(retval);
 		r[0] = '*'; r[1] = '{';
 		SvCUR_set(retval, SvCUR(retval)+2);
-		esc_q_utf8(aTHX_ retval, c, i);
+		esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
 		sv_grow(retval, SvCUR(retval)+2);
 		r = SvPVX(retval)+SvCUR(retval);
 		r[0] = '}'; r[1] = '\0';
@@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 				seenhv, postav, &nlevel, indent, pad, xpad,
 				newapad, sep, pair, freezer, toaster, purity,
 				deepcopy, quotekeys, bless, maxdepth, 
-				sortkeys, use_sparse_seen_hash);
+				sortkeys, use_sparse_seen_hash, useqq);
 			SvREFCNT_dec(e);
 		    }
 		}
@@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	else {
         integer_came_from_string:
 	    c = SvPV(val, i);
-	    if (DO_UTF8(val))
-	        i += esc_q_utf8(aTHX_ retval, c, i);
+	    if (DO_UTF8(val) || useqq)
+	        i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
 	    else {
 		sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
 		r = SvPVX(retval) + SvCUR(retval);
@@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...)
 	    HV *seenhv = NULL;
 	    AV *postav, *todumpav, *namesav;
 	    I32 level = 0;
-	    I32 indent, terse, i, imax, postlen;
+	    I32 indent, terse, useqq, i, imax, postlen;
 	    SV **svp;
 	    SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
 	    SV *freezer, *toaster, *bless, *sortkeys;
@@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...)
 		= freezer = toaster = bless = sortkeys = &PL_sv_undef;
 	    name = sv_newmortal();
 	    indent = 2;
-	    terse = purity = deepcopy = 0;
+	    terse = purity = deepcopy = useqq = 0;
 	    quotekeys = 1;
 	
 	    retval = newSVpvn("", 0);
@@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...)
 		    purity = SvIV(*svp);
 		if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
 		    terse = SvTRUE(*svp);
-#if 0 /* useqq currently unused */
 		if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
 		    useqq = SvTRUE(*svp);
-#endif
 		if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
 		    pad = *svp;
 		if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
@@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...)
 		    DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
 			    postav, &level, indent, pad, xpad, newapad, sep, pair,
 			    freezer, toaster, purity, deepcopy, quotekeys,
-			    bless, maxdepth, sortkeys, use_sparse_seen_hash);
+			    bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq);
 		    SPAGAIN;
 		
 		    if (indent >= 2 && !terse)
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index c1e5fe6..5ae287e 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
 {
   local $Data::Dumper::Useqq = 1;
   TEST q(Dumper($foo));
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
-  $WANT = <<"EOT";
-#\$VAR1 = {
-#  'abc\0\\'\efg' => 'mno\0',
-#  'reftest' => \\\\1
-#};
-EOT
-
-  {
-    local $Data::Dumper::Useqq = 1;
-    TEST q(Data::Dumper::DumperX($foo)) if $XS;   # cheat
-  }
-
 
 
 #############
@@ -1461,7 +1450,7 @@ EOT
   $foo = [ join "", map chr, 0..255 ];
   local $Data::Dumper::Useqq = 1;
   TEST q(Dumper($foo)), 'All latin1 characters';
-  for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
 ############# 372
@@ -1481,7 +1470,7 @@ EOT
     TEST q(Dumper($foo)),
 	 'All latin1 characters with utf8 flag including a wide character';
   }
-  for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
+  TEST q(Data::Dumper::DumperX($foo)) if $XS;
 }
 
 ############# 378
-- 
1.7.10.4

@p5pRT
Copy link
Author

p5pRT commented Jul 10, 2013

From @tonycoz

0002-adjust-indentation-to-match-other-DD-code.patch
From 155c29d345d666151ff3e56aa40a74cd737e9fab Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Wed, 10 Jul 2013 14:19:17 +1000
Subject: [PATCH 2/3] adjust indentation to match other DD code

---
 dist/Data-Dumper/Dumper.xs |   47 ++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 2a19097..60fe404 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -237,31 +237,30 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 	      if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
 		*r++ = '\\';
 		switch (k) {
-		    case 7:  *r++ = 'a'; break;
-		    case 8:  *r++ = 'b'; break;
-		    case 9:  *r++ = 't'; break;
-		    case 10: *r++ = 'n'; break;
-		    case 12: *r++ = 'f'; break;
-		    case 13: *r++ = 'r'; break;
-		    case 27: *r++ = 'e'; break;
-                    default:
-                      /* faster than
-                       * r = r + my_sprintf(r, "%o", k);
-                       */
-                      if (k <= 7) {
-                        *r++ = (char)k + '0';
-                      } else if (k <= 63) {
-                        *r++ = (char)(k>>3) + '0';
-                        *r++ = (char)(k&7) + '0';
-                      } else {
-                        *r++ = (char)(k>>6) + '0';
-                        *r++ = (char)((k&63)>>3) + '0';
-                        *r++ = (char)(k&7) + '0';
-		      }
+		case 7:  *r++ = 'a'; break;
+		case 8:  *r++ = 'b'; break;
+		case 9:  *r++ = 't'; break;
+		case 10: *r++ = 'n'; break;
+		case 12: *r++ = 'f'; break;
+		case 13: *r++ = 'r'; break;
+		case 27: *r++ = 'e'; break;
+		default:
+		    /* faster than
+		     * r = r + my_sprintf(r, "%o", k);
+		     */
+		    if (k <= 7) {
+			*r++ = (char)k + '0';
+		    } else if (k <= 63) {
+			*r++ = (char)(k>>3) + '0';
+			*r++ = (char)(k&7) + '0';
+		    } else {
+			*r++ = (char)(k>>6) + '0';
+			*r++ = (char)((k&63)>>3) + '0';
+			*r++ = (char)(k&7) + '0';
+		    }
 		}
-	      }
-	      else
-	      if (k < 0x80)
+	    }
+	    else if (k < 0x80)
 #endif
                 *r++ = (char)k;
             else {
-- 
1.7.10.4

@p5pRT
Copy link
Author

p5pRT commented Jul 17, 2013

From @tonycoz

On Tue Jul 09 22​:22​:37 2013, tonyc wrote​:

On Mon Jul 01 17​:35​:57 2013, tonyc wrote​:

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:
There's one bug I can see with this change​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq; print
Dumper("\x011")'
$VAR1 = "\11";

This isn't present in the perl version​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq;
++$Data​::Dumper​::Useperl; print Dumper("\x011")'
$VAR1 = "\0011";

I've attached a new series of patches​:

0001 - Slaven's patch, but don't make dumper.t +x
0002 - match whitespace to the original code
0003 - fix the problem described above

Applied as​:
9baac1a,
f1c4594,
dbf00f6 and
merge commit 2780a6e.

Tony

@p5pRT
Copy link
Author

p5pRT commented Jul 17, 2013

@tonycoz - Status changed from 'open' to 'resolved'

@p5pRT
Copy link
Author

p5pRT commented Jul 17, 2013

From @demerphq

On 17 July 2013 03​:11, Tony Cook via RT <perlbug-followup@​perl.org> wrote​:

On Tue Jul 09 22​:22​:37 2013, tonyc wrote​:

On Mon Jul 01 17​:35​:57 2013, tonyc wrote​:

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:
There's one bug I can see with this change​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq; print
Dumper("\x011")'
$VAR1 = "\11";

This isn't present in the perl version​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq;
++$Data​::Dumper​::Useperl; print Dumper("\x011")'
$VAR1 = "\0011";

I've attached a new series of patches​:

0001 - Slaven's patch, but don't make dumper.t +x
0002 - match whitespace to the original code
0003 - fix the problem described above

Applied as​:
9baac1a,
f1c4594,
dbf00f6 and
merge commit 2780a6e.

I dont think this patch can be right (I have not verified) and I dont
see tests for behavior I expect needs testing​:

#else
+ if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+ *r++ = '\\';
+ switch (k) {
+ case 7​: *r++ = 'a'; break;
+ case 8​: *r++ = 'b'; break;
+ case 9​: *r++ = 't'; break;
+ case 10​: *r++ = 'n'; break;
+ case 12​: *r++ = 'f'; break;
+ case 13​: *r++ = 'r'; break;
+ case 27​: *r++ = 'e'; break;
+ default​:
+ /* faster than
+ * r = r + my_sprintf(r, "%o", k);
+ */
+ if (k <= 7) {
+ *r++ = (char)k + '0';
+ } else if (k <= 63) {
+ *r++ = (char)(k>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ } else {
+ *r++ = (char)(k>>6) + '0';
+ *r++ = (char)((k&63)>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ }
+ }

I see nothing that deals with something like this​: "\0000"

That is, a string containing an octal null followed by a zero.

Or similar cases. The pureperl implementation DOES deal with this​:

$ perl -MData​::Dumper -e'$Data​::Dumper​::Useqq=1; print Dumper("\0000")'
$VAR1 = "\0000";

Now, I admit I havent *tried* the patch, but I did not see any code to
look ahead at the *next* character and check if it is an octal digit,
and if so force "three digit octal" mode.

Yves

--
perl -Mre=debug -e "/just|another|perl|hacker/"

@p5pRT
Copy link
Author

p5pRT commented Jul 19, 2013

From @eserte

yves orton via RT wrote​:

On 17 July 2013 03​:11, Tony Cook via RT<perlbug-followup@​perl.org> wrote​:

On Tue Jul 09 22​:22​:37 2013, tonyc wrote​:

On Mon Jul 01 17​:35​:57 2013, tonyc wrote​:

On Tue, Jun 25, 2013 at 03​:57​:04PM +0200, Slaven Rezic wrote​:
There's one bug I can see with this change​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq; print
Dumper("\x011")'
$VAR1 = "\11";

This isn't present in the perl version​:

$ ./perl -Ilib -MData​::Dumper -e '++$Data​::Dumper​::Useqq;
++$Data​::Dumper​::Useperl; print Dumper("\x011")'
$VAR1 = "\0011";

I've attached a new series of patches​:

0001 - Slaven's patch, but don't make dumper.t +x
0002 - match whitespace to the original code
0003 - fix the problem described above

Applied as​:
9baac1a,
f1c4594,
dbf00f6 and
merge commit 2780a6e.

I dont think this patch can be right (I have not verified) and I dont
see tests for behavior I expect needs testing​:

#else
+ if (useqq&& (k<= 31 || k == 127 || (!do_utf8&& k> 127))) {
+ *r++ = '\\';
+ switch (k) {
+ case 7​: *r++ = 'a'; break;
+ case 8​: *r++ = 'b'; break;
+ case 9​: *r++ = 't'; break;
+ case 10​: *r++ = 'n'; break;
+ case 12​: *r++ = 'f'; break;
+ case 13​: *r++ = 'r'; break;
+ case 27​: *r++ = 'e'; break;
+ default​:
+ /* faster than
+ * r = r + my_sprintf(r, "%o", k);
+ */
+ if (k<= 7) {
+ *r++ = (char)k + '0';
+ } else if (k<= 63) {
+ *r++ = (char)(k>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ } else {
+ *r++ = (char)(k>>6) + '0';
+ *r++ = (char)((k&63)>>3) + '0';
+ *r++ = (char)(k&7) + '0';
+ }
+ }

I see nothing that deals with something like this​: "\0000"

That is, a string containing an octal null followed by a zero.

Or similar cases. The pureperl implementation DOES deal with this​:

$ perl -MData​::Dumper -e'$Data​::Dumper​::Useqq=1; print Dumper("\0000")'
$VAR1 = "\0000";

Now, I admit I havent *tried* the patch, but I did not see any code to
look ahead at the *next* character and check if it is an octal digit,
and if so force "three digit octal" mode.

Seems to work​:

11​:32 slavenr@​dev08 blead (/work2/perl)​: /opt/perl5.19.1-492-g851ffa6/bin/perl5.19.2 -MData​::Dumper -e'print Data​::Dumper->new(["\0000"])->Useqq(1)->Dumpxs'
$VAR1 = "\0000";
11​:32 slavenr@​dev08 blead (
/work2/perl)​: /opt/perl5.19.1-492-g851ffa6/bin/perl5.19.2 -MData​::Dumper -e'print Data​::Dumper->new(["\0000"])->Useqq(1)->Dumpperl'
$VAR1 = "\0000";

Regards,
  Slaven

@p5pRT
Copy link
Author

p5pRT commented Jul 20, 2013

From @cpansprout

On Fri Jul 19 12​:18​:01 2013, srezic@​iconmobile.com wrote​:

Seems to work​:

11​:32 slavenr@​dev08 blead (~/work2/perl)​: /opt/perl5.19.1-492-
g851ffa6/bin/perl5.19.2 -MData​::Dumper -e'print Data​::Dumper-

new(["\0000"])->Useqq(1)->Dumpxs'
$VAR1 = "\0000";
11​:32 slavenr@​dev08 blead (~/work2/perl)​: /opt/perl5.19.1-492-
g851ffa6/bin/perl5.19.2 -MData​::Dumper -e'print Data​::Dumper-
new(["\0000"])->Useqq(1)->Dumpperl'
$VAR1 = "\0000";

Actually, there is one problem. Would you be willing to look into
<https://rt-archive.perl.org/perl5/Ticket/Display.html?id=118933>?

--

Father Chrysostomos

@p5pRT
Copy link
Author

p5pRT commented Jul 20, 2013

From @jkeenan

On Fri Jul 19 18​:01​:02 2013, sprout wrote​:

On Fri Jul 19 12​:18​:01 2013, srezic@​iconmobile.com wrote​:

Seems to work​:

11​:32 slavenr@​dev08 blead (~/work2/perl)​: /opt/perl5.19.1-492-
g851ffa6/bin/perl5.19.2 -MData​::Dumper -e'print Data​::Dumper-

new(["\0000"])->Useqq(1)->Dumpxs'
$VAR1 = "\0000";
11​:32 slavenr@​dev08 blead (~/work2/perl)​: /opt/perl5.19.1-492-
g851ffa6/bin/perl5.19.2 -MData​::Dumper -e'print Data​::Dumper-
new(["\0000"])->Useqq(1)->Dumpperl'
$VAR1 = "\0000";

Actually, there is one problem. Would you be willing to look into
<https://rt-archive.perl.org/perl5/Ticket/Display.html?id=118933>?

Given the questions raised by demerphq and by Father C in this post, I
think we should move this RT back to the Open category. Doing so now.

Thank you very much.
Jim Keenan

@p5pRT
Copy link
Author

p5pRT commented Jul 20, 2013

@jkeenan - Status changed from 'resolved' to 'open'

@p5pRT
Copy link
Author

p5pRT commented Jul 22, 2013

From @tonycoz

On Wed Jul 17 07​:14​:53 2013, demerphq wrote​:

I see nothing that deals with something like this​: "\0000"

That is, a string containing an octal null followed by a zero.

Or similar cases. The pureperl implementation DOES deal with this​:

$ perl -MData​::Dumper -e'$Data​::Dumper​::Useqq=1; print Dumper("\0000")'
$VAR1 = "\0000";

Now, I admit I havent *tried* the patch, but I did not see any code to
look ahead at the *next* character and check if it is an octal digit,
and if so force "three digit octal" mode.

Slaven's original patch didn't handle this case, I wrote a follow-up
change that tested for and fixed this behaviour.

That said, I need to look at the BBC failure to see what I've broken.

Tony

@p5pRT
Copy link
Author

p5pRT commented Jul 22, 2013

From @demerphq

On 22 July 2013 02​:32, Tony Cook via RT <perlbug-followup@​perl.org> wrote​:

On Wed Jul 17 07​:14​:53 2013, demerphq wrote​:

I see nothing that deals with something like this​: "\0000"

That is, a string containing an octal null followed by a zero.

Or similar cases. The pureperl implementation DOES deal with this​:

$ perl -MData​::Dumper -e'$Data​::Dumper​::Useqq=1; print Dumper("\0000")'
$VAR1 = "\0000";

Now, I admit I havent *tried* the patch, but I did not see any code to
look ahead at the *next* character and check if it is an octal digit,
and if so force "three digit octal" mode.

Slaven's original patch didn't handle this case, I wrote a follow-up
change that tested for and fixed this behaviour.

Yeah, I meant to reply to this thread thanking you for that. :-)

Thanks man!

Yves

--
perl -Mre=debug -e "/just|another|perl|hacker/"

@p5pRT
Copy link
Author

p5pRT commented Jul 22, 2013

From @tonycoz

On Fri Jul 19 19​:16​:12 2013, jkeenan wrote​:

On Fri Jul 19 18​:01​:02 2013, sprout wrote​:

Actually, there is one problem. Would you be willing to look into
<https://rt-archive.perl.org/perl5/Ticket/Display.html?id=118933>?

Given the questions raised by demerphq and by Father C in this post, I
think we should move this RT back to the Open category. Doing so now.

I've fixed the failure in autobox​::dump, and opened a ticket against
Project​::Easy​:

https://rt.cpan.org/Ticket/Display.html?id=87171

Tony

@p5pRT
Copy link
Author

p5pRT commented Aug 9, 2013

From @tonycoz

On Mon Jul 22 16​:56​:36 2013, tonyc wrote​:

On Fri Jul 19 19​:16​:12 2013, jkeenan wrote​:

On Fri Jul 19 18​:01​:02 2013, sprout wrote​:

Actually, there is one problem. Would you be willing to look into
<https://rt-archive.perl.org/perl5/Ticket/Display.html?id=118933>?

Given the questions raised by demerphq and by Father C in this post, I
think we should move this RT back to the Open category. Doing so now.

I've fixed the failure in autobox​::dump, and opened a ticket against
Project​::Easy​:

https://rt.cpan.org/Ticket/Display.html?id=87171

And closing this one (again) too.

Tony

@p5pRT
Copy link
Author

p5pRT commented Aug 9, 2013

@tonycoz - Status changed from 'open' to 'resolved'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant