Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

string to number radix support #337

Closed
p6rt opened this issue Sep 23, 2008 · 10 comments
Closed

string to number radix support #337

p6rt opened this issue Sep 23, 2008 · 10 comments
Labels

Comments

@p6rt
Copy link

p6rt commented Sep 23, 2008

Migrated from rt.perl.org#59222 (status was 'resolved')

Searchable as RT59222$

@p6rt
Copy link
Author

p6rt commented Sep 23, 2008

From jswitzer@gmail.com

This patch adds radix notation (:\d<...>) to the string-to-number conversion
routine Perl6Str::get_number. The tests haven't been written and
incorporated to test specifically the string conversion but the spectest
suite passes as of r31354 (parrot) and r22313 (t/spec). Coordination with
moritz_ will take place to determine the location and the extent of the
tests that will be created.

Not all radix constructs are supported yet (in the works). Currently does
not support nested notation, such as :10<12*23**3> or functional forms such
as :10('1234').

Thanks,

Jason "s1n" Switzer

@p6rt
Copy link
Author

p6rt commented Sep 23, 2008

From jswitzer@gmail.com

radix.patch
Index: perl6str.pmc
===================================================================
--- perl6str.pmc	(revision 31353)
+++ perl6str.pmc	(working copy)
@@ -23,6 +23,58 @@
 #include <ctype.h>
 #include <math.h>
 
+FLOATVAL parse_number(const char** start, const char* stop, FLOATVAL radix) {
+    FLOATVAL number = 0.0;
+    //continue until the end or until we've hit a non-digit
+    while (*start < stop && isxdigit((unsigned char)**start)) {
+        /* number must not contain values less than the radix */
+        unsigned int current;
+        if (isdigit(**start)) current = (**start) - '0';
+        else current = tolower(**start) - 'a' + 10;
+        //number += ((*start) - '0') * frac;
+        if (current >= radix)
+            return number;
+
+        number = number * radix + current;
+        (*start)++;
+
+        /* skip past the digit seperator _ */
+        if (*start < stop && **start == '_') {
+            if(!isxdigit((unsigned char)*((*start) + 1))) {
+                return number;
+            }
+            (*start)++;
+        }
+    }
+    return number;
+}
+
+FLOATVAL parse_fraction(const char** start, const char* stop, FLOATVAL radix) {
+    FLOATVAL frac = 1.0;
+    FLOATVAL number = 0.0;
+    while (*start < stop && isxdigit((unsigned char)**start)) {
+        frac /= radix;
+        unsigned int current;
+        if (isdigit(**start)) current = (**start) - '0';
+        else current = tolower(**start) - 'a' + 10;
+
+        /* if we discover scientific notation, stop parsing */
+        if (tolower(**start) == 'e' && radix == 10)
+            return number;
+        if (current >= radix)
+            return 0;
+        
+        number += current * frac;
+        (*start)++;
+        
+        /* skip past the digit seperator _ */
+        if (*start < stop && **start == '_'
+            && isdigit((unsigned char)**(start + 1)))
+            (*start)++;
+    }
+    return number;
+}
+
 pmclass Perl6Str
     extends  String
     provides string
@@ -63,12 +115,14 @@
 
     FLOATVAL get_number() {
         STRING   *s    = (STRING *)PMC_str_val(SELF);
-        FLOATVAL  sign = 1.0;
-        FLOATVAL  f    = 0.0;
+        FLOATVAL sign = 1.0;
+        FLOATVAL f    = 0.0;
+        FLOATVAL radix = 10.0;
+        int radfound = 0;
 
-        if (s) {
+        if (s && s->strstart) {
             const char *s1         = s->strstart;
-            const char * const end = s1 + s->bufused;
+            const char * const end = s1 + strlen(s1);
 
             /* skip leading whitespace */
             while (s1 < end && isspace((unsigned char)*s1))
@@ -92,92 +146,59 @@
                     case 'x':
                     case 'X':
                         s1++;
-                        while (s1 < end && isxdigit((unsigned char)*s1)) {
-                            f *= 16;
-                            if (isdigit((unsigned char)*s1))
-                                f += (*s1) - '0';
-                            else
-                                f += tolower((unsigned char)*s1) - 'a' + 10;
-
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-                        return sign * f;
+                        return parse_number(&s1, end, 16);
                     case 'd':
                     case 'D':
                         s1++;
-                        while (s1 < end && isdigit((unsigned char)*s1)) {
-                            f = f * 10 + (*s1) - '0';
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-
-                        return sign * f;
+                        return parse_number(&s1, end, 10);
                     case 'o':
                     case 'O':
                         s1++;
-                        while (s1 < end && isdigit((unsigned char)*s1)
-                                        && *s1 <= '7') {
-                            f = f * 8 + (*s1) - '0';
-
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-
-                        return sign * f;
+                        return parse_number(&s1, end, 8);
                     case 'b':
                     case 'B':
                         s1++;
-                        while (s1 < end && (*s1 == '0' || *s1 == '1')) {
-                            f = f * 2 + (*s1) - '0';
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-
-                        return sign * f;
+                        return parse_number(&s1, end, 2);
                     default:
                         break;
                 }
                 if (s1 < end && *s1 == '_'
-                && isdigit((unsigned char)*(s1+1)))
+                    && isdigit((unsigned char)*(s1 + 1)))
                     s1++;
             }
 
-            /* handle integer part */
-            while (s1 < end && isdigit((unsigned char)*s1)) {
-                f = f * 10 + (*s1) - '0';
+            /* handle \d+:([0..9A..Za..z]) radix notation */
+            if (s1 < end && *s1 == ':') {
                 s1++;
+                radfound = 1;
+		
+                if (s1 >= end)
+                    return f;
 
-                if (s1 < end && *s1 == '_' && isdigit((unsigned char)*(s1+1)))
+                radix = parse_number(&s1, end, 10);
+                if(*s1 != '<') {
+                    return 0.0;
+                } else {
                     s1++;
+                }
             }
 
-            /* handle floating point part */
+
+            /* handle integer part */
+            if (s1 < end && isxdigit((unsigned char)*s1)) {
+                f = parse_number(&s1, end, radix);
+            }
+
+            /* handle floating point part */
             if (s1 < end && *s1 == '.') {
-                FLOATVAL frac = 1.0;
                 s1++;
-                while (s1 < end && isdigit((unsigned char)*s1)) {
-                    frac /= 10;
-                    f    += ((*s1) - '0') * frac;
-                    s1++;
-                    if (s1 < end && *s1 == '_'
-                    && isdigit((unsigned char)*(s1+1)))
-                        s1++;
-                }
+                f += parse_fraction(&s1, end, radix);
             }
 
             /* handle exponential part */
             if (s1 < end && (*s1 == 'e' || *s1 == 'E')) {
-                INTVAL exp_val  = 0;
-                INTVAL exp_sign = 1;
+                int exp_val  = 0;
+                int exp_sign = 1;
                 s1++;
 
                 if (s1 < end && *s1 == '+')
@@ -197,6 +218,10 @@
 
                 return sign * f * pow(10.0, exp_val * exp_sign);
             }
+            
+            //FIXME how to check for missing >?
+            if (radfound && *s1 != '>')
+                return 0.0;
         }
 
         return sign * f;

@p6rt
Copy link
Author

p6rt commented Sep 23, 2008

From @moritz

jason switzer (via RT) wrote​:

# New Ticket Created by "jason switzer"
# Please include the string​: [perl #​59222]
# in the subject line of all future correspondence about this issue.
# <URL​: http://rt.perl.org/rt3/Ticket/Display.html?id=59222 >

This patch adds radix notation (:\d<...>) to the string-to-number conversion
routine Perl6Str::get_number. The tests haven't been written and
incorporated to test specifically the string conversion but the spectest
suite passes as of r31354 (parrot) and r22313 (t/spec). Coordination with
moritz_ will take place to determine the location and the extent of the
tests that will be created.

The tests should go into t/spec/S02-literals/radix.t if they aren't
there yet.

Not all radix constructs are supported yet (in the works). Currently does
not support nested notation, such as :10<12*23**3> or functional forms such
as :10('1234').

If such things are to be supported in string-to-number conversions (is
that specced? where?), it should really take the way through the
grammar. In the end the user could change what he considers a number by
modifying the grammar. Also note that these expressions could grow
arbitrarily complex.

Index​: perl6str.pmc

--- perl6str.pmc (revision 31353)
+++ perl6str.pmc (working copy)
@​@​ -23,6 +23,58 @​@​
#include <ctype.h>
#include <math.h>

+FLOATVAL parse_number(const char** start, const char* stop, FLOATVAL radix) {
+ FLOATVAL number = 0.0;
+ //continue until the end or until we've hit a non-digit
+ while (*start < stop && isxdigit((unsigned char)**start)) {

Your usage of isxdigit() makes me wonder - will that work for bases > 16?

Moritz

--
Moritz Lenz
http://moritz.faui2k3.org/ | http://perl-6.de/

@p6rt
Copy link
Author

p6rt commented Sep 23, 2008

The RT System itself - Status changed from 'new' to 'open'

@p6rt
Copy link
Author

p6rt commented Sep 23, 2008

From @pmichaud

On Tue, Sep 23, 2008 at 12​:05​:18PM +0200, Moritz Lenz wrote​:

jason switzer (via RT) wrote​:

Not all radix constructs are supported yet (in the works). Currently does
not support nested notation, such as :10<12*23**3> or functional forms such
as :10('1234').

If such things are to be supported in string-to-number conversions (is
that specced? where?), ...

S02​:2156​:

  The generic string-to-number converter will recognize all of these
  forms (including the * form, since constant folding is not available
  to the run time). Also allowed in strings are leading plus or minus,
  and maybe a trailing Units type for an implied scaling. Leading and
  trailing whitespace is ignored.

... it should really take the way through the
grammar. In the end the user could change what he considers a number by
modifying the grammar.

The spec doesn't mention anything about string-to-number conversion
following changes to the grammar, so Rakudo has taken the opposite
approach -- i.e., the compiler uses the generic string-to-number converter
to evaluate the forms recognized by the grammar. If this is backwards
then the spec likely needs an update.

Also note that these expressions could grow arbitrarily complex.

I think the recognized expressions are (currently) limited to the
specific forms outlined in S02.

Index​: perl6str.pmc

--- perl6str.pmc (revision 31353)
+++ perl6str.pmc (working copy)
@​@​ -23,6 +23,58 @​@​
#include <ctype.h>
#include <math.h>

+FLOATVAL parse_number(const char** start, const char* stop, FLOATVAL radix) {
+ FLOATVAL number = 0.0;
+ //continue until the end or until we've hit a non-digit
+ while (*start < stop && isxdigit((unsigned char)**start)) {

Your usage of isxdigit() makes me wonder - will that work for bases > 16?

Correct, that should probably be isalnum() instead.

Pm

@p6rt
Copy link
Author

p6rt commented Sep 24, 2008

From jswitzer@gmail.com

Index​: perl6str.pmc

===================================================================

--- perl6str.pmc (revision 31353)
+++ perl6str.pmc (working copy)
@​@​ -23,6 +23,58 @​@​
#include <ctype.h>
#include <math.h>

+FLOATVAL parse_number(const char** start, const char* stop,
FLOATVAL radix) {
+ FLOATVAL number = 0.0;
+ //continue until the end or until we've hit a non-digit
+ while (*start < stop && isxdigit((unsigned char)**start)) {

Your usage of isxdigit() makes me wonder - will that work for bases > 16?

Correct, that should probably be isalnum() instead.

Pm

Good catch. Since I didn't have a complete set of tests for this, I
didn't see that. The attached patch changes the isxdigit() calls to
isalnum(); that is the only change to the patch.

Thanks,

Jason "s1n" Switzer

@p6rt
Copy link
Author

p6rt commented Sep 24, 2008

From jswitzer@gmail.com

radix-v2.patch
Index: perl6str.pmc
===================================================================
--- perl6str.pmc	(revision 31354)
+++ perl6str.pmc	(working copy)
@@ -23,6 +23,58 @@
 #include <ctype.h>
 #include <math.h>
 
+FLOATVAL parse_number(const char** start, const char* stop, FLOATVAL radix) {
+    FLOATVAL number = 0.0;
+    //continue until the end or until we've hit a non-digit
+    while (*start < stop && isalnum((unsigned char)**start)) {
+        /* number must not contain values less than the radix */
+        unsigned int current;
+        if (isdigit(**start)) current = (**start) - '0';
+        else current = tolower(**start) - 'a' + 10;
+        //number += ((*start) - '0') * frac;
+        if (current >= radix)
+            return number;
+
+        number = number * radix + current;
+        (*start)++;
+
+        /* skip past the digit seperator _ */
+        if (*start < stop && **start == '_') {
+            if(!isalnum((unsigned char)*((*start) + 1))) {
+                return number;
+            }
+            (*start)++;
+        }
+    }
+    return number;
+}
+
+FLOATVAL parse_fraction(const char** start, const char* stop, FLOATVAL radix) {
+    FLOATVAL frac = 1.0;
+    FLOATVAL number = 0.0;
+    while (*start < stop && isalnum((unsigned char)**start)) {
+        frac /= radix;
+        unsigned int current;
+        if (isdigit(**start)) current = (**start) - '0';
+        else current = tolower(**start) - 'a' + 10;
+
+        /* if we discover scientific notation, stop parsing */
+        if (tolower(**start) == 'e' && radix == 10)
+            return number;
+        if (current >= radix)
+            return 0;
+        
+        number += current * frac;
+        (*start)++;
+        
+        /* skip past the digit seperator _ */
+        if (*start < stop && **start == '_'
+            && isdigit((unsigned char)**(start + 1)))
+            (*start)++;
+    }
+    return number;
+}
+
 pmclass Perl6Str
     extends  String
     provides string
@@ -63,12 +115,14 @@
 
     FLOATVAL get_number() {
         STRING   *s    = (STRING *)PMC_str_val(SELF);
-        FLOATVAL  sign = 1.0;
-        FLOATVAL  f    = 0.0;
+        FLOATVAL sign = 1.0;
+        FLOATVAL f    = 0.0;
+        FLOATVAL radix = 10.0;
+        int radfound = 0;
 
-        if (s) {
+        if (s && s->strstart) {
             const char *s1         = s->strstart;
-            const char * const end = s1 + s->bufused;
+            const char * const end = s1 + strlen(s1);
 
             /* skip leading whitespace */
             while (s1 < end && isspace((unsigned char)*s1))
@@ -92,92 +146,59 @@
                     case 'x':
                     case 'X':
                         s1++;
-                        while (s1 < end && isxdigit((unsigned char)*s1)) {
-                            f *= 16;
-                            if (isdigit((unsigned char)*s1))
-                                f += (*s1) - '0';
-                            else
-                                f += tolower((unsigned char)*s1) - 'a' + 10;
-
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-                        return sign * f;
+                        return parse_number(&s1, end, 16);
                     case 'd':
                     case 'D':
                         s1++;
-                        while (s1 < end && isdigit((unsigned char)*s1)) {
-                            f = f * 10 + (*s1) - '0';
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-
-                        return sign * f;
+                        return parse_number(&s1, end, 10);
                     case 'o':
                     case 'O':
                         s1++;
-                        while (s1 < end && isdigit((unsigned char)*s1)
-                                        && *s1 <= '7') {
-                            f = f * 8 + (*s1) - '0';
-
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-
-                        return sign * f;
+                        return parse_number(&s1, end, 8);
                     case 'b':
                     case 'B':
                         s1++;
-                        while (s1 < end && (*s1 == '0' || *s1 == '1')) {
-                            f = f * 2 + (*s1) - '0';
-                            s1++;
-
-                            if (s1 < end && *s1 == '_')
-                                s1++;
-                        }
-
-                        return sign * f;
+                        return parse_number(&s1, end, 2);
                     default:
                         break;
                 }
                 if (s1 < end && *s1 == '_'
-                && isdigit((unsigned char)*(s1+1)))
+                    && isdigit((unsigned char)*(s1 + 1)))
                     s1++;
             }
 
-            /* handle integer part */
-            while (s1 < end && isdigit((unsigned char)*s1)) {
-                f = f * 10 + (*s1) - '0';
+            /* handle \d+:([0..9A..Za..z]) radix notation */
+            if (s1 < end && *s1 == ':') {
                 s1++;
+                radfound = 1;
+		
+                if (s1 >= end)
+                    return f;
 
-                if (s1 < end && *s1 == '_' && isdigit((unsigned char)*(s1+1)))
+                radix = parse_number(&s1, end, 10);
+                if(*s1 != '<') {
+                    return 0.0;
+                } else {
                     s1++;
+                }
             }
 
-            /* handle floating point part */
+
+            /* handle integer part */
+            if (s1 < end && isalnum((unsigned char)*s1)) {
+                f = parse_number(&s1, end, radix);
+            }
+
+            /* handle floating point part */
             if (s1 < end && *s1 == '.') {
-                FLOATVAL frac = 1.0;
                 s1++;
-                while (s1 < end && isdigit((unsigned char)*s1)) {
-                    frac /= 10;
-                    f    += ((*s1) - '0') * frac;
-                    s1++;
-                    if (s1 < end && *s1 == '_'
-                    && isdigit((unsigned char)*(s1+1)))
-                        s1++;
-                }
+                f += parse_fraction(&s1, end, radix);
             }
 
             /* handle exponential part */
             if (s1 < end && (*s1 == 'e' || *s1 == 'E')) {
-                INTVAL exp_val  = 0;
-                INTVAL exp_sign = 1;
+                int exp_val  = 0;
+                int exp_sign = 1;
                 s1++;
 
                 if (s1 < end && *s1 == '+')
@@ -197,6 +218,10 @@
 
                 return sign * f * pow(10.0, exp_val * exp_sign);
             }
+            
+            //FIXME how to check for missing >?
+            if (radfound && *s1 != '>')
+                return 0.0;
         }
 
         return sign * f;

@p6rt
Copy link
Author

p6rt commented Dec 21, 2008

From @pmichaud

Applied in r34167, thanks!

Pm

1 similar comment
@p6rt
Copy link
Author

p6rt commented Dec 21, 2008

From @pmichaud

Applied in r34167, thanks!

Pm

@p6rt
Copy link
Author

p6rt commented Dec 21, 2008

@pmichaud - Status changed from 'open' to 'resolved'

@p6rt p6rt closed this as completed Dec 21, 2008
@p6rt p6rt added the patch label Jan 5, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant