Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

/\S/ is not the same as /[\S]/ #1182

Closed
p5pRT opened this issue Feb 15, 2000 · 2 comments
Closed

/\S/ is not the same as /[\S]/ #1182

p5pRT opened this issue Feb 15, 2000 · 2 comments

Comments

@p5pRT
Copy link

p5pRT commented Feb 15, 2000

Migrated from rt.perl.org#2152 (status was 'resolved')

Searchable as RT2152$

@p5pRT
Copy link
Author

p5pRT commented Feb 15, 2000

From rick@consumercontact.com

This is a bug report for perl from rick@​consumercontact.com,
generated with the help of perlbug 1.26 running under perl 5.00503.


/[\S]/ is matching more stuff than /\S/

I get the same results with 5.003 on DGUX, 5.004_03 on Linux,
5.005_03 on Windows and 5.5.650 on this platform.

$ perl -e '@a=map chr,0..255;die if grep(/\S/,@a)!=grep(/[^\s]/,@a)'
Died at -e line 1.
$ perl -e '@a=map chr,0..255;die if grep(/\S/,@a)!=grep(/[\S]/,@a)'
Died at -e line 1.
$ perl -e '@a=map chr,0..255;die if grep(/\s/,@a)!=grep(/[\s]/,@a)'
$ perl -e '@a=map chr,0..255;die if grep(/\s/,@a)!=grep(/[^\S]/,@a)'

These are the characters that don't match \S and [\S], respectively, on this system.

$ perl -le 'for (0..255) {print unless chr($_) =~ /\S/}'
0
9
10
11
12
13
32
$ perl -le 'for (0..255) {print unless chr($_) =~ /[\S]/}'
9
10
12
13
32



Site configuration information for perl 5.00503​:

Configured by rick at Mon Jul 26 15​:51​:49 EDT 1999.

Summary of my perl5 (5.0 patchlevel 5 subversion 3) configuration​:
  Platform​:
  osname=svr4, osvers=, archname=i386-svr4
  uname='unix_sv consumer 4.2mp 2.1.3 i386 x86at '
  hint=recommended, useposix=true, d_sigaction=define
  usethreads=undef useperlio=undef d_sfio=undef
  Compiler​:
  cc='/bin/cc', optimize='-O', gccversion=
  cppflags='-I/usr/include -I/usr/ucbinclude -I/usr/local/include'
  ccflags ='-I/usr/include -I/usr/ucbinclude -I/usr/local/include'
  stdchar='unsigned char', d_stdstdio=define, usevfork=false
  intsize=4, longsize=4, ptrsize=4, doublesize=8
  d_longlong=undef, longlongsize=, d_longdbl=define, longdblsize=12
  alignbytes=4, usemymalloc=y, prototype=define
  Linker and Libraries​:
  ld='/bin/cc', ldflags ='-L/usr/ccs/lib -L/usr/ucblib -L/usr/local/lib -L/usr/gnu/lib'
  libpth=/usr/local/lib /usr/gnu/lib /shlib /lib /usr/lib /usr/ccs/lib /usr/ucblib
  libs=-lsocket -lnsl -ldbm -ldl -lld -lm -lc -lcrypt -lucb
  libc=, so=so, useshrplib=true, libperl=libperl.so
  Dynamic Linking​:
  dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags=' '
  cccdlflags='-KPIC', lddlflags='-G -L/usr/ccs/lib -L/usr/ucblib -L/usr/local/lib -L/usr/gnu/lib'

Locally applied patches​:
 


@​INC for perl 5.00503​:
  /usr/local/lib/perl5/5.00503/i386-svr4
  /usr/local/lib/perl5/5.00503
  /usr/local/lib/perl5/site_perl/5.005/i386-svr4
  /usr/local/lib/perl5/site_perl/5.005
  .


Environment for perl 5.00503​:
  HOME=/home1/rick
  LANG=C
  LANGUAGE (unset)
  LD_LIBRARY_PATH=/usr/opt/dash/lib​:.​:/usr/lib/ARCserve
  LOGDIR (unset)
  PATH=/usr/opt/dash/ccl_custom​:/usr/opt/dash​:/usr/opt/vsifax/obin​:/data1/ubl​:/usr/local/bin​:/usr/local/shbin​:/usr/bin​:/usr/ccs/bin​:/usr/ucb​:/usr/opt/vsifax/bin​:/usr/lib/ARCserve​:/usr/sbin​:/usr/X/bin​:/data1/time/PROG​:/home1/rick/bin​:/opt/bin​:.
  PERL_BADLANG (unset)
  SHELL=/usr/bin/ksh

@p5pRT
Copy link
Author

p5pRT commented Feb 17, 2000

From [Unknown Contact. See original ticket]

It looks like /\D/ isn't the same as /[\D]/ in 5.5.650 either (it is in
5.005_03).

I have probably naively gone too far on this, but it seems to work. It
looked to me like the code for DIGIT and SPACE should just be symmetric
with that for ALNUM, so I made it that way.

I don't know how to test the UTF8 stuff.

Rick

*** perl5.5.650/regexec.c.old Thu Feb 17 09​:28​:07 2000
--- perl5.5.650/regexec.c Thu Feb 17 13​:36​:34 2000
***************
*** 2084,2090 ****
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case SPACE​:
! if (!nextchr && locinput >= PL_regeol)
  sayNO;
  if (!(OP(scan) == SPACE
  ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
--- 2084,2090 ----
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case SPACE​:
! if (!nextchr)
  sayNO;
  if (!(OP(scan) == SPACE
  ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
***************
*** 2095,2105 ****
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case SPACEUTF8​:
! if (!nextchr && locinput >= PL_regeol)
  sayNO;
  if (nextchr & 0x80) {
  if (!(OP(scan) == SPACEUTF8
! ? swash_fetch(PL_utf8_space,(U8*)locinput)
  : isSPACE_LC_utf8((U8*)locinput)))
  {
  sayNO;
--- 2095,2105 ----
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case SPACEUTF8​:
! if (!nextchr)
  sayNO;
  if (nextchr & 0x80) {
  if (!(OP(scan) == SPACEUTF8
! ? swash_fetch(PL_utf8_space, (U8*)locinput)
  : isSPACE_LC_utf8((U8*)locinput)))
  {
  sayNO;
***************
*** 2117,2125 ****
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case NSPACE​:
! if (!nextchr)
  sayNO;
! if (OP(scan) == SPACE
  ? isSPACE(nextchr) : isSPACE_LC(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
--- 2117,2125 ----
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case NSPACE​:
! if (!nextchr && locinput >= PL_regeol)
  sayNO;
! if (OP(scan) == NSPACE
  ? isSPACE(nextchr) : isSPACE_LC(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
***************
*** 2128,2138 ****
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case NSPACEUTF8​:
! if (!nextchr)
  sayNO;
  if (nextchr & 0x80) {
  if (OP(scan) == NSPACEUTF8
! ? swash_fetch(PL_utf8_space,(U8*)locinput)
  : isSPACE_LC_utf8((U8*)locinput))
  {
  sayNO;
--- 2128,2138 ----
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case NSPACEUTF8​:
! if (!nextchr && locinput >= PL_regeol)
  sayNO;
  if (nextchr & 0x80) {
  if (OP(scan) == NSPACEUTF8
! ? swash_fetch(PL_utf8_space, (U8*)locinput)
  : isSPACE_LC_utf8((U8*)locinput))
  {
  sayNO;
***************
*** 2150,2156 ****
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case DIGIT​:
! if (!nextchr && locinput >= PL_regeol)
  sayNO;
  if (!(OP(scan) == DIGIT
  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
--- 2150,2156 ----
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case DIGIT​:
! if (!nextchr)
  sayNO;
  if (!(OP(scan) == DIGIT
  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
***************
*** 2164,2172 ****
  if (!nextchr)
  sayNO;
  if (nextchr & 0x80) {
! if (OP(scan) == NDIGITUTF8
! ? swash_fetch(PL_utf8_digit,(U8*)locinput)
! : isDIGIT_LC_utf8((U8*)locinput))
  {
  sayNO;
  }
--- 2164,2172 ----
  if (!nextchr)
  sayNO;
  if (nextchr & 0x80) {
! if (!(OP(scan) == DIGITUTF8
! ? swash_fetch(PL_utf8_digit, (U8*)locinput)
! : isDIGIT_LC_utf8((U8*)locinput)))
  {
  sayNO;
  }
***************
*** 2174,2180 ****
  nextchr = UCHARAT(locinput);
  break;
  }
! if (!isDIGIT(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
  break;
--- 2174,2181 ----
  nextchr = UCHARAT(locinput);
  break;
  }
! if (!(OP(scan) == DIGITUTF8
! ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
  sayNO;
  nextchr = UCHARAT(++locinput);
  break;
***************
*** 2182,2190 ****
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case NDIGIT​:
! if (!nextchr)
  sayNO;
! if (OP(scan) == DIGIT
  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
--- 2183,2191 ----
  PL_reg_flags |= RF_tainted;
  /* FALL THROUGH */
  case NDIGIT​:
! if (!nextchr && locinput >= PL_regeol)
  sayNO;
! if (OP(scan) == NDIGIT
  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
***************
*** 2196,2208 ****
  if (!nextchr && locinput >= PL_regeol)
  sayNO;
  if (nextchr & 0x80) {
! if (swash_fetch(PL_utf8_digit,(U8*)locinput))
  sayNO;
  locinput += PL_utf8skip[nextchr];
  nextchr = UCHARAT(locinput);
  break;
  }
! if (isDIGIT(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
  break;
--- 2197,2214 ----
  if (!nextchr && locinput >= PL_regeol)
  sayNO;
  if (nextchr & 0x80) {
! if (OP(scan) == NDIGITUTF8
! ? swash_fetch(PL_utf8_digit, (U8*)locinput)
! : isDIGIT_LC_utf8((U8*)locinput))
! {
  sayNO;
+ }
  locinput += PL_utf8skip[nextchr];
  nextchr = UCHARAT(locinput);
  break;
  }
! if (OP(scan) == NDIGITUTF8
! ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
  sayNO;
  nextchr = UCHARAT(++locinput);
  break;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant