"""\
Easy-to-use regular expression API for Python, where _S() and _R() are the
primitives for everything.  An attempt to recreate the Perl/Ruby regex APIs.
  
    >>> from sane_re import *

Matching:

    >>> _R('[a-z]').match("hiho")          # Match (like =~)
    <Match 0:1>
    >>> _S("hiho").match('[a-z]')          # Match, other way if you want
    <Match 0:1>
    >>> _S("hiho")['h.']                   # Extraction by indexing
    'hi'
    >>> _S("hiho")['(h.)(h.)', 2]          # Extraction by indexing: pick group
    'ho'
    >>> _S("HIHO")['[a-z]']
    None
    >>> _S("HIHO")[ _R('[a-z]', 'i') ]     # Flags as string
    'H'
    >>> _R('[a-z]').matches("omg!")        # Multiple matches
    [<Match 0:1>, <Match 1:2>, <Match 2:3>]
    >>> [x.span for x in _R('[a-z]').matches("omg!")]
    [(0, 1), (1, 2), (2, 3)]


Mashing:

    >>> _S("hiho").gsub('[io]', '_')
    'h_h_'

    >>> _S("hiho").gsub('[io]', lambda m: " |" + m[0] + "| ")
    'h |i| h |o| '

    >>> _S("hiho").split('[io]')
    ['h', 'h', '']

Groups:

    >>> _R('([a-z]+)([0-9]+)').show_match("hello world42 ok")
    hello world42 ok
          0000000   
          1111122   
    >>> match = _R('([a-z]+)([0-9]+)').match("hello world42 ok")
    >>> match[0]
    'world42'
    >>> match[1]
    'world'
    >>> match[2]
    '42'
    >>> match.groups
    ('world', '42')
    >>> match.groupspan(1)
    (6, 11)
    >>> match.groupspan(2)
    (11, 13)
"""

"""
More notes

We wrap `re` methods to rejigger their API, to make them more convenient and
consistent.

* Trying to close the usability gap with Perl/Ruby.  _R() is replacement for
  not having regex literal syntax builtin to the language.  _S() adds convenient
  methods, conforming to our conventions, that the string class doesn't have.
* I always forget re.search/match()'s argument order.  Using either _R().match() or
  _S().match() is completely unambiguous.
* Never use re.match(), always re.search().  What good is the former if you
  can use a caret?  Having both only causes confusion.
* Specify flags via a string of lowercase characters -- like open() -- but with
  the standard regex flags from Perl/Ruby/etc.
* Python doesn't have a do-only-one-substitution operation -- in Awk/Perl/Ruby
  terms, it has gsub() but not sub() -- so make naming clearer.
* Match objects have @property accessors like modern Python classes
* Some more advanced functionality here and there
"""

# Module metadata.
__author__  = "Brendan O'Connor (anyall.org, brenocon@gmail.com)"
__website__ = "http://anyall.org/sane_re.py"
__version__ = "Nov 2009"
# Names exported by `from sane_re import *`.
__all__ = ['_S','_R','regex_or','pos_lookahead','neg_lookahead','optional']
# NOTE(review): `_sre` does not appear to be referenced anywhere in this file.
import re, _sre
# StringIO module (Python 2); used as an in-memory output buffer by the
# fancy_sub helpers below.
from StringIO import StringIO
# Lets gsub() tell function replacements apart from other replacement values.
from types import FunctionType
# Type of a compiled pattern object, so _R() can recognise regexes that are
# already compiled.
RegexType = type(re.compile("bla"))

def stringify(s, encoding='utf8', *args):
  """Return `s` as a `str`.

  A value that already is a `str` is handed back untouched.  Anything else is
  converted by calling `s.encode(encoding, *args)`; the extra positional
  arguments are forwarded to `encode` unchanged.
  """
  if not isinstance(s, str):
    return s.encode(encoding, *args)
  return s

def _S(string):
  """Wrap `string` in an `_Ss`, after passing it through `stringify()`."""
  return _Ss(stringify(string))
  
class _Ss(str):
  """ wrap a string, endowing it with regex methods

  Every `regex` argument is passed through `_R()`, so it may be a pattern
  string, a compiled regex or an `_R` wrapper.

  Note that `replace`, `split` and `__getitem__` intentionally override the
  plain `str` versions with regex-aware ones.
  """

  def gsub(self, regex, replacement, **kwargs):
    """ like py re.sub or ruby String.gsub

    `replacement` and any keyword arguments (e.g. `alignments`) are forwarded
    to the module-level gsub().
    """
    return gsub(self, _R(regex), replacement, **kwargs)
  # Alias; this shadows str.replace().
  replace = gsub

  def match(self, regex):
    """ like py re.search: a wrapped match for the first hit, or None """
    return match(self, _R(regex))

  def split(self, regex, maxsplit=0):
    """ like py re.split.  overrides wrapped str.split() """
    return _R(regex).sre.split(self, maxsplit)

  def matches(self, regex, group=None):
    """ like py re.finditer, but returns a list

    With `group=None` the list holds wrapped match objects; otherwise it
    holds `m[group]` for every match `m`.
    """
    found = [wrap_match(m) for m in _R(regex).sre.finditer(self)]
    if group is None:
      return found
    return [m[group] for m in found]

  def __getitem__(self, args):
    """ like perl string[/regex/] or string[/reg(ex)/, 1]

    * `s[int]` and `s[slice]` behave exactly like on a plain `str`.
    * `s[regex]` returns the text of the first match (group 0).
    * `s[regex, group]` returns the given group of the first match.

    Returns None when the regex does not match.
    """
    if isinstance(args, (int, slice)):
      # Ordinary indexing / slicing: not for us.  Slice objects must be
      # delegated as well, otherwise e.g. s[::2] would be taken for a regex
      # and blow up inside _R().
      return str.__getitem__(self, args)
    if isinstance(args, tuple):
      regex = args[0]
      group = args[1] if len(args) > 1 else 0
    else:
      regex = args
      group = 0
    m = match(self, _R(regex))
    if m is None:
      return None
    return m[group]

  def show_match(self, regex, group=0, numbers=True):
    """ for testing

    Prints the string with group `group` of every match highlighted (via the
    third-party `ansi` module).  Unless `numbers` is false, it then prints
    rows of digits underneath: each column lists, top to bottom, the numbers
    of the groups that cover that character position.  Returns None.
    """
    import ansi,sys
    regex = _R(regex)
    def color_a_match(m):
      return ansi.color(m[group],'backblack','lgray')
    # Parenthesised so the line parses identically as a Python 2 print
    # statement and as a function call.
    print(self.gsub(regex, color_a_match))
    if not numbers: return
    # groups_per_pos[i] = numbers of the groups spanning character i.
    groups_per_pos = [[] for i in range(len(self))]
    for m in self.matches(regex):
      for g in range(regex.groups+1):
        # A group that did not participate has span (-1, -1): empty range.
        for i in range(*m.groupspan(g)):
          groups_per_pos[i].append(g)
    # Emit one row per "layer" until every position has been used up; at
    # least one (possibly blank) row is always printed.
    while 1:
      for pending in groups_per_pos:
        if pending:
          sys.stdout.write(str(pending.pop(0)))
        else:
          sys.stdout.write(" ")
      sys.stdout.write("\n")
      if not any(groups_per_pos): break

def gsub(string, regex, replacement, alignments=False):
  """Substitute every match of `regex` in `string`.

  string and regex need to be sane_re.{_S,_R} wrappers.

  A `replacement` that is not a function and is used without `alignments`
  goes straight to the compiled pattern's `sub()`.  In every other case the
  work is done by fancy_sub(); a non-function replacement is first wrapped
  in a function that returns it unchanged for each match.  `alignments` is
  forwarded to fancy_sub().
  """
  is_function = isinstance(replacement, FunctionType)
  if not is_function and not alignments:
    return regex.sre.sub(replacement, string)
  if not is_function:
    constant = replacement
    replacement = lambda m: constant
  return fancy_sub(string, regex.sre, replacement, alignments=alignments)
def match(string, regex):
  """Search `string` for `regex`; return a wrapped match, or None.

  string and regex need to be sane_re.{_S,_R} wrappers.
  """
  found = regex.sre.search(string)
  return wrap_match(found)

def fancy_sub(string, regex, repl_fn=lambda m: ">> %s <<" % m[0], alignments=False):
  """Replace every match of `regex` in `string` with `repl_fn(match)`.

  `string` must provide a `.matches(regex)` method returning match wrappers
  (i.e. be an `_S` string).  `repl_fn` receives each wrapped match and
  returns the replacement text.

  Returns the new string.  With `alignments` true, returns a pair
  `(new_string, positions)` where `positions[i]` is the index in the
  original string that output character `i` came from; every character of
  a replacement maps to the start of the match it replaced.
  """
  positions = []
  out = StringIO()
  cursor = 0
  for hit in string.matches(regex):
    begin = hit.start
    # Untouched text between the previous match and this one.
    out.write(string[cursor:begin])
    positions.extend(range(cursor, begin))

    replaced = repl_fn(hit)
    out.write(replaced)
    positions.extend([begin] * len(replaced))

    cursor = hit.end

  total = len(string)
  if cursor < total:
    # Text after the last match.
    out.write(string[cursor:])
    positions.extend(range(cursor, total))

  result = out.getvalue()
  if alignments:
    return result, positions
  return result

class _R:
  """ regex wrapper.  supports most _S methods too.

  Attributes:
    sre   -- the underlying compiled pattern object
    orig  -- the pattern source text when known, else None

  Attributes not found on the wrapper are looked up on `sre`, so e.g.
  `.groups` and `.pattern` work directly.
  """

  def __init__(self, arg, flags='', bin_flags=0):
    """Wrap `arg`, which may be a pattern string, a compiled regex or
    another `_R`.

    `flags` is a string of flag letters (see flag_convert) and `bin_flags`
    an `re` flag bitmask; both are only used when `arg` is a pattern string.
    Raises TypeError for any other kind of `arg`.
    """
    self.orig = None
    if isinstance(arg, RegexType):
      self.sre = arg
    elif isinstance(arg, _R):
      self.sre = arg.sre
      # Keep the source text so re-wrapping does not degrade the repr.
      self.orig = arg.orig
    elif isinstance(arg, (str,unicode)):
      bin_flags |= flag_convert(flags)
      self.sre = re.compile(arg,bin_flags)
      self.orig = arg
    else:
      raise TypeError(
          "_R() needs a pattern string, a compiled regex or an _R, not %s"
          % type(arg).__name__)

  def __getattr__(self,name):
    # Only reached when normal lookup fails.  If `sre` itself is what is
    # missing (instance not initialised yet, e.g. while being copied),
    # delegating would recurse forever, so fail cleanly instead.
    if name == 'sre':
      raise AttributeError(name)
    return getattr(self.sre,name)

  # The methods below mirror the _S methods with the argument order flipped;
  # the instance parameter is called `regex` rather than `self`.
  def gsub(regex,string,replacement, **kwargs): return gsub(_S(string),regex,replacement, **kwargs)
  replace = gsub
  def match(regex,string): return match(_S(string), regex)
  def split(regex,string,maxsplit=0): return _S(string).split(regex,maxsplit=maxsplit)
  def matches(regex,string,group=None): return _S(string).matches(regex,group=group)
  def show_match(regex,string,**kwargs): return _S(string).show_match(regex,**kwargs)

  def __str__(self):
    """'/pattern/' when the source text is known, else a generic form."""
    # Compare against None so that the empty pattern still prints as '//'.
    if self.orig is not None: return '/' + self.orig + '/'
    return "<_R with %s>" % repr(self.sre)
  __repr__ = __str__

def wrap_match(sre_match):
  """Wrap an `re` match object in a `Match`; None is passed through as None."""
  return None if sre_match is None else Match(sre_match)

class Match:
  """Thin wrapper around an `re` match object (kept in `self.sre`).

  Whole-match information is exposed as properties; per-group information
  through the `group*` methods and through indexing.
  """

  def __init__(self, sre):
    self.sre = sre

  @property
  def span(self):
    """(start, end) of the whole match."""
    return self.sre.span()

  @property
  def start(self):
    """Start offset of the whole match."""
    return self.sre.start()

  @property
  def end(self):
    """End offset of the whole match."""
    return self.sre.end()

  @property
  def groups(self):
    """Tuple of all subgroups."""
    return self.sre.groups()

  @property
  def groupdict(self):
    """Dict of the named subgroups."""
    return self.sre.groupdict()

  def groupspan(self,group):
    """(start, end) of group number `group`; TypeError unless it is an int."""
    if not isinstance(group,int):
      raise TypeError
    return self.sre.span(group)

  def groupstart(self,group):
    """Start offset of group number `group`; TypeError unless it is an int."""
    if not isinstance(group,int):
      raise TypeError
    return self.sre.start(group)

  def groupend(self,group):
    """End offset of group number `group`; TypeError unless it is an int."""
    if not isinstance(group,int):
      raise TypeError
    return self.sre.end(group)

  def __getitem__(self,group):
    """Text of a group: an int selects by number, a str by name (looked up
    in the group dict).  Any other key type raises TypeError."""
    if isinstance(group,int):
      return self.sre.group(group)
    if not isinstance(group,str):
      raise TypeError
    named = self.sre.groupdict()
    return named[group]

  def __str__(self):
    first, last = self.span
    return "<Match %d:%d>" % (first, last)
  __repr__ = __str__


# One-letter flag codes accepted by _R(), mapped to the `re` module flags.
flag_mappings = {
    'i': re.IGNORECASE,
    'l': re.LOCALE,
    'm': re.MULTILINE,
    's': re.DOTALL,
    'u': re.UNICODE,
    'x': re.VERBOSE,
}

def flag_convert(flags):
  """Turn a string of flag letters (e.g. 'im') into an `re` flag bitmask.

  Each character is looked up in `flag_mappings` and the results are OR-ed
  together; an empty string gives 0.  An unknown letter raises KeyError.
  """
  combined = 0
  for letter in flags:
    combined = combined | flag_mappings[letter]
  return combined

#def _s(x):
#  if isinstance(x,unicode): return _su(x)
#  if isinstance(x,str): return _ss(x)

def fancy_sub_sre(string, sre_regex, repl_fn=lambda m: ">> %s <<" % m.group()):
  """ like ruby String.gsub() when passing in a block

  `sre_regex` is a compiled pattern; `repl_fn` is called with each raw `re`
  match object and returns the replacement text.  Text outside the matches
  is copied through unchanged.
  """
  out = StringIO()
  cursor = 0
  for hit in sre_regex.finditer(string):
    begin, finish = hit.span()
    out.write(string[cursor:begin])
    out.write(repl_fn(hit))
    cursor = finish
  # Whatever follows the last match (nothing at all if it ended the string).
  tail = string[cursor:]
  if tail:
    out.write(tail)
  return out.getvalue()

##########################

def regex_or(*items):
  """Join the pattern strings `items` with '|' inside one capturing group."""
  alternatives = '|'.join(items)
  return ''.join(['(', alternatives, ')'])
def pos_lookahead(r):
  """Wrap pattern string `r` in a positive lookahead, `(?=...)`."""
  return ''.join(('(?=', r, ')'))
def neg_lookahead(r):
  """Wrap pattern string `r` in a negative lookahead, `(?!...)`."""
  return ''.join(('(?!', r, ')'))
def optional(r):
  """Make pattern `r` optional by wrapping it in a capturing group plus '?'."""
  template = '(%s)?'
  return template % r
