Class: Opal::Lexer

Inherits:

Object

Object
Opal::Lexer

show all

Defined in: opal/lib/opal/parser/lexer.rb

Instance Attribute Summary collapse

#lex_state ⇒ Object
Returns the value of attribute lex_state.
#line ⇒ Object readonly
Returns the value of attribute line.
#scanner ⇒ Object
Returns the value of attribute scanner.
#scope ⇒ Object readonly
Returns the value of attribute scope.
#scope_line ⇒ Object readonly
Returns the value of attribute scope_line.
#strterm ⇒ Object
Returns the value of attribute strterm.

Instance Method Summary collapse

#add_heredoc_content(str_buffer, str_parse) ⇒ Object
#add_string_content(str_buffer, str_parse) ⇒ Object
#after_operator? ⇒ Boolean
#arg? ⇒ Boolean
#beg? ⇒ Boolean
#check(regexp) ⇒ Object
#cmdarg? ⇒ Boolean
#cmdarg_lexpop ⇒ Object
#cmdarg_pop ⇒ Object
#cmdarg_push(n) ⇒ Object
#cond? ⇒ Boolean
#cond_lexpop ⇒ Object
#cond_pop ⇒ Object
#cond_push(n) ⇒ Object
#end? ⇒ Boolean
#heredoc_identifier ⇒ Object
#initialize(source, file) ⇒ Lexer constructor
A new instance of Lexer.
#matched ⇒ Object
#new_strterm(type, start, finish) ⇒ Object
#new_strterm2(type, start, finish) ⇒ Object
#next_string_token ⇒ Object
#next_token ⇒ Object
#process_identifier(matched, cmd_start) ⇒ Object
#process_numeric ⇒ Object
#scan(regexp) ⇒ Object
#space? ⇒ Boolean
#spcarg? ⇒ Boolean
#strterm_expand?(strterm) ⇒ Boolean
#yylex ⇒ Object

Constructor Details

#initialize(source, file) ⇒ `Lexer`

Returns a new instance of Lexer

# File 'opal/lib/opal/parser/lexer.rb', line 11

# Prepares a lexer for +source+.
#
# @param source [String] text handed to the StringScanner
# @param file [Object] stored in @file; yylex interpolates it into error messages
def initialize(source, file)
  @file = file
  @line = 1

  # Lexing starts in the "beginning of expression" state with both bit
  # stacks empty.
  @lex_state = :expr_beg
  @cond = 0
  @cmdarg = 0

  # The stack starts with only the main scanner on it.
  @scanner = StringScanner.new(source)
  @scanner_stack = [@scanner]
end

Instance Attribute Details

#lex_state ⇒ `Object`

Returns the value of attribute lex_state



9
10
11

# File 'opal/lib/opal/parser/lexer.rb', line 9

# Reader for @lex_state (the lexer's current state symbol).
def lex_state; @lex_state; end

#line ⇒ `Object` (readonly)

Returns the value of attribute line



7
8
9

# File 'opal/lib/opal/parser/lexer.rb', line 7

# Reader for @line (the line counter maintained while lexing).
def line; @line; end

#scanner ⇒ `Object`

Returns the value of attribute scanner



9
10
11

# File 'opal/lib/opal/parser/lexer.rb', line 9

# Reader for @scanner (the scanner tokens are currently read from).
def scanner; @scanner; end

#scope ⇒ `Object` (readonly)

Returns the value of attribute scope



7
8
9

# File 'opal/lib/opal/parser/lexer.rb', line 7

# Reader for @scope.
def scope; @scope; end

#scope_line ⇒ `Object` (readonly)

Returns the value of attribute scope_line



7
8
9

# File 'opal/lib/opal/parser/lexer.rb', line 7

# Reader for @scope_line.
def scope_line; @scope_line; end

#strterm ⇒ `Object`

Returns the value of attribute strterm



9
10
11

# File 'opal/lib/opal/parser/lexer.rb', line 9

# Reader for @strterm (the string term hash being lexed, if any).
def strterm; @strterm; end

Instance Method Details

#add_heredoc_content(str_buffer, str_parse) ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 245

# Reads literal heredoc text from the scanner into +str_buffer+, stopping
# (without consuming) at the terminator line or at the start of an
# interpolation (`#` followed by `$`, `@` or `{`).
#
# @param str_buffer [Array] receives each chunk of text that is read
# @param str_parse [Hash] string term; +:end+ is the heredoc identifier and
#   +:type+ selects how backslash escapes are handled
# @raise [RuntimeError] if the scanner is exhausted when the loop ends
def add_heredoc_content(str_buffer, str_parse)
  scanner = @scanner
  terminator = Regexp.escape(str_parse[:end])

  eos_regx = /[ \t]*#{terminator}(\r*\n|$)/
  # Loop-invariant, so built once: either a run of characters that cannot
  # start anything special, or (fallback) any single character.
  plain_regx = Regexp.new("[^#{terminator}\#\0\\\\\n]+|.")

  until scanner.eos?
    c = nil

    if scan(/\n/)
      c = scanner.matched
    elsif check(eos_regx) && scanner.bol?
      break # terminator line reached
    elsif check(/#(?=[\$\@\{])/)
      break # interpolation starts here; the caller deals with it
    elsif scan(/\\/)
      if str_parse[:type] == :regexp
        # keep the backslash so the escape survives into the regexp source
        c = "\\" + scanner.matched if scan(/(.)/)
      else
        c = if scan(/n/)
          "\n"
        elsif scan(/r/)
          "\r"
        elsif scan(/\n/)
          "\n"
        elsif scan(/t/)
          "\t"
        else
          # any other escaped character stands for itself
          scan(/./)
          scanner.matched
        end
      end
    else
      scan plain_regx
      c = scanner.matched
    end

    c ||= scanner.matched
    str_buffer << c
  end

  raise "reached EOF while in string" if scanner.eos?
end

#add_string_content(str_buffer, str_parse) ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 299

# Reads literal string/regexp/word-list text from the scanner into
# +str_buffer+, stopping (without consuming) at the closing delimiter,
# at whitespace inside a word list, or at the start of an interpolation.
#
# @param str_buffer [Array] receives each chunk of text that is read
# @param str_parse [Hash] string term; reads +:beg+, +:end+, +:type+,
#   +:balance+ and updates +:nesting+ for balanced delimiters
# @raise [RuntimeError] if the scanner is exhausted when the loop ends
def add_string_content(str_buffer, str_parse)
  scanner = @scanner
  end_chars = Regexp.escape(str_parse[:end])
  # regexp for end of string/regexp
  end_str_re = Regexp.new(end_chars)

  expand = strterm_expand?(str_parse)

  words = ['W', 'w'].include? str_parse[:beg]

  # The patterns below depend only on the delimiters, so they are compiled
  # once instead of on every loop iteration.
  beg_str_re = Regexp.new(Regexp.escape(str_parse[:beg])) if str_parse[:balance]
  plain_re = if words
               Regexp.new("[^#{end_chars}\#\0\n\ \\\\]+|.")
             elsif str_parse[:balance]
               Regexp.new("[^#{end_chars}#{Regexp.escape str_parse[:beg]}\#\0\\\\]+|.")
             else
               Regexp.new("[^#{end_chars}\#\0\\\\]+|.")
             end

  until scanner.eos?
    c = nil

    if check end_str_re
      # if it is just balancing, add it as normal content..
      if str_parse[:balance] && (str_parse[:nesting] != 0)
        # we only checked above, so actually scan it
        scan end_str_re
        c = scanner.matched
        str_parse[:nesting] -= 1
      else
        # not balancing, so break (eos!)
        break
      end

    elsif str_parse[:balance] and scan beg_str_re
      # a nested opening delimiter is ordinary content, but must be counted
      str_parse[:nesting] += 1
      c = scanner.matched

    elsif words && scan(/\s/)
      # leave the whitespace in the stream and stop
      scanner.pos -= 1
      break

    elsif expand && check(/#(?=[\$\@\{])/)
      break

    elsif scan(/\\/)
      if str_parse[:type] == :regexp
        # keep the backslash so the escape survives into the regexp source
        c = "\\" + scanner.matched if scan(/(.)/)
      else
        c = if scan(/n/)
          "\n"
        elsif scan(/r/)
          "\r"
        elsif scan(/\n/)
          "\n"
        elsif scan(/t/)
          "\t"
        else
          # any other escaped character stands for itself
          scan(/./)
          scanner.matched
        end
      end

    else
      scan plain_re
      c = scanner.matched
    end

    c ||= scanner.matched
    str_buffer << c
  end

  raise "reached EOF while in string" if scanner.eos?
end

#after_operator? ⇒ `Boolean`

Returns:

(Boolean)



66
67
68

# File 'opal/lib/opal/parser/lexer.rb', line 66

# True when the lexer state is :expr_fname or :expr_dot.
#
# @return [Boolean]
def after_operator?
  @lex_state == :expr_fname || @lex_state == :expr_dot
end

#arg? ⇒ `Boolean`

Returns:

(Boolean)



54
55
56

# File 'opal/lib/opal/parser/lexer.rb', line 54

# True when the lexer state is :expr_arg or :expr_cmdarg.
#
# @return [Boolean]
def arg?
  @lex_state == :expr_arg || @lex_state == :expr_cmdarg
end

#beg? ⇒ `Boolean`

Returns:

(Boolean)



62
63
64

# File 'opal/lib/opal/parser/lexer.rb', line 62

# True when the lexer state is one of :expr_beg, :expr_value, :expr_mid
# or :expr_class.
#
# @return [Boolean]
def beg?
  case @lex_state
  when :expr_beg, :expr_value, :expr_mid, :expr_class then true
  else false
  end
end

#check(regexp) ⇒ `Object`



82
83
84

# File 'opal/lib/opal/parser/lexer.rb', line 82

# Delegates to the current scanner's +check+ with the given pattern and
# returns whatever it returns.
#
# @param regexp [Regexp] pattern passed through to the scanner
def check(regexp)
  @scanner.check(regexp)
end

#cmdarg? ⇒ `Boolean`

Returns:

(Boolean)



50
51
52

# File 'opal/lib/opal/parser/lexer.rb', line 50

# True when the lowest bit of the @cmdarg bit stack is set.
#
# @return [Boolean]
def cmdarg?
  @cmdarg.odd?
end

#cmdarg_lexpop ⇒ `Object`



46
47
48

# File 'opal/lib/opal/parser/lexer.rb', line 46

# Shifts the @cmdarg bit stack right by one while OR-ing the old lowest
# bit into the new lowest bit. Returns the new value.
def cmdarg_lexpop
  lowest = @cmdarg & 1
  @cmdarg = (@cmdarg >> 1) | lowest
end

#cmdarg_pop ⇒ `Object`



42
43
44

# File 'opal/lib/opal/parser/lexer.rb', line 42

# Drops the lowest bit of the @cmdarg bit stack. Returns the new value.
def cmdarg_pop
  @cmdarg >>= 1
end

#cmdarg_push(n) ⇒ `Object`



38
39
40

# File 'opal/lib/opal/parser/lexer.rb', line 38

# Pushes the lowest bit of +n+ onto the @cmdarg bit stack. Returns the new
# value.
#
# @param n [Integer] only its lowest bit is used
def cmdarg_push(n)
  bit = n & 1
  @cmdarg = (@cmdarg << 1) | bit
end

#cond? ⇒ `Boolean`

Returns:

(Boolean)



34
35
36

# File 'opal/lib/opal/parser/lexer.rb', line 34

# True when the lowest bit of the @cond bit stack is set.
#
# @return [Boolean]
def cond?
  @cond.odd?
end

#cond_lexpop ⇒ `Object`



30
31
32

# File 'opal/lib/opal/parser/lexer.rb', line 30

# Shifts the @cond bit stack right by one while OR-ing the old lowest bit
# into the new lowest bit. Returns the new value.
def cond_lexpop
  lowest = @cond & 1
  @cond = (@cond >> 1) | lowest
end

#cond_pop ⇒ `Object`



26
27
28

# File 'opal/lib/opal/parser/lexer.rb', line 26

# Drops the lowest bit of the @cond bit stack. Returns the new value.
def cond_pop
  @cond >>= 1
end

#cond_push(n) ⇒ `Object`



22
23
24

# File 'opal/lib/opal/parser/lexer.rb', line 22

# Pushes the lowest bit of +n+ onto the @cond bit stack. Returns the new
# value.
#
# @param n [Integer] only its lowest bit is used
def cond_push(n)
  bit = n & 1
  @cond = (@cond << 1) | bit
end

#end? ⇒ `Boolean`

Returns:

(Boolean)



58
59
60

# File 'opal/lib/opal/parser/lexer.rb', line 58

# True when the lexer state is one of :expr_end, :expr_endarg or
# :expr_endfn.
#
# @return [Boolean]
def end?
  case @lex_state
  when :expr_end, :expr_endarg, :expr_endfn then true
  else false
  end
end

#heredoc_identifier ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 384

# Tries to read a heredoc identifier (optionally preceded by `-` and
# optionally quoted) at the scanner position. On success a :heredoc string
# term is installed and the rest of the opening line is set aside.
#
# @return [Array, nil] [:tSTRING_BEG, identifier] on success, nil when no
#   identifier follows
def heredoc_identifier
  return unless @scanner.scan(/(-?)['"]?(\w+)['"]?/)

  heredoc = @scanner[2]
  self.strterm = new_strterm(:heredoc, heredoc, heredoc)

  # If ruby code follows the identifier on the same line it has to be
  # stored so it can be lexed once the heredoc body has been read.
  # `end_of_line` is nil when the opening line has no trailing newline
  # (end of input); there is nothing to store then, and passing nil to
  # StringScanner.new would raise a TypeError.
  end_of_line = @scanner.scan(/.*\n/)
  if end_of_line && end_of_line != "\n"
    self.strterm[:scanner] = StringScanner.new(end_of_line)
  end

  return :tSTRING_BEG, heredoc
end

#matched ⇒ `Object`



86
87
88

# File 'opal/lib/opal/parser/lexer.rb', line 86

# Delegates to the current scanner's +matched+ (text of its last match).
def matched; @scanner.matched; end

#new_strterm(type, start, finish) ⇒ `Object`



100
101
102

# File 'opal/lib/opal/parser/lexer.rb', line 100

# Builds a string term hash.
#
# @param type [Symbol] stored under :type
# @param start [String] stored under :beg
# @param finish [String] stored under :end
# @return [Hash] with keys :type, :beg and :end, in that order
def new_strterm(type, start, finish)
  term = {}
  term[:type] = type
  term[:beg] = start
  term[:end] = finish
  term
end

#new_strterm2(type, start, finish) ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 104

# Builds a string term hash like new_strterm and adds :balance => true and
# :nesting => 0 to it.
#
# @param type [Symbol] stored under :type
# @param start [String] stored under :beg
# @param finish [String] stored under :end
# @return [Hash]
def new_strterm2(type, start, finish)
  new_strterm(type, start, finish).merge(:balance => true, :nesting => 0)
end

#next_string_token ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 128

# Returns the next token while a string term (string, regexp, word list,
# xstring or heredoc) is active.
#
# Depending on what is found at the scanner position this is the closing
# token (:tSTRING_END / :tREGEXP_END), a :tSPACE between word-list items,
# the start of an interpolation (:tSTRING_DVAR / :tSTRING_DBEG) or a chunk
# of literal text (:tSTRING_CONTENT).
#
# @return [Array] [type, value]
def next_string_token
  str_parse = self.strterm
  scanner = @scanner
  space = false

  expand = strterm_expand?(str_parse)

  words = ['w', 'W'].include? str_parse[:beg]
  heredoc = str_parse[:type] == :heredoc

  space = true if words && scan(/\s+/)

  # if not end of string, so we must be parsing contents
  str_buffer = []

  if heredoc
    eos_regx = /[ \t]*#{Regexp.escape(str_parse[:end])}(\r*\n|$)/

    # The terminator only counts at the beginning of a line (the same rule
    # add_heredoc_content applies); otherwise text such as "...#{x} EOS"
    # would close the heredoc in the middle of a line.
    if scanner.bol? && check(eos_regx)
      scan(/[ \t]*#{Regexp.escape(str_parse[:end])}/)
      self.strterm = nil

      # continue with the code that followed the heredoc identifier on its
      # opening line
      if str_parse[:scanner]
        @scanner_stack << str_parse[:scanner]
        @scanner = str_parse[:scanner]
      end

      @lex_state = :expr_end
      return :tSTRING_END, scanner.matched
    end

  # see if we can read end of string/xstring/regexp markers. Heredocs are
  # excluded: their terminator is handled above, and content that merely
  # starts with the identifier must not close them.
  elsif scan Regexp.new(Regexp.escape(str_parse[:end]))
    if words && !str_parse[:done_last_space]
      # emit one final :tSPACE before closing the word list; un-read the
      # delimiter so the next call sees it again
      str_parse[:done_last_space] = true
      scanner.pos -= 1
      return :tSPACE, ' '
    end
    self.strterm = nil

    if str_parse[:balance]
      if str_parse[:nesting] == 0
        @lex_state = :expr_end

        if str_parse[:type] == :regexp
          result = scan(/\w+/)
          return :tREGEXP_END, result
        end
        return :tSTRING_END, scanner.matched
      else
        # closing delimiter of a nested pair: ordinary content
        str_buffer << scanner.matched
        str_parse[:nesting] -= 1
        self.strterm = str_parse
      end

    elsif ['"', "'"].include? str_parse[:beg]
      @lex_state = :expr_end
      return :tSTRING_END, scanner.matched

    elsif str_parse[:beg] == '`'
      @lex_state = :expr_end
      return :tSTRING_END, scanner.matched

    elsif str_parse[:beg] == '/' || str_parse[:type] == :regexp
      result = scan(/\w+/)
      @lex_state = :expr_end
      return :tREGEXP_END, result

    else
      if str_parse[:scanner]
        @scanner_stack << str_parse[:scanner]
        @scanner = str_parse[:scanner]
      end

      @lex_state = :expr_end
      return :tSTRING_END, scanner.matched
    end
  end

  return :tSPACE, ' ' if space

  if str_parse[:balance] and scan Regexp.new(Regexp.escape(str_parse[:beg]))
    str_buffer << scanner.matched
    str_parse[:nesting] += 1
  elsif check(/#[@$]/)
    scan(/#/)
    if expand
      return :tSTRING_DVAR, scanner.matched
    else
      str_buffer << scanner.matched
    end

  elsif scan(/#\{/)
    if expand
      # we are into ruby code, so stop parsing content (for now)
      return :tSTRING_DBEG, scanner.matched
    else
      str_buffer << scanner.matched
    end

  # causes error, so we will just collect it later on with other text
  elsif scan(/\#/)
    str_buffer << '#'
  end

  if heredoc
    add_heredoc_content str_buffer, str_parse
  else
    add_string_content str_buffer, str_parse
  end

  complete_str = str_buffer.join ''
  @line += complete_str.count("\n")
  return :tSTRING_CONTENT, complete_str
end

#next_token ⇒ `Object`



90
91
92

# File 'opal/lib/opal/parser/lexer.rb', line 90

# Returns the next token by calling yylex.
def next_token; yylex; end

#process_identifier(matched, cmd_start) ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 398

# Classifies the identifier the scanner has just matched as a label,
# keyword, constant or plain identifier and updates @lex_state.
#
# @param matched [String] ignored; the scanner's last match is used instead
# @param cmd_start [Boolean] when true, a non-keyword identifier in an
#   argument-accepting state moves the lexer to :expr_cmdarg, not :expr_arg
# @return [Array] [type, value]
def process_identifier(matched, cmd_start)
  scanner = @scanner
  matched = scanner.matched

  # `name:` (but not `name::`) is a label
  if scanner.peek(2) != '::' && scan(/:/)
    @lex_state = :expr_beg
    return :tLABEL, "#{matched}"
  end

  if matched == 'defined?'
    if after_operator?
      @lex_state = :expr_end
      return :tIDENTIFIER, matched
    end

    @lex_state = :expr_arg
    return :kDEFINED, 'defined?'
  end

  # Where a method name is expected, a trailing `=` belongs to the name
  # (setter methods). The `=` is left alone when it starts `=~`, `=>` or
  # `==` (unless that is `==>`), so `:foo=>1` lexes as `foo` then `=>`.
  if @lex_state == :expr_fname && !matched.end_with?('?', '!') &&
     scan(/\=(?![~>]|\=(?!\>))/)
    matched += scanner.matched
  end

  if @lex_state != :expr_dot and kw = Keywords.keyword(matched)
    old_state = @lex_state
    @lex_state = kw.state

    if old_state == :expr_fname
      return [kw.id[0], kw.name]
    end

    if matched == "do"
      if after_operator?
        @lex_state = :expr_end
        return :tIDENTIFIER, matched
      end

      if @start_of_lambda
        @start_of_lambda = false
        @lex_state = :expr_beg
        return [:kDO_LAMBDA, scanner.matched]
      elsif cond?
        @lex_state = :expr_beg
        return :kDO_COND, matched
      elsif cmdarg? && @lex_state != :expr_cmdarg
        @lex_state = :expr_beg
        return :kDO_BLOCK, matched
      elsif @lex_state == :expr_endarg
        return :kDO_BLOCK, matched
      else
        @lex_state = :expr_beg
        return :kDO, matched
      end
    else
      if old_state == :expr_beg or old_state == :expr_value
        return [kw.id[0], matched]
      else
        # the keyword has a distinct second id, which is the one returned
        # here
        if kw.id[0] != kw.id[1]
          @lex_state = :expr_beg
        end

        return [kw.id[1], matched]
      end
    end
  end

  if [:expr_beg, :expr_dot, :expr_mid, :expr_arg, :expr_cmdarg].include? @lex_state
    @lex_state = cmd_start ? :expr_cmdarg : :expr_arg
  else
    @lex_state = :expr_end
  end

  return [matched =~ /^[A-Z]/ ? :tCONSTANT : :tIDENTIFIER, matched]
end

#process_numeric ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 109

# Lexes a numeric literal at the scanner position and sets @lex_state to
# :expr_end.
#
# Patterns are tried in this order: binary (`0b…`), octal (`0o…` or a
# leading `0`), float, decimal integer, hexadecimal (`0x…`). The `b` of a
# binary literal is mandatory; otherwise a leading-zero literal made only
# of the digits 0 and 1 (e.g. `011`) would be read as binary, not octal.
#
# @return [Array] [:tINTEGER, Integer] or [:tFLOAT, Float]
# @raise [RuntimeError] when no numeric pattern matches
def process_numeric
  @lex_state = :expr_end
  scanner = @scanner

  if scanner.scan(/0b[01_]+/)
    [:tINTEGER, scanner.matched.to_i(2)]
  elsif scanner.scan(/0o?[0-7_]+/)
    [:tINTEGER, scanner.matched.to_i(8)]
  elsif scanner.scan(/[\d_]+\.[\d_]+\b|[\d_]+(\.[\d_]+)?[eE][-+]?[\d_]+\b/)
    [:tFLOAT, scanner.matched.gsub(/_/, '').to_f]
  elsif scanner.scan(/[\d_]+\b/)
    [:tINTEGER, scanner.matched.gsub(/_/, '').to_i]
  elsif scanner.scan(/0[xX][\da-fA-F_]+/)
    [:tINTEGER, scanner.matched.to_i(16)]
  else
    raise "Lexing error on numeric type: `#{scanner.peek 5}`"
  end
end

#scan(regexp) ⇒ `Object`



78
79
80

# File 'opal/lib/opal/parser/lexer.rb', line 78

# Delegates to the current scanner's +scan+ with the given pattern and
# returns whatever it returns.
#
# @param regexp [Regexp] pattern passed through to the scanner
def scan(regexp)
  @scanner.scan(regexp)
end

#space? ⇒ `Boolean`

Returns:

(Boolean)



74
75
76

# File 'opal/lib/opal/parser/lexer.rb', line 74

# Checks (without consuming) whether a whitespace character is next.
# Returns the scanner's +check+ result: the matched text, or nil.
def space?
  whitespace = /\s/
  @scanner.check(whitespace)
end

#spcarg? ⇒ `Boolean`

Returns:

(Boolean)



70
71
72

# File 'opal/lib/opal/parser/lexer.rb', line 70

# Truthy when the lexer is in an argument state (arg?), whitespace was seen
# before the current token (@space_seen) and no whitespace follows.
def spcarg?
  arg? && @space_seen && !space?
end

#strterm_expand?(strterm) ⇒ `Boolean`

Returns:

(Boolean)

# File 'opal/lib/opal/parser/lexer.rb', line 94

# Tells whether the given string term's :type is one of :dquote, :dsym,
# :dword, :heredoc, :xquote or :regexp.
#
# @param strterm [Hash] string term; only :type is read
# @return [Boolean]
def strterm_expand?(strterm)
  case strterm[:type]
  when :dquote, :dsym, :dword, :heredoc, :xquote, :regexp then true
  else false
  end
end

#yylex ⇒ `Object`

# File 'opal/lib/opal/parser/lexer.rb', line 489

# Scans and returns the next token.
#
# While a string term is active the call is delegated to
# next_string_token. Otherwise blanks are skipped and the token is
# recognised by trying each pattern below in order; most branches also
# move @lex_state.
#
# @return [Array] [type, value]; [false, false] once the main scanner is
#   exhausted
# @raise [RuntimeError, SyntaxError] on input that no branch accepts
def yylex
  @space_seen = false
  cmd_start = false

  if self.strterm
    return next_string_token
  end

  while true
    if scan(/\ |\t|\r/)
      @space_seen = true
      next

    elsif scan(/(\n|#)/)
      c = scanner.matched
      # a comment runs to the end of the line; a newline bumps the counter
      if c == '#' then scan(/(.*)/) else @line += 1; end

      scan(/(\n+)/)
      @line += scanner.matched.length if scanner.matched

      next if [:expr_beg, :expr_dot].include? @lex_state

      # a following line that starts with `.` (but not `..`) continues the
      # expression, so no :tNL is emitted for this newline
      if scan(/([\ \t\r\f\v]*)\./)
        @space_seen = true unless scanner[1].empty?
        scanner.pos = scanner.pos - 1

        next unless check(/\.\./)
      end

      cmd_start = true
      @lex_state = :expr_beg
      return :tNL, '\\n'

    elsif scan(/\;/)
      @lex_state = :expr_beg
      return :tSEMI, ';'

    elsif scan(/\*/)
      if scan(/\*/)
        if scan(/\=/)
          @lex_state = :expr_beg
          return :tOP_ASGN, '**'
        end

        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end

        return :tPOW, '**'
      end

      if scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '*'
      end

      result = '*'
      if @lex_state == :expr_fname or @lex_state == :expr_dot
        @lex_state = :expr_arg
        return :tSTAR2, result
      elsif @space_seen && check(/\S/)
        @lex_state = :expr_beg
        return :tSTAR, result
      elsif [:expr_beg, :expr_mid].include? @lex_state
        @lex_state = :expr_beg
        return :tSTAR, result
      else
        @lex_state = :expr_beg
        return :tSTAR2, result
      end

    elsif scan(/\!/)
      # Only the characters that belong to the operator are consumed, so a
      # `!` followed by a newline or end of input cannot make the scanner
      # step back onto the `!` itself.
      if after_operator?
        @lex_state = :expr_arg
        return :tBANG, '!' if scan(/@/)
      else
        @lex_state = :expr_beg
      end

      if scan(/\=/)
        return :tNEQ, '!='
      elsif scan(/\~/)
        return :tNMATCH, '!~'
      end

      return :tBANG, '!'

    elsif scan(/\=/)
      if @lex_state == :expr_beg and !@space_seen
        if scan(/begin/) and space?
          # =begin ... =end block comment
          scan(/(.*)/) # end of line
          line_count = 0

          while true
            if scanner.eos?
              raise "embedded document meets end of file"
            end

            # `=end` may also be the very last thing in the input
            if scan(/\=end/) and (space? or scanner.eos?)
              @line += line_count
              return next_token
            end

            if scan(/\n/)
              line_count += 1
              next
            end

            scan(/(.*)/)
          end
        end
      end

      @lex_state = if after_operator?
                     :expr_arg
                   else
                     :expr_beg
                   end

      if scan(/\=/)
        if scan(/\=/)
          return :tEQQ, '==='
        end

        return :tEQ, '=='
      end

      if scan(/\~/)
        return :tMATCH, '=~'
      elsif scan(/\>/)
        return :tASSOC, '=>'
      end

      return :tEQL, '='

    elsif scan(/\"/)
      self.strterm = new_strterm(:dquote, '"', '"')
      return :tSTRING_BEG, scanner.matched

    elsif scan(/\'/)
      self.strterm = new_strterm(:squote, "'", "'")
      return :tSTRING_BEG, scanner.matched

    elsif scan(/\`/)
      self.strterm = new_strterm(:xquote, '`', '`')
      return :tXSTRING_BEG, scanner.matched

    elsif scan(/\&/)
      if scan(/\&/)
        @lex_state = :expr_beg

        if scan(/\=/)
          return :tOP_ASGN, '&&'
        end

        return :tANDOP, '&&'

      elsif scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '&'
      end

      if spcarg?
        #puts "warning: `&' interpreted as argument prefix"
        result = :tAMPER
      elsif beg?
        result = :tAMPER
      else
        #puts "warn_balanced: & argument prefix"
        result = :tAMPER2
      end

      @lex_state = after_operator? ? :expr_arg : :expr_beg
      return result, '&'

    elsif scan(/\|/)
      if scan(/\|/)
        @lex_state = :expr_beg
        if scan(/\=/)
          return :tOP_ASGN, '||'
        end

        return :tOROP, '||'

      elsif scan(/\=/)
        # same state change as the other assignment operators
        @lex_state = :expr_beg
        return :tOP_ASGN, '|'
      end

      @lex_state = after_operator?() ? :expr_arg : :expr_beg
      return :tPIPE, '|'

    elsif scan(/\%[QqWwixr]/)
      str_type = scanner.matched[1, 1]
      paren = scan(/./)

      # bracket-like openers close with their counterpart; any other
      # delimiter closes with itself
      term = case paren
             when '(' then ')'
             when '[' then ']'
             when '{' then '}'
             else paren
             end

      case str_type
      when 'Q'
        self.strterm = new_strterm2(:dquote, paren, term)
        return :tSTRING_BEG, scanner.matched
      when 'q'
        self.strterm = new_strterm2(:squote, paren, term)
        return :tSTRING_BEG, scanner.matched
      when 'W'
        self.strterm = new_strterm(:dword, 'W', term)
        scan(/\s*/)
        return :tWORDS_BEG, scanner.matched
      when 'w', 'i'
        self.strterm = new_strterm(:sword, 'w', term)
        scan(/\s*/)
        return :tAWORDS_BEG, scanner.matched
      when 'x'
        self.strterm = new_strterm2(:xquote, paren, term)
        return :tXSTRING_BEG, scanner.matched
      when 'r'
        self.strterm = new_strterm2(:regexp, paren, term)
        return :tREGEXP_BEG, scanner.matched
      end

    elsif scan(/\//)
      if [:expr_beg, :expr_mid].include? @lex_state
        self.strterm = new_strterm(:regexp, '/', '/')
        return :tREGEXP_BEG, scanner.matched
      elsif scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '/'
      elsif @lex_state == :expr_fname or @lex_state == :expr_dot
        @lex_state = :expr_arg
      elsif @lex_state == :expr_cmdarg || @lex_state == :expr_arg
        # `foo /bar/`: space before but not after reads as a regexp argument
        if !check(/\s/) && @space_seen
          self.strterm = new_strterm(:regexp, '/', '/')
          return :tREGEXP_BEG, scanner.matched
        end
      else
        @lex_state = :expr_beg
      end

      return :tDIVIDE, '/'

    elsif scan(/\%/)
      if scan(/\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '%'
      elsif check(/[^\s]/)
        if @lex_state == :expr_beg or (@lex_state == :expr_arg && @space_seen)
          start_word  = scan(/./)
          end_word    = { '(' => ')', '[' => ']', '{' => '}' }[start_word] || start_word
          self.strterm = new_strterm2(:dquote, start_word, end_word)
          return :tSTRING_BEG, scanner.matched
        end
      end

      @lex_state = after_operator? ? :expr_arg : :expr_beg

      return :tPERCENT, '%'

    elsif scan(/\\/)
      # backslash-newline is a line continuation and counts as whitespace
      if scan(/\r?\n/)
        @space_seen = true
        next
      end

      raise SyntaxError, "backslash must appear before newline :#{@file}:#{@line}"

    elsif scan(/\(/)
      if [:expr_beg, :expr_mid].include? @lex_state
        result = :tLPAREN
      elsif @space_seen && [:expr_arg, :expr_cmdarg].include?(@lex_state)
        result = :tLPAREN_ARG
      else
        result = :tLPAREN2
      end

      @lex_state = :expr_beg
      cond_push 0
      cmdarg_push 0

      return result, scanner.matched

    elsif scan(/\)/)
      cond_lexpop
      cmdarg_lexpop
      @lex_state = :expr_end
      return :tRPAREN, scanner.matched

    elsif scan(/\[/)
      if [:expr_fname, :expr_dot].include? @lex_state
        @lex_state = :expr_arg
        if scan(/\]=/)
          return :tASET, '[]='
        elsif scan(/\]/)
          return :tAREF, '[]'
        else
          raise "Unexpected '[' token"
        end
      elsif [:expr_beg, :expr_mid].include?(@lex_state) || @space_seen
        @lex_state = :expr_beg
        cond_push 0
        cmdarg_push 0
        return :tLBRACK, scanner.matched
      else
        @lex_state = :expr_beg
        cond_push 0
        cmdarg_push 0
        return :tLBRACK2, scanner.matched
      end

    elsif scan(/\]/)
      cond_lexpop
      cmdarg_lexpop
      @lex_state = :expr_end
      return :tRBRACK, scanner.matched

    elsif scan(/\}/)
      cond_lexpop
      cmdarg_lexpop
      @lex_state = :expr_end

      return :tRCURLY, scanner.matched

    elsif scan(/\.\.\./)
      @lex_state = :expr_beg
      return :tDOT3, scanner.matched

    elsif scan(/\.\./)
      @lex_state = :expr_beg
      return :tDOT2, scanner.matched

    elsif scan(/\./)
      @lex_state = :expr_dot unless @lex_state == :expr_fname
      return :tDOT, scanner.matched

    elsif scan(/\:\:/)
      if [:expr_beg, :expr_mid, :expr_class].include? @lex_state
        @lex_state = :expr_beg
        return :tCOLON3, scanner.matched
      elsif @space_seen && @lex_state == :expr_arg
        @lex_state = :expr_beg
        return :tCOLON3, scanner.matched
      end

      @lex_state = :expr_dot
      return :tCOLON2, scanner.matched

    elsif scan(/\:/)
      if end? || check(/\s/)
        unless check(/\w/)
          @lex_state = :expr_beg
          return :tCOLON, ':'
        end

        @lex_state = :expr_fname
        return :tSYMBEG, ':'
      end

      # quoted symbols are lexed as strings after the :tSYMBEG
      if scan(/\'/)
        self.strterm = new_strterm(:ssym, "'", "'")
      elsif scan(/\"/)
        self.strterm = new_strterm(:dsym, '"', '"')
      end

      @lex_state = :expr_fname
      return :tSYMBEG, ':'

    elsif scan(/\^\=/)
      @lex_state = :expr_beg
      return :tOP_ASGN, '^'
    elsif scan(/\^/)
      if @lex_state == :expr_fname or @lex_state == :expr_dot
        @lex_state = :expr_arg
        return :tCARET, scanner.matched
      end

      @lex_state = :expr_beg
      return :tCARET, scanner.matched

    elsif check(/\</)
      if scan(/\<\<\=/)
        @lex_state = :expr_beg
        return :tOP_ASGN, '<<'
      elsif scan(/\<\</)
        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_arg
          return :tLSHFT, '<<'
        elsif ![:expr_dot, :expr_class].include?(@lex_state) && !end? && (!arg? || @space_seen)
          # `<<` where a value may start: try to read it as a heredoc
          if token = heredoc_identifier
            return token
          end

          @lex_state = :expr_beg
          return :tLSHFT, '<<'
        end
        @lex_state = :expr_beg
        return :tLSHFT, '<<'
      elsif scan(/\<\=\>/)
        if after_operator?
          @lex_state = :expr_arg
        else
          if @lex_state == :expr_class
            cmd_start = true
          end

          @lex_state = :expr_beg
        end

        return :tCMP, '<=>'
      elsif scan(/\<\=/)
        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tLEQ, '<='
      elsif scan(/\</)
        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tLT, '<'
      end

    elsif check(/\>/)
      if scan(/\>\>\=/)
        # same state change as `<<=`
        @lex_state = :expr_beg
        return :tOP_ASGN, '>>'
      elsif scan(/\>\>/)
        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tRSHFT, '>>'
      elsif scan(/\>\=/)
        # NOTE(review): the other comparison operators move to :expr_arg
        # here, this one to :expr_end. Left unchanged; confirm the intent.
        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_end
        else
          @lex_state = :expr_beg
        end
        return :tGEQ, scanner.matched
      elsif scan(/\>/)
        if @lex_state == :expr_fname or @lex_state == :expr_dot
          @lex_state = :expr_arg
        else
          @lex_state = :expr_beg
        end
        return :tGT, '>'
      end

    elsif scan(/->/)
      # FIXME: # should be :expr_arg, but '(' breaks it...
      @lex_state = :expr_end
      @start_of_lambda = true
      return [:tLAMBDA, scanner.matched]

    elsif scan(/[+-]/)
      matched = scanner.matched
      sign, utype = if matched == '+'
                      [:tPLUS, :tUPLUS]
                    else
                      [:tMINUS, :tUMINUS]
                    end

      if beg?
        @lex_state = :expr_mid
        return [utype, matched]
      elsif after_operator?
        @lex_state = :expr_arg
        return [:tIDENTIFIER, matched + '@'] if scan(/@/)
        return [sign, matched]
      end

      if scan(/\=/)
        @lex_state = :expr_beg
        return [:tOP_ASGN, matched]
      end

      if arg?
        # `foo -1`: space before but not after reads as a unary sign
        if !space? && @space_seen
          @lex_state = :expr_mid
          return [utype, matched]
        end
      end

      @lex_state = :expr_beg
      # the token value is the operator text, as in the branches above
      return [sign, matched]

    elsif scan(/\?/)
      if end?
        @lex_state = :expr_beg
        return :tEH, scanner.matched
      end

      # `?x` character literal
      unless check(/\s/)
        @lex_state = :expr_end
        return :tSTRING, scan(/./)
      end

      @lex_state = :expr_beg
      return :tEH, scanner.matched

    elsif scan(/\~/)
      if @lex_state == :expr_fname
        @lex_state = :expr_end
        return :tTILDE, '~'
      end
      @lex_state = :expr_beg
      return :tTILDE, '~'

    elsif check(/\$/)
      if scan(/\$([1-9]\d*)/)
        @lex_state = :expr_end
        return :tNTH_REF, scanner.matched.sub('$', '')

      elsif scan(/(\$_)(\w+)/)
        @lex_state = :expr_end
        return :tGVAR, scanner.matched

      elsif scan(/\$[\+\'\`\&!@\"~*$?\/\\:;=.,<>_]/)
        @lex_state = :expr_end
        return :tGVAR, scanner.matched
      elsif scan(/\$\w+/)
        @lex_state = :expr_end
        return :tGVAR, scanner.matched
      else
        raise "Bad gvar name: #{scanner.peek(5).inspect}"
      end

    elsif scan(/\@\@\w*/)
      @lex_state = :expr_end
      return :tCVAR, scanner.matched

    elsif scan(/\@\w*/)
      @lex_state = :expr_end
      return :tIVAR, scanner.matched

    elsif scan(/\,/)
      @lex_state = :expr_beg
      return :tCOMMA, scanner.matched

    elsif scan(/\{/)
      if @start_of_lambda
        @start_of_lambda = false
        @lex_state = :expr_beg
        return [:tLAMBEG, scanner.matched]

      # NOTE(review): :LBRACE_ARG and '{' do not follow the :tXXX naming used
      # for every other token type here. Left unchanged; confirm against the
      # grammar.
      elsif [:expr_end, :expr_arg, :expr_cmdarg].include? @lex_state
        result = :tLCURLY
      elsif @lex_state == :expr_endarg
        result = :LBRACE_ARG
      else
        result = '{'
      end

      @lex_state = :expr_beg
      cond_push 0
      cmdarg_push 0
      return result, scanner.matched

    elsif check(/[0-9]/)
      return process_numeric

    elsif scan(/(\w)+[\?\!]?/)
      return process_identifier scanner.matched, cmd_start
    end

    if scanner.eos?
      if @scanner_stack.size == 1 # our main scanner, we cant pop this
        return [false, false]
      else # we were probably parsing a heredoc, so pop that parser and continue
        @scanner_stack.pop
        @scanner = @scanner_stack.last
        return next_token
      end
    end

    raise "Unexpected content in parsing stream `#{scanner.peek 5}` :#{@file}:#{@line}"
  end
end

Class: Opal::Lexer

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, file) ⇒ Lexer

Instance Attribute Details

#lex_state ⇒ Object

#line ⇒ Object (readonly)

#scanner ⇒ Object

#scope ⇒ Object (readonly)

#scope_line ⇒ Object (readonly)

#strterm ⇒ Object

Instance Method Details

#add_heredoc_content(str_buffer, str_parse) ⇒ Object

#add_string_content(str_buffer, str_parse) ⇒ Object

#after_operator? ⇒ Boolean

#arg? ⇒ Boolean

#beg? ⇒ Boolean

#check(regexp) ⇒ Object

#cmdarg? ⇒ Boolean

#cmdarg_lexpop ⇒ Object

#cmdarg_pop ⇒ Object

#cmdarg_push(n) ⇒ Object

#cond? ⇒ Boolean

#cond_lexpop ⇒ Object

#cond_pop ⇒ Object

#cond_push(n) ⇒ Object

#end? ⇒ Boolean

#heredoc_identifier ⇒ Object

#matched ⇒ Object

#new_strterm(type, start, finish) ⇒ Object

#new_strterm2(type, start, finish) ⇒ Object

#next_string_token ⇒ Object

#next_token ⇒ Object

#process_identifier(matched, cmd_start) ⇒ Object

#process_numeric ⇒ Object

#scan(regexp) ⇒ Object

#space? ⇒ Boolean

#spcarg? ⇒ Boolean

#strterm_expand?(strterm) ⇒ Boolean

#yylex ⇒ Object

#initialize(source, file) ⇒ `Lexer`

#lex_state ⇒ `Object`

#line ⇒ `Object` (readonly)

#scanner ⇒ `Object`

#scope ⇒ `Object` (readonly)

#scope_line ⇒ `Object` (readonly)

#strterm ⇒ `Object`

#add_heredoc_content(str_buffer, str_parse) ⇒ `Object`

#add_string_content(str_buffer, str_parse) ⇒ `Object`

#after_operator? ⇒ `Boolean`

#arg? ⇒ `Boolean`

#beg? ⇒ `Boolean`

#check(regexp) ⇒ `Object`

#cmdarg? ⇒ `Boolean`

#cmdarg_lexpop ⇒ `Object`

#cmdarg_pop ⇒ `Object`

#cmdarg_push(n) ⇒ `Object`

#cond? ⇒ `Boolean`

#cond_lexpop ⇒ `Object`

#cond_pop ⇒ `Object`

#cond_push(n) ⇒ `Object`

#end? ⇒ `Boolean`

#heredoc_identifier ⇒ `Object`

#matched ⇒ `Object`

#new_strterm(type, start, finish) ⇒ `Object`

#new_strterm2(type, start, finish) ⇒ `Object`

#next_string_token ⇒ `Object`

#next_token ⇒ `Object`

#process_identifier(matched, cmd_start) ⇒ `Object`

#process_numeric ⇒ `Object`

#scan(regexp) ⇒ `Object`

#space? ⇒ `Boolean`

#spcarg? ⇒ `Boolean`

#strterm_expand?(strterm) ⇒ `Boolean`

#yylex ⇒ `Object`