// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Original Go source here:
// http://code.google.com/p/go/source/browse/src/pkg/regexp/regexp.go

package scala.scalanative
package regex

// MachineInput abstracts different representations of the input text
// supplied to the Machine.  It provides one-character lookahead.
//
// Concrete implementations are obtained through the factory methods on the
// companion object (fromUTF8 / fromUTF16).
abstract class MachineInput {

  // Returns the rune at the specified index; the units of the index are
  // unspecified, but could be UTF-8 byte, UTF-16 char, or rune
  // indices.  The result packs the width of the rune (in the same units)
  // into the lower 3 bits, and the rune (Unicode code point) into the high
  // bits, i.e. (rune << 3) | width.  Never negative, except for EOF, which
  // is represented as (-1 << 3) | 0.
  def step(pos: Int): Int

  // Returns true if we can look ahead without losing info.
  def canCheckPrefix(): Boolean

  // Returns the index relative to |pos| at which |re2.prefix| is found
  // in this input stream, or a negative value if not found.
  def index(re2: RE2, pos: Int): Int

  // Returns a bitmask of EMPTY_* flags describing the position |pos|.
  def context(pos: Int): Int

  // Returns the end position in the same units as step().
  def endPos(): Int
}

object MachineInput {

  // Sentinel returned by step() once the input is exhausted:
  // rune -1 in the high bits, width 0 in the low 3 bits.
  final val EOF = (-1 << 3) | 0

  // Wraps the whole of |b| as UTF-8 encoded input.
  def fromUTF8(b: Array[Byte]): MachineInput =
    fromUTF8(b, 0, b.length)

  // Wraps the bytes of |b| between |start| and |end| as UTF-8 encoded input.
  def fromUTF8(b: Array[Byte], start: Int, end: Int): MachineInput =
    new UTF8Input(b, start, end)

  // Wraps the whole of |s| as UTF-16 encoded input.
  def fromUTF16(s: CharSequence): MachineInput =
    fromUTF16(s, 0, s.length())

  // Wraps the chars of |s| between |start| and |end| as UTF-16 encoded input.
  def fromUTF16(s: CharSequence, start: Int, end: Int): MachineInput =
    new UTF16Input(s, start, end)

  // An implementation of MachineInput for UTF-8 byte arrays.
  // |pos| and |width| are byte indices.  Positions passed to step(),
  // index() and context() are relative to |start|; each method adds
  // |start| to obtain the absolute index into |b|.
  private class UTF8Input(b: Array[Byte], start: Int, end: Int)
      extends MachineInput {
    if (end > b.length) {
      throw new ArrayIndexOutOfBoundsException(
        "end is greater than length: " + end + " > " + b.length
      )
    }

    // Decodes the rune that begins |_i| bytes after |start|.
    // Returns (rune << 3) | widthInBytes, or EOF when the position is at or
    // past |end| or when the multi-byte sequence announced by the lead byte
    // would extend past |end|.
    override def step(_i: Int): Int = {
      var i = _i
      i += start
      if (i >= end) {
        return EOF
      }

      // UTF-8.  RFC 3629 in five lines:
      //
      // Unicode code points            UTF-8 encoding (binary)
      //         00-7F  (7 bits)   0tuvwxyz
      //     0080-07FF (11 bits)   110pqrst 10uvwxyz
      //     0800-FFFF (16 bits)   1110jklm 10npqrst 10uvwxyz
      // 010000-10FFFF (21 bits)   11110efg 10hijklm 10npqrst 10uvwxyz
      var x = b(i) & 0xff // zero extend
      i += 1
      if ((x & 0x80) == 0) {
        return x << 3 | 1
      } else if ((x & 0xe0) == 0xc0) { // 110xxxxx
        x = x & 0x1f
        if (i >= end) {
          return EOF
        }
        x = x << 6 | b(i) & 0x3f
        i += 1
        return x << 3 | 2
      } else if ((x & 0xf0) == 0xe0) { // 1110xxxx
        x = x & 0x0f
        if (i + 1 >= end) {
          return EOF
        }
        x = x << 6 | b(i) & 0x3f
        i += 1
        x = x << 6 | b(i) & 0x3f
        i += 1
        return x << 3 | 3
      } else { // 11110xxx
        // Every lead byte not matched above lands here and is decoded as a
        // 4-byte sequence; the bit pattern is not validated.
        x = x & 0x07
        if (i + 2 >= end) {
          return EOF
        }
        x = x << 6 | b(i) & 0x3f
        i += 1
        x = x << 6 | b(i) & 0x3f
        i += 1
        x = x << 6 | b(i) & 0x3f
        i += 1
        return x << 3 | 4
      }
    }

    override def canCheckPrefix(): Boolean = true

    // Returns the offset, relative to |_pos|, of the next occurrence of
    // |re2.prefixUTF8| as reported by Utils.indexOf, or a negative value if
    // there is none.
    // NOTE(review): |end| is not passed to Utils.indexOf, so a hit that lies
    // past |end| may be reported as found -- confirm against Utils.indexOf
    // and the callers.
    override def index(re2: RE2, _pos: Int): Int = {
      val pos = _pos + start
      val i = Utils.indexOf(b, re2.prefixUTF8, pos)
      if (i < 0) i else i - pos
    }

    // Returns the empty-width context flags for the position |_pos|
    // (relative to |start|), computed from the rune before the position (r1)
    // and the rune at the position (r2).  -1 stands for "no rune".
    override def context(_pos: Int): Int = {
      val pos = _pos + this.start // absolute index into |b|
      var r1 = -1
      if (pos > this.start && pos <= this.end) {
        var start = pos - 1
        // Zero extend: a Byte widens to Int with its sign, so without the
        // mask every non-ASCII byte would be negative, the check below could
        // never succeed and r1 would look like the "no rune" sentinel.
        r1 = b(start) & 0xff
        start -= 1
        if (r1 >= 0x80) { // decode UTF-8
          // Find start, up to 4 bytes earlier.
          var lim = pos - 4
          if (lim < this.start) {
            lim = this.start
          }
          while (start >= lim && (b(start) & 0xc0) == 0x80) { // 10xxxxxx
            start -= 1
          }
          if (start < this.start) {
            start = this.start
          }
          // |start| is an absolute index here, but step() adds this.start
          // itself, so convert back to a relative position first.
          r1 = step(start - this.start) >> 3
        }
      }
      // step() takes the relative position, i.e. the original argument.
      val r2 = if (pos < this.end) step(_pos) >> 3 else -1
      Utils.emptyOpContext(r1, r2)
    }

    override def endPos(): Int = end
  }

  // An implementation of MachineInput for CharSequences.
  // |pos| and |width| are in Java "char" units.  Positions passed to step(),
  // index() and context() are relative to |start|.
  private class UTF16Input(str: CharSequence, start: Int, end: Int)
      extends MachineInput {

    // Decodes the code point that begins |_pos| chars after |start|.
    // Returns (rune << 3) | widthInChars, or EOF at or past |end|.
    override def step(_pos: Int): Int = {
      val pos = _pos + start
      if (pos < end) {
        val rune = Character.codePointAt(str, pos)
        rune << 3 | Character.charCount(rune)
      } else {
        EOF
      }
    }

    override def canCheckPrefix(): Boolean = true

    // Returns the offset, relative to |_pos|, of the next occurrence of
    // |re2.prefix|, or a negative value if there is none.
    // NOTE(review): the search runs over the whole of |str| and is not
    // limited to |end| -- confirm callers tolerate a hit past |end|.
    override def index(re2: RE2, _pos: Int): Int = {
      val pos = _pos + start
      val i = indexOf(str, re2.prefix, pos)
      if (i < 0) i else i - pos
    }

    // Returns the empty-width context flags for the position |_pos|
    // (relative to |start|), computed from the code point before the
    // position (r1) and the code point at the position (r2).  -1 stands for
    // "no rune".
    override def context(_pos: Int): Int = {
      val pos = _pos + start
      val r1 =
        if (pos > start && pos <= end) Character.codePointBefore(str, pos)
        else -1
      val r2 = if (pos < end) Character.codePointAt(str, pos) else -1
      Utils.emptyOpContext(r1, r2)
    }

    override def endPos(): Int = end

    // Finds |needle| in |hayStack| at or after |pos|, using the sequence's
    // own indexOf when it has one and the generic fallback otherwise.
    private def indexOf(hayStack: CharSequence, needle: String, pos: Int): Int =
      hayStack match {
        case hayStack: String =>
          hayStack.indexOf(needle, pos)
        case hayStack: StringBuilder =>
          hayStack.indexOf(needle, pos)
        case _ =>
          indexOfFallback(hayStack, needle, pos)
      }

    // Modified version of {@link String#indexOf(String, int)} that allows a
    // CharSequence as the haystack.  Returns the index of the first
    // occurrence of |needle| at or after |_fromIndex|, or -1 if there is none.
    private def indexOfFallback(
        hayStack: CharSequence,
        needle: String,
        _fromIndex: Int
    ): Int = {
      val length = hayStack.length()

      if (_fromIndex >= length) {
        // Same result as String.indexOf: an empty needle is found at the end
        // of the haystack (not at index 0), anything else is not found.
        return if (needle.isEmpty()) length else -1
      }
      val fromIndex = if (_fromIndex < 0) 0 else _fromIndex
      if (needle.isEmpty()) {
        return fromIndex
      }

      val first = needle.charAt(0)
      // Last index at which the needle could still fit.
      val max = length - needle.length()
      var i = fromIndex

      while (i <= max) {
        // Look for first character.
        if (hayStack.charAt(i) != first) {
          i += 1
          while (i <= max && hayStack.charAt(i) != first) { i += 1 }
        }

        // Found first character, now look at the rest of the needle.
        if (i <= max) {
          var j = i + 1
          val end = j + needle.length() - 1
          var k = 1
          while (j < end && hayStack.charAt(j) == needle.charAt(k)) {
            j += 1
            k += 1
          }
          if (j == end) {
            // Found whole string.
            return i
          }
        }

        i += 1
      }

      -1
    }
  }
}
