#include "ttokenizer.h"
#include <QString>

namespace TSyntax {

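// Interpret the token text as an int (QString::toInt() returns 0 on failure).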
int Token::getIntValue() const {
  return QString::fromStdString(getText()).toInt();
}

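// Interpret the token text as a double (QString::toDouble() returns 0.0 on failure).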
double Token::getDoubleValue() const {
  return QString::fromStdString(getText()).toDouble();
}

//===================================================================

Tokenizer::Tokenizer() : m_buffer(), m_index(0) {}

//-------------------------------------------------------------------

Tokenizer::Tokenizer(std::string buffer) : m_buffer(), m_index(0) {
  setBuffer(buffer);
}

//===================================================================

Tokenizer::~Tokenizer() {}

//-------------------------------------------------------------------

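// Tokenize the whole buffer up front: identifiers, numbers, string literal
// contents and punctuation are stored as tokens together with their positions.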
void Tokenizer::setBuffer(std::string buffer) {
  m_buffer = buffer + '\0';
  m_index  = 0;
  m_tokens.clear();

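  // True while scanning the contents of a double-quoted string literal.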
  bool stringBlock = false;

  int i         = 0;
  const char *s = &m_buffer[0];
  for (;;) {
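    // Skip whitespace before the next token.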
    while (isascii(s[i]) && isspace(s[i])) i++;

    int j = i;

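    // End of input: append the terminating Eos token and stop.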
    if (s[i] == '\0') {
      m_tokens.push_back(Token("", Token::Eos, j));
      break;
    }

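    // A double quote toggles string mode and is emitted as its own Punct token.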
    if (s[i] == '"') {
      stringBlock = !stringBlock;
      m_tokens.push_back(Token("\"", Token::Punct, j));

      ++i;
      continue;
    }

    std::string token;

    if (stringBlock) {
      // string block - read mercilessly until either another '"' or EOS
      token = std::string(1, s[i++]);

      while (s[i] != '"' && s[i] != '\0') token.append(1, s[i++]);

      m_tokens.push_back(Token(token, Token::Ident, j));
    } else if ((isascii(s[i]) && isalpha(s[i])) || s[i] == '_') {
      // identifier: a letter or '_', then letters, digits and underscores
      token = std::string(1, s[i++]);

      while (isascii(s[i]) && (isalpha(s[i]) || s[i] == '_' || isdigit(s[i])))
        token.append(1, s[i++]);

      m_tokens.push_back(Token(token, Token::Ident, j));
    } else if ((isascii(s[i]) && isdigit(s[i])) || s[i] == '.') {
      // number: digits, an optional fractional part and an optional exponent
      while (isascii(s[i]) && isdigit(s[i])) token.append(1, s[i++]);

      if (s[i] == '.') {
        token.append(1, s[i++]);

        while (isascii(s[i]) && isdigit(s[i])) token.append(1, s[i++]);

        // An 'e'/'E' belongs to the number only if it is followed by a digit,
        // or by a sign and a digit.
        if ((s[i] == 'e' || s[i] == 'E') &&
            ((isascii(s[i + 1]) && isdigit(s[i + 1])) ||
             ((s[i + 1] == '-' || s[i + 1] == '+') && isascii(s[i + 2]) &&
              isdigit(s[i + 2])))) {
          token.append(1, s[i++]);

          if (s[i] == '-' || s[i] == '+') token.append(1, s[i++]);

          while (isascii(s[i]) && isdigit(s[i])) token.append(1, s[i++]);
        }
      }
      m_tokens.push_back(Token(token, Token::Number, j));
    } else {
      // punctuation: prefer a two-character operator, otherwise take one character
      if (s[i + 1] != '\0') {
        token = std::string(s + i, 2);

        const std::string ss[] = {"==", "!=", ">=", "<=", "||", "&&"};

        if (std::find(std::begin(ss), std::end(ss), token) != std::end(ss))
          i += 2;
        else
          token = std::string(1, s[i++]);
      } else
        token = std::string(1, s[i++]);

      m_tokens.push_back(Token(token, Token::Punct, j));
    }
  }
}

//-------------------------------------------------------------------

int Tokenizer::getTokenCount() const { return m_tokens.size(); }

//-------------------------------------------------------------------

const Token &Tokenizer::getToken(int index) const {
  assert(0 <= index && index < getTokenCount());
  return m_tokens[index];
}

//-------------------------------------------------------------------

void Tokenizer::reset() { m_index = 0; }

//-------------------------------------------------------------------

const Token &Tokenizer::getToken() { return getToken(m_index); }

//-------------------------------------------------------------------

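// Return the current token and advance the cursor, never moving past the
// final Eos token.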
Token Tokenizer::nextToken() {
  Token token = getToken();
  if (m_index + 1 < getTokenCount()) m_index++;
  return token;
}

//-------------------------------------------------------------------

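// True once the cursor has reached the terminating Eos token.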
bool Tokenizer::eos() const { return m_index + 1 == getTokenCount(); }

//-------------------------------------------------------------------

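// Return the token whose text covers character position pos; positions that
// fall in whitespace between tokens, or outside the buffer, yield a
// placeholder token for that range.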
Token Tokenizer::getTokenFromPos(int pos) const {
  int len = m_buffer.length();
  if (pos < 0 || pos >= len) return Token(pos);
  int x = 0;  // end position of the previous token (start of the current gap)
  for (int i = 0; i < getTokenCount(); i++) {
    const Token &token = getToken(i);
    int y              = token.getPos();  // start position of this token
    if (pos < y) {
      // pos falls in the whitespace gap between the previous token and this one
      assert(x < y);
      return Token(x, y - 1);
    }
    x = y + (int)token.getText().length();
    if (pos < x) return token;  // pos lies inside this token's text
  }
  // pos lies beyond the text of the last token
  assert(x < len);
  return Token(x, len - 1);
}

//===================================================================

}  // namespace TSyntax