|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
#include "ttokenizer.h"
|
|
Toshihiro Shimizu |
890ddd |
#include <QString>
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
namespace TSyntax
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
int Token::getIntValue() const
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
return QString::fromStdString(getText()).toInt();
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
double Token::getDoubleValue() const
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
return QString::fromStdString(getText()).toDouble();
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//===================================================================
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Builds a tokenizer with an empty buffer and the cursor on index 0.
// No token is generated here: the token list is only filled by setBuffer().
Tokenizer::Tokenizer() : m_buffer(), m_index(0) {}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
3bfa54 |
// Builds a tokenizer and immediately tokenizes 'buffer'.
// The members are first put in their empty state; setBuffer() then stores
// the text and generates the token list.
Tokenizer::Tokenizer(std::string buffer) : m_buffer(), m_index(0)
{
  this->setBuffer(buffer);
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//===================================================================
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Nothing to release explicitly: the destructor body is intentionally empty.
Tokenizer::~Tokenizer() {}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
3bfa54 |
void Tokenizer::setBuffer(std::string buffer)
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
m_buffer = buffer + '\0';
|
|
Toshihiro Shimizu |
890ddd |
m_index = 0;
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.clear();
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
bool stringBlock = false;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
int i = 0;
|
|
Toshihiro Shimizu |
890ddd |
const char *s = &m_buffer[0];
|
|
Toshihiro Shimizu |
890ddd |
for (;;) {
|
|
Toshihiro Shimizu |
890ddd |
while (isascii(s[i]) && isspace(s[i]))
|
|
Toshihiro Shimizu |
890ddd |
i++;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
int j = i;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
if (s[i] == '\0') {
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.push_back(Token("", Token::Eos, j));
|
|
Toshihiro Shimizu |
890ddd |
break;
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
if (s[i] == '"') {
|
|
Toshihiro Shimizu |
890ddd |
stringBlock = !stringBlock;
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.push_back(Token("\"", Token::Punct, j));
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
++i;
|
|
Toshihiro Shimizu |
890ddd |
continue;
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
3bfa54 |
std::string token;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
if (stringBlock) {
|
|
Toshihiro Shimizu |
890ddd |
// string block - read mercilessly until either another '"' or EOS
|
|
Shinya Kitaoka |
3bfa54 |
token = std::string(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
while (s[i] != '"' && s[i] != '\0')
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.push_back(Token(token, Token::Ident, j));
|
|
Toshihiro Shimizu |
890ddd |
} else if (isascii(s[i]) && isalpha(s[i]) || s[i] == '_') {
|
|
Toshihiro Shimizu |
890ddd |
// ident
|
|
Shinya Kitaoka |
3bfa54 |
token = std::string(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
while (isascii(s[i]) && (isalpha(s[i]) || s[i] == '_' || isdigit(s[i])))
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.push_back(Token(token, Token::Ident, j));
|
|
Toshihiro Shimizu |
890ddd |
} else if (isascii(s[i]) && isdigit(s[i]) || s[i] == '.') {
|
|
Toshihiro Shimizu |
890ddd |
// number
|
|
Toshihiro Shimizu |
890ddd |
while (isascii(s[i]) && isdigit(s[i]))
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
if (s[i] == '.') {
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
while (isascii(s[i]) && isdigit(s[i]))
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
if ((s[i] == 'e' || s[i] == 'E') &&
|
|
Toshihiro Shimizu |
890ddd |
(isascii(s[i + 1]) && isdigit(s[i + 1]) ||
|
|
Toshihiro Shimizu |
890ddd |
(s[i + 1] == '-' || s[i + 1] == '+') &&
|
|
Toshihiro Shimizu |
890ddd |
isascii(s[i + 2]) && isdigit(s[i + 2]))) {
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
if (s[i] == '-' || s[i] == '+')
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
while (isascii(s[i]) && isdigit(s[i]))
|
|
Toshihiro Shimizu |
890ddd |
token.append(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.push_back(Token(token, Token::Number, j));
|
|
Toshihiro Shimizu |
890ddd |
} else {
|
|
Toshihiro Shimizu |
890ddd |
// punct.
|
|
Toshihiro Shimizu |
890ddd |
if (s[i + 1] != '\0') {
|
|
Shinya Kitaoka |
3bfa54 |
token = std::string(s + i, 2);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
3bfa54 |
const std::string ss[] = {
|
|
Toshihiro Shimizu |
890ddd |
"==", "!=", ">=", "<=", "||", "&&"};
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
const int m = tArrayCount(ss);
|
|
Toshihiro Shimizu |
890ddd |
if (std::find(ss, ss + m, token) != ss + m)
|
|
Toshihiro Shimizu |
890ddd |
i += 2;
|
|
Toshihiro Shimizu |
890ddd |
else
|
|
Shinya Kitaoka |
3bfa54 |
token = std::string(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
} else
|
|
Shinya Kitaoka |
3bfa54 |
token = std::string(1, s[i++]);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
m_tokens.push_back(Token(token, Token::Punct, j));
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
int Tokenizer::getTokenCount() const
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
return m_tokens.size();
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Returns the token stored at 'index'.
// 'index' must lie in [0, getTokenCount()); this is only checked by assert.
const Token &Tokenizer::getToken(int index) const
{
  assert(0 <= index && index < getTokenCount());
  const Token &requested = m_tokens[index];
  return requested;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
void Tokenizer::reset()
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
m_index = 0;
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Returns the token under the cursor (m_index) without advancing it.
const Token &Tokenizer::getToken()
{
  const Token &current = getToken(m_index);
  return current;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Returns a copy of the token under the cursor, then advances the cursor.
// The cursor never moves past the last token, so once the end is reached
// repeated calls keep returning that last token.
Token Tokenizer::nextToken()
{
  Token current = getToken();

  const bool hasFollowingToken = m_index + 1 < getTokenCount();
  if (hasFollowingToken) ++m_index;

  return current;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
bool Tokenizer::eos() const
|
|
Toshihiro Shimizu |
890ddd |
{
|
|
Toshihiro Shimizu |
890ddd |
return m_index + 1 == getTokenCount();
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Returns the token covering the buffer offset 'pos'.
//  - 'pos' outside [0, m_buffer.length()) -> Token(pos)
//  - 'pos' inside a token's text          -> a copy of that token
//  - 'pos' between two tokens             -> Token(x, y - 1), where x is the
//    offset just past the previous token and y the start of the next one
//  - 'pos' after the last token's text    -> Token(x, len - 1)
// NOTE(review): Token(pos) and Token(first, last) are presumably "no token" /
// "gap" descriptors - confirm against Token's constructors.
Token Tokenizer::getTokenFromPos(int pos) const
{
  int len = m_buffer.length();
  if (pos < 0 || pos >= len) return Token(pos);

  // x: offset of the first character following the tokens visited so far.
  int x = 0;

  const int tokenCount = getTokenCount();
  for (int k = 0; k < tokenCount; ++k) {
    const Token &current = getToken(k);

    // y: offset at which the current token begins.
    int y = current.getPos();
    if (pos < y) {
      // 'pos' falls in the gap preceding the current token.
      assert(x < y);
      return Token(x, y - 1);
    }

    x = y + static_cast<int>(current.getText().length());
    if (pos < x) return current;
  }

  // 'pos' lies beyond the text of every token.
  assert(x < len);
  return Token(x, len - 1);
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//===================================================================
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
} // TSyntax
|