(*
 * QUÉSOFT MICROSYSTÈMES
 * Moka
 * Copyright 2003 Frédéric Brown
 *)

(*
 *  This file is part of Moka.
 *
 *  Moka is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.

 *  Moka is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.

 *  You should have received a copy of the GNU General Public License
 *  along with Moka; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *)

(*
 * The Tokenizer unit defines the TTokenizer class, which is used to extract 'tokens' from the
 * source file. A 'token' is most of the time a string between two blanks, but it can also be a
 * comment or a native block or expression.
 *)
unit Tokenizer;

interface

uses Util;

type
  (*
   * TTokenizer walks through a source string and hands it back one token at a time.
   * The scanning state (current position, last token, pending native block) lives in
   * the private fields below.
   *)
  TTokenizer = class

  private
    pos: Longint;          // 1-based index in str of the next character to examine
    str: AnsiString;       // Text being tokenized
    len: Longint;          // Length of str, stored once by the constructor
    nextNative: Boolean;   // True once a 'native' or 'asm' keyword announced a native block/expression
    openChar: char;        // Opening delimiter of the announced native section ('(' or '{')
    closeChar: char;       // Matching closing delimiter (')' or '}')
    last: AnsiString;      // Last token returned by nextToken
  public
    // Creates a tokenizer positioned on the first character of str.
    constructor Create(str: AnsiString);
    // Skips blanks and tells whether at least one more character remains.
    function hasMoreToken: Boolean; virtual;
    // Returns the token found at the current position and advances past it.
    function nextToken: AnsiString; overload;
    // Moves the current position to pos, then returns the token found there.
    function nextToken(pos: Longint): AnsiString; overload;
    // Tells whether the text at the current position starts with '//' or '/*'.
    function isNextComment:Boolean; virtual;
    // Tells whether token s1 is met before token s2 from the current position on
    // (also true when neither is met); the current position is restored afterwards.
    function before(s1: AnsiString; s2: AnsiString):Boolean; virtual;
  end;

(* Tokenizes s and returns its tokens, in order, in a newly created TStringVector. *)
function getStringVector(s: AnsiString): TStringVector;

implementation

uses Funcs, Globals;

(*
 * Tokenizes s and returns every token, in source order, in a newly created
 * TStringVector. The vector is created here and handed to the caller, which
 * becomes responsible for it.
 *)
function getStringVector(s: AnsiString): TStringVector;
var
  st: TTokenizer;
  vec: TStringVector;
begin
  vec := TStringVector.Create(50, 100);
  st := TTokenizer.Create(s);
  try
    while (st.hasMoreToken) do
    begin
      vec.add(st.nextToken);
    end;
  finally
    // The tokenizer is only needed while filling the vector; release it so
    // that it is not leaked on every call.
    st.Free;
  end;

  getStringVector := vec;
end;

(*
 * Builds a tokenizer over str: remembers the text and its length, places the
 * cursor on the first character and clears the native-block state.
 *)
constructor TTokenizer.Create(str: AnsiString);
begin
  self.str        := str;
  self.len        := length(str);
  self.pos        := 1;
  self.last       := '';
  self.nextNative := false;
end;

(*
 * Tells whether the two characters at the current position open a comment,
 * i.e. are '//' or '/*'. Blanks are not skipped and the position is not moved.
 *)
function TTokenizer.isNextComment:Boolean;
begin
  Result := false;

  // At least two characters are needed and the first one must be a slash.
  if (pos >= len) or (str[pos] <> '/') then
    exit;

  Result := (str[pos+1] = '/') or (str[pos+1] = '*');
end;

(*
 * Advances the current position past every character accepted by
 * est(..., vecEspa), then reports whether any character is left to read.
 *)
function TTokenizer.hasMoreToken:Boolean;
begin
  while pos <= len do
  begin
    if not est(str[pos], vecEspa) then
      break;
    Inc(pos);
  end;

  Result := (pos <= len);
end;

(*
 * Repositions the tokenizer on pos, then returns the token read from there
 * by the parameterless nextToken.
 *)
function TTokenizer.nextToken(pos: Longint): AnsiString;
begin
  self.pos := pos;
  Result := self.nextToken();
end;

(*
 * Returns the token found at the current position and moves the position
 * past it. Leading blanks are skipped first; '' is returned when nothing
 * but blanks remains. The returned token is also stored in 'last'.
 *
 * Tokens are recognised in this order:
 *   1. the body of a pending native block/expression (see nextNative),
 *   2. a '//' line comment,
 *   3. a block comment,
 *   4. a token starting with a character from vecDigit,
 *   5. a symbol from vecSym,
 *   6. a literal enclosed by a delimiter from vecDelim,
 *   7. any other word (identifier / keyword).
 *
 * est and commence come from another unit; they are presumably a membership
 * test and a "some entry starts with" test on the given vector - TODO confirm.
 *)
function TTokenizer.nextToken: AnsiString;
var
  mot: AnsiString;
  i: Longint;
  lv: Longint;
  posBack: Longint;
begin
  mot := '';

  // Skip leading blanks.
  while (pos <= len) and est(str[pos], vecEspa) do
  begin
    pos := pos + 1;
  end;

  if (pos > len) then
  begin
    nextToken := ''; //Return
    exit;
  end;

  // Body of a native block/expression: the previous token was the opening
  // delimiter, so copy everything up to the matching closing delimiter.
  // The closing delimiter itself is not consumed; it is returned by the
  // next call. Note that an empty body yields ''.
  if (nextNative and (last = openChar)) then
  begin
    lv := 1; // Nesting level; the opening delimiter was already read
    while (pos <= len) do
    begin
      if (str[pos] = openChar) then
      begin
        lv := lv + 1;
      end
      else if (str[pos] = closeChar) then
      begin
        lv := lv - 1;
        if (lv <= 0) then
        begin
          break; // Matching closing delimiter reached
        end;
      end;
      mot := mot + str[pos];
      pos := pos + 1;
    end;
    nextNative := false;
    last := mot;
    nextToken := mot; //Return
    exit;
  end;

  // Line comment: runs up to, but not including, the end of the line.
  if (pos < len) and (str[pos] = '/') and (str[pos+1] = '/') then
  begin
    mot := '//';
    pos := pos + 2;
    while (pos <= len) and (str[pos] <> #10) and (str[pos] <> #13) do
    begin
      mot := mot + str[pos];
      pos := pos + 1;
    end;
    last := mot;
    nextToken := mot; //Return
    exit;
  end;

  // Block comment.
  // NOTE(review): when the comment is never closed the loop stops on the
  // last character without copying it, and the closing marker is appended
  // anyway - confirm this is the intended recovery.
  if (pos < len) and (str[pos] = '/') and (str[pos+1] = '*') then
  begin
    mot := '/*';
    pos := pos + 2;
    while (pos < len) and not ((str[pos] = '*') and (str[pos+1] = '/')) do
    begin
      mot := mot + str[pos];
      pos := pos + 1;
    end;
    mot := mot + '*/';
    pos := pos + 2;
    last := mot;
    nextToken := mot; //Return
    exit;
  end;

  // Token starting with a digit: extends until a blank or the start of a
  // symbol, except that '.' does not end it.
  if est(str[pos], vecDigit) then
  begin
    mot := str[pos];
    while (pos < len) and not (est(str[pos+1], vecEspa) or (commence(str[pos+1], vecSym) and (str[pos+1] <> '.'))) do
    begin
      pos := pos + 1;
      mot := mot + str[pos];
    end;
    pos := pos + 1;
    last := mot;
    nextToken := mot; //Return
    exit;
  end;

  // Symbol: keep extending while the text plus the next character is still
  // an entry of vecSym. The guard is pos < len because the loop looks one
  // character ahead (str[pos+1]).
  if commence(str[pos], vecSym) then
  begin
    mot := str[pos];
    while (pos < len) and est(mot + str[pos+1], vecSym) do
    begin
      pos := pos + 1;
      mot := mot + str[pos];
    end;
    pos := pos + 1;
    last := mot;
    nextToken := mot; //Return
    exit;
  end;

  // Delimited literal: copy up to the next occurrence of the opening
  // delimiter, treating the character that follows the matching escape
  // character (vecEscDelim) as part of the literal.
  if est(str[pos], vecDelim) then
  begin
    mot := str[pos];
    i := pos; // Remembers where the opening delimiter is
    pos := pos + 1;
    while (pos <= len) and (str[pos] <> str[i]) do
    begin
      mot := mot + str[pos];
      pos := pos + 1;
      // The character just copied was the escape character: copy the
      // escaped character too, provided there is one left.
      if (pos <= len) and (str[pos-1] = vecEscDelim[getPos(str[i], vecDelim)]) then
      begin
        mot := mot + str[pos];
        pos := pos + 1;
      end;
    end;
    // Copy the closing delimiter; it is missing when the literal is
    // unterminated, in which case nothing past the end may be read.
    if (pos <= len) then
    begin
      mot := mot + str[pos];
      pos := pos + 1;
    end;
    (* Added to support ANSI strings: an 'a' / 'A' suffix belongs to the literal *)
    // NOTE(review): pos < len means a suffix that is the very last character
    // of the text is not recognised - confirm whether that is intended.
    if (pos < len) and ((str[pos] = 'a') or (str[pos] = 'A')) then
    begin
      mot := mot + str[pos];
      pos := pos + 1;
    end;

    last := mot;
    nextToken := mot; //Return
    exit;
  end
  else
  begin
    // Any other word: runs until a blank, the start of a symbol or the end
    // of the text.
    mot := str[pos];
    pos := pos + 1;
    while (pos <= len) and not est(str[pos], vecEspa) and not commence(str[pos], vecSym) do
    begin
      mot := mot + str[pos];
      pos := pos + 1;
    end;
    if (mot = 'native') then
    begin
      // Look ahead to find out which delimiters enclose the native code,
      // then come back to the position right after the keyword.
      posBack := pos;

      if (nextToken(posBack) = '.') then
      begin
        // native.<word>( ... )
        nextToken();
        if (nextToken() = '(') then
        begin
          nextNative := true;
          openChar := '(';
          closeChar := ')';
        end;
      end
      else if (nextToken(posBack) = '(') then
      begin
        // native( ... )
        nextNative := true;
        openChar := '(';
        closeChar := ')';
      end
      else
      begin
        // native ... { ... } : only when '{' comes before the next ';'
        pos := posBack;
        if before('{', ';') then
        begin
          nextNative := true;
          openChar := '{';
          closeChar := '}';
        end;
      end;

      pos := posBack;
    end
    else if (mot = 'asm') then
    begin
      // asm { ... }
      nextNative := true;
      openChar := '{';
      closeChar := '}';
    end;
    // The look-ahead above overwrote 'last'; set it back to this token.
    last := mot;
    nextToken := mot; //Return
  end;
end;

(*
 * Reads tokens from the current position until one equals s1 or s2.
 * Returns true when s1 is met first, or when neither token is met before
 * the end of the text; returns false when s2 is met first. The current
 * position is put back to where it was before the call.
 *)
function TTokenizer.before(s1: AnsiString; s2: AnsiString):Boolean;
var
  savedPos: Longint;
  tok: AnsiString;
begin
  savedPos := self.pos;
  Result := true;

  while self.hasMoreToken do
  begin
    tok := self.nextToken();
    if (tok = s1) or (tok = s2) then
    begin
      // s1 wins when the token matches both.
      Result := (tok = s1);
      break;
    end;
  end;

  self.pos := savedPos;
end;

end.


