@q file: tok_can.w@> @q% Copyright Dave Bone 1998 - 2015@> @q% /*@> @q% This Source Code Form is subject to the terms of the Mozilla Public@> @q% License, v. 2.0. If a copy of the MPL was not distributed with this@> @q% file, You can obtain one at http://mozilla.org/MPL/2.0/.@> @q% */@> @** Token container structure, templates, and functions.\fbreak The 2 specialized containers |tok_can| for tree walks and |tok_can| for raw character fetching have been optimized to eliminate the ``jit'' fetching of token for speed reasons: elimination of read mutex. See ``Notes to myself'' on discussion. This leaves the |tok_can| as unsafe. It is used internally by the library to GPS tokens against their opened files to line / character position. Sooooo, Be Ware the .... @(wtok_can.cpp@>= @; @; @; @; using namespace NS_yacco2_characters; @; @; @ @= // accrued |tok_can| code @*2 Sour Apple on template definition.\fbreak Circa December 2005, Apple \CPLUSPLUS/ gcc 4.0 compiler honks on preprocessing the |Tok_can| template definition. The template has not been instantiated but its prototype definition preprocessed into a holding source macro for future code substitution and compiling --- AKA instantiation. Unfortunately gcc 4.0 expects all prototype variables declared before preprocessing the template prototype takes place. For example variables |LOCK_MUTEX|, |UNLOCK_MUTEX|, |PTR_LR1_eog__|, and |YACCO2_T__| in template |Tok_can| below aggravates the compiler and gave me a headache. All other \CPLUSPLUS/ compilers tried like Intel \CPLUSPLUS/ 9.0, HP \CPLUSPLUS/ 6.x and 7.1 for VMS Alpha, and Microsoft's Visual Studio c++ 7.0 and 2005 all work. Alas portability is extremely trying. Am i being bruised by software savants? The work around is declare these items before the template definition. See |Notes to myself| to their response and correct position to my perceived problem. 
@+= extern void LOCK_MUTEX(yacco2::MUTEX& Mu); extern void UNLOCK_MUTEX(yacco2::MUTEX& Mu); extern void LOCK_MUTEX_OF_CALLED_PARSER (yacco2::MUTEX& Mu,yacco2::Parser& parser,const char* Text); extern void UNLOCK_MUTEX_OF_CALLED_PARSER (yacco2::MUTEX& Mu,yacco2::Parser& parser,const char* Text); @*2 |Tok_can| template.\fbreak |tok_base| forces regularity across the |tok_can| containers. |wtok_can.cpp| for |tok_can| containers of ifstream, string, and tree. @+= struct tok_base{ tok_base(USINT RW):r_w_cnt__(RW){}; virtual yacco2::UINT size()=0; virtual yacco2::CAbs_lr1_sym* operator[](yacco2::UINT Pos)=0; virtual void push_back(yacco2::CAbs_lr1_sym& Tok)=0; virtual void clear()=0; virtual bool empty()=0; USINT r_w_cnt__; };@/ template class tok_can:public tok_base{ public:@/ typedef Container value_type; typedef typename Container::size_type size_type; typedef typename Container::difference_type difference_type; typedef typename Container::iterator iterator; typedef typename Container::const_iterator const_iterator; typedef typename Container::reverse_iterator reverse_iterator; typedef typename Container::const_reverse_iterator const_reverse_iterator; typedef typename Container::pointer pointer; typedef typename Container::const_pointer const_pointer; typedef typename Container::reference reference; typedef typename Container::const_reference const_reference; tok_can():tok_base(1),pos__(0){}; ~tok_can(){}; yacco2::CAbs_lr1_sym* operator[](yacco2::UINT Pos){ if (Pos >= container__.size()){ if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::tok_can token eog: " @/ << PTR_LR1_eog__<< " pos: " << Pos << FILE_LINE << std::endl; @; } return PTR_LR1_eog__; } CAbs_lr1_sym* tok_(0); if(r_w_cnt__ > 1){ @ tok_ = container__[Pos]; @ }else{ tok_ = container__[Pos]; } if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::tok_can token: " << tok_->id__ @/ << " *: " << tok_ << " pos: " << Pos@/ << " enum: " << tok_->enumerated_id__ << '"' << tok_->id__ << '"' << FILE_LINE << 
std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(tok_) yacco2::lrclog << " GPS LINE: " << tok_->tok_co_ords__.line_no__ << " GPS CHR POS: " << tok_->tok_co_ords__.pos_in_line__ << FILE_LINE << std::endl; @; } return tok_; }; yacco2::UINT pos(){return pos__;}; yacco2::UINT size(){return container__.size();}; bool empty(){return container__.empty();}; void push_back(yacco2::CAbs_lr1_sym& Tok){container__.push_back(&Tok);}; void remove(){}; void clear(){container__.clear();}; Container& container(){return container__;}; iterator begin(){return container__.begin();}; iterator end(){return container__.end();}; private:@/ yacco2::UINT pos__; bool have_1st_rec__; Container container__; }; @*2 Specialized |tok_can| containers: ifstream and string. \fbreak They read character streams from external file or string contexts. The string container's contents can be added to dynamically (concatenated) using |set_string| procedure while parsing is taking place. The caveat is it must be before the end-of-string condition has been met. If a GPS token is passed to it at ctor creation time, the errors reported will be relative to the GPSed file. It tries hard to keep these co-ordinates relative to the spawning token who supplied the string: string new lines are not respected as this would throw off the error reporting relative the external file. |reuse_string| allows one to keep a global string token container and to reuse it. 
@+=
typedef tok_base token_container_type;@/
typedef tok_can< std::vector > GAGGLE;@/
typedef GAGGLE::iterator GAGGLE_ITER;@/
typedef GAGGLE TOKEN_GAGGLE;@/
typedef GAGGLE_ITER TOKEN_GAGGLE_ITER;@/
// External file container: characters of a file delivered as raw character
// tokens. The ctor taking a file name opens and loads the file right away;
// the default ctor expects |set_file_name| followed by |open_file|.
template<> class tok_can:public yacco2::tok_base{
  public:@/
  tok_can();
  tok_can(const char* File_name);
  ~tok_can();
  std::string& file_name();
  void set_file_name(const char* File_name);
  yacco2::CAbs_lr1_sym* operator[](yacco2::UINT Pos);
  yacco2::UINT pos();
  yacco2::UINT size();// reports |INT_MAX|, not the real token count
  bool empty();
  void push_back(yacco2::CAbs_lr1_sym& Tok);
  void remove();// no-op
  void clear();
  TOKEN_GAGGLE& container();
  bool file_ok();// test after construction or |open_file| to detect a bad file
  void open_file();
  void close_file();
  private:@/
  std::ifstream file__;
  yacco2::UINT pos_;// highest position delivered so far
  bool have_1st_rec__;
  std::ios::int_type eof_pos_;// |EOF| once end-of-file has been met
  bool file_ok_;
  UINT line_no__;
  UINT pos_in_line__;
  TOKEN_GAGGLE container__;// tokens already mapped from the stream
  std::string file_name__;
  yacco2::UINT file_no__;// assigned by |open_file| when the open succeeds
};
// String container: characters of an in-memory string delivered as raw
// character tokens. An optional GPS token supplies the line and file
// co-ordinates the produced tokens are reported against.
template<> class tok_can:public yacco2::tok_base{
  public:@/
  tok_can();
  tok_can(const char* String,CAbs_lr1_sym* GPS=0);
  ~tok_can();
  void set_string(const char* String);// appends to the current string
  void reuse_string(const char* String,CAbs_lr1_sym* GPS = 0);// replaces the string
  yacco2::CAbs_lr1_sym* operator[](yacco2::UINT Pos);
  yacco2::UINT pos();
  yacco2::UINT size();// length of the string in characters
  bool empty();
  void push_back(yacco2::CAbs_lr1_sym& Tok);
  void remove();// no-op
  void clear();
  TOKEN_GAGGLE& container();
  std::string* string_used();// address of the internal string
  void set_gps(CAbs_lr1_sym* Gps);
  yacco2::CAbs_lr1_sym* gps_used();
  private:@/
  std::string string__;
  yacco2::UINT pos_;
  bool have_1st_rec__;
  std::ios::int_type eof_pos_;// |EOF| once end-of-string has been met
  UINT line_no__;
  UINT pos_in_line__;
  TOKEN_GAGGLE container__;
  CAbs_lr1_sym* eof_sym_;// the end token handed out after end-of-string
  yacco2::UINT file_no__;// |MAX_USINT| when no outside file is referenced
  int real_start_pos_in_line_;// column of the GPS token, base for re-alignment
  yacco2::CAbs_lr1_sym* gps__;
};

@*2 Tree container and its related paraphernalia.\fbreak There's the functor for the tree walker that includes the stack, a user functor that executes when the node is visited, a set filter mechanism to include or exclude node types, and the tree node itself. Filters are just sets of Tes enumerated ids of T vocabulary.
All T types lr, rc, error, and T are allowed. A filter type of bypass or accept makes walking the trees easier in selecting T. A nil based filter implies all Tes are accepted. The tree walkers supported are pre and post fix, and various flavours of breadth walks. A forest walk refines the scope of the tree to be walked even though the forest node can be linked to the tree. Due to the nature of a binary tree, the infix tree walker is not supported. The tree structure is provided by the |AST| definition which is just a tree node wrapper for the grammar's vocabulary. Its content is abstracted to |CAbs_lr1_sym| as it has no psychic powers of the future grammar user. In tandem with the enumeration value of the abstracted symbol, the casting operator brings it out of the closet so-to-speak. Dominance is provided by the |lt_| link while |rt_| provides the equivalence link. To aid in walking the tree, the |pr_| link provides the backward link to its immediate caller. This link can be its older sibling, its parent when it's the first child, or nil when the node is the root of the tree.
@+=
struct AST;
struct ast_base_stack;@/
typedef std::set int_set_type;@/
typedef int_set_type::iterator int_set_iter_type;@/
typedef std::vector ast_vector_type;@/
typedef std::vector ast_accept_node_type;@/
// verdict a user functor hands back to the walker for the visited node
typedef enum{bypass_node,accept_node,stop_walking} functor_result_type;@/
typedef ast_vector_type Type_AST_ancestor_list;
// User supplied action called by a tree walker.
// NOTE(review): abstract but without a virtual destructor; harmless only
// while functors are never deleted through this base.
template struct ast_functor{
  virtual functor_result_type operator()(T Ast_env)=0;
};@/
typedef ast_functor Type_AST_functor;@/
// Walk stack shared by the tree walkers: the records, the user functor, and
// the optional filter set with its accept or bypass switch.
struct ast_base_stack{
  typedef enum n_action{init,left,visit,right,eoc} n_action_;
  struct s_rec{// one stack record: a node and the action pending on it
    AST* node_;
    n_action_ act_;
  };
  ast_base_stack();
  ast_base_stack(Type_AST_functor* Action,yacco2::int_set_type* Filter=0,bool Accept_opt=true);
  s_rec* stk_rec(yacco2::INT I);
  void pop();
  void push(AST& Node,ast_base_stack::n_action Action);
  yacco2::INT cur_stk_index();
  s_rec* cur_stk_rec();
  yacco2::INT idx_;// index
  std::vector stk_;
  Type_AST_functor* action_;
  s_rec* cur_stk_rec_;// the tree container reads nil here as end of traversal
  yacco2::int_set_type* filter_;// nil means no filtering
  bool accept_opt_;// true: accept what is in the filter; false: bypass it
};
// Abstract tree walker owning an |ast_base_stack|.
// NOTE(review): also abstract without a virtual destructor.
struct ast_stack{
  ast_stack(Type_AST_functor* Action,yacco2::int_set_type* Filter=0,bool Accept_opt=true);
  ast_base_stack base_stk_;
  virtual void exec()=0;
  virtual void advance()=0;
};

@*2 Tree node definition |AST|.\fbreak Note on linkages:\fbreak \ptindent{1) lt parent to son relationship: dominant order} \ptindent{2) rt older to younger relationship: equivalence order} \ptindent{3) pr points to previous older brother or parent} The ``pr'' relationship provides a backward link in the tree. It's just a pointer to an older node in the tree: a younger brother linking to its older brother or the 1st son linking to its parent. A dink node (double income no kids) would have lt null: no kids. Within its surrounding, A dink node could still be a son or a forest.
@+=
// Tree node wrapping one grammar symbol. All tree building, searching, and
// relinking services are static members taking the nodes as arguments;
// their implementations are not part of this chunk.
struct AST{
  AST(yacco2::CAbs_lr1_sym& Obj);
  AST();
  ~AST();
  // tree construction
  static AST* restructure_2trees_into_1tree(AST& S1,AST& S2);
  static void crt_tree_of_1son(AST& Parent,AST& S1);
  static void crt_tree_of_2sons(AST& Parent,AST& S1,AST& S2);
  static void crt_tree_of_3sons(AST& Parent,AST& S1,AST& S2,AST& S3);
  static void crt_tree_of_4sons(AST& Parent,AST& S1,AST& S2,AST& S3,AST& S4);
  static void crt_tree_of_5sons(AST& Parent,AST& S1,AST& S2,AST& S3,AST& S4,AST& S5);
  static void crt_tree_of_6sons(AST& Parent,AST& S1,AST& S2,AST& S3,AST& S4,AST& S5,AST& S6);
  static void crt_tree_of_7sons(AST& Parent,AST& S1,AST& S2,AST& S3,AST& S4,AST& S5,AST& S6
    ,AST& S7);
  static void crt_tree_of_8sons(AST& Parent,AST& S1,AST& S2,AST& S3,AST& S4,AST& S5
    ,AST& S6,AST& S7,AST& S8);
  static void crt_tree_of_9sons(AST& Parent,AST& S1,AST& S2,AST& S3,AST& S4,AST& S5
    ,AST& S6,AST& S7,AST& S8,AST& S9);
  static void join_pts(AST& Parent,AST& Sibling);
  static void join_sts(AST& Elder_sibling,AST& Younger_sibling);
  static void ast_delete(AST& Node,bool Due_to_abort=false);
  // searching and content access
  static AST* find_depth(AST& Node,yacco2::INT Enum);
  static AST* find_breadth(AST& Node,yacco2::INT Enum);
  static yacco2::CAbs_lr1_sym* content(AST& Node);
  // navigation
  static AST* get_1st_son(AST& Node);
  static AST* get_2nd_son(AST& Node);
  static AST* get_3rd_son(AST& Node);
  static AST* get_4th_son(AST& Node);
  static AST* get_5th_son(AST& Node);
  static AST* get_6th_son(AST& Node);
  static AST* get_7th_son(AST& Node);
  static AST* get_8th_son(AST& Node);
  static AST* get_9th_son(AST& Node);
  static AST* get_spec_child(AST& Tree,yacco2::INT Cnt);
  static AST* get_child_at_end(AST& Tree);
  static AST* add_child_at_end(AST& Tree,AST& Child);
  static AST* get_younger_sibling(AST& Child,yacco2::INT Pos);
  static AST* get_older_sibling(AST& Child,yacco2::INT Pos);
  static AST* get_youngest_sibling(AST& Child);
  static AST* get_parent(AST& Child);
  static AST* common_ancestor@/
    (Type_AST_ancestor_list& ListA,Type_AST_ancestor_list& ListB);
  static AST* brother(AST& Node);
  static AST* previous(AST& Node);
  // link and content mutation
  static void zero_1st_son(AST& Node);
  static void zero_2nd_son(AST& Node);
  static void zero_brother(AST& Node);
  static void zero_previous(AST& Node);
  static void zero_content(AST& Node);
  static void set_content(AST& Node,yacco2::CAbs_lr1_sym& Sym);
  static void set_content_wdelete(AST& Node,yacco2::CAbs_lr1_sym& Sym);
  static void set_previous(AST& Node,AST& Previous_node);
  static void wdelete(AST& Node,bool Wdelete);
  static bool wdelete(AST& Node);
  static void replace_node(AST& Old_to,AST& New_to);
  static void relink(AST& Previous,AST& Old_to,AST& New_to);
  static void relink_between(AST& Previous,AST& Old_to,AST& New_to);
  static void relink_after(AST& Previous,AST& New_to);
  static void relink_before(AST& Previous,AST& New_to);
  static void add_son_to_tree(AST& Parent,AST& Son);
  static AST* divorce_node_from_tree(AST& Node);
  static AST* clone_tree(AST& Node_to_copy,AST* Calling_node
    ,ast_base_stack::n_action Relation=ast_base_stack::init);
  AST* lt_;// parent to first son: dominance
  AST* rt_;// older to younger brother: equivalence
  AST* pr_;// caller who links to it
  yacco2::CAbs_lr1_sym* obj_;// the wrapped symbol
  bool wdelete_;
};

@*2 Tree |tok_can | container with accept / bypass |filters|.\fbreak The interesting part is use of the |int_set_type| filter and its companion |Accept_opt| in the constructor of the |tree_walker|. The |int_set_type| filter just contains the Terminal enumerations to either accept or bypass. If these parameters are defaulted, there is no |int_set_type| filter present so the complete tree is handed off for consumption of each node's content. Having |Accept_opt| {\bf true} means accept only the items in the set while {\bf false} means bypass the items found in the filter set when the tree is walked. This is a very powerful way to flatten a branching structure. Please note |nodes_visited_| holds the terminals accepted by the filter in the traversal order. It is an array of |AST*|. To access a token's tree node, u need the container address.
If a grammar is receiving its terminals by a walked tree, casting the container address to |tok_can*| allows one to access the container's tree node vector: |nodes_visited()|. The below code fetches the container's address from a piece of syntax~directed~code of a grammar's rule:\fbreak \ptindent{|tok_can* can = (tok_can*)parser()->token_supplier();|} To fetch a specific tree node of a token, u can use the container's ast function giving it the position within the container: Remember its relative to 0. For example u want to fetch the tree node associated with the 1st token using the above container:\fbreak \ptindent{|AST* first_tok_tree = can->ast(0);|} The other note is a shifted token on the parse stack is {\bf not the current token}. Why? The current token is the lookahead token and the one u want is on parse stack! Here is a sample code snippet to get the shifted token's tree address using the above container with another way to fetch its tree:\fbreak \ptindent{|AST* t = (*can->nodes_visited())[parser()->current_token_pos()-1];|} Why use parser's |current_token_pos()| instead of the container's |pos()|? Good question: they are equivalent except when one is reusing the container to deliver tokens to another grammar. The recycled container's pos {\bf contains the residue from the previous reads: its last token position}. Ugh but this is reality. The sundry tree routines can now be used to walk or fetch the contents of the local tree node. \fbreak \fbreak {\bf Caveat: EOG Handling.}\fbreak Make sure u add an |eog| node to the end of the tree so that proper end-of-tree handling is done. U do this by:\fbreak \ptindent{|AST* eog_t = new AST(*yacco2::PTR_LR1_eog__);|} \ptindent{then add the node to the end-of-the-tree using one of the tree linking routines} \fbreak If it is not added, an |eog| token is returned but there is no associated tree node. So the last token read is not the lookahead but the previous (shifted) token. 
If u are using an accept filter, make sure the |eog| is included in the accept set so that |eog| gets its associated end-of-tree node. Please see ``Tree containers, functors, and walkers'' later in this document for their descriptions. \fbreak \fbreak {\bf Another way to access the container and its contents.}\fbreak Set up a filter and ``for loop'' the container to fill it up while the body of the for loop can done specific activity. This method can be done outside of the parsing activity or within ``syntax directed code'' of a grammar. Just give the tree and rip thru it using the filter. \fbreak
\let\setuplistinghook = \linenumberedlisting
\listing{"/usr/local/yacco2/diagrams/tokcanaccess.txt"}
\fbreak
\let\setuplistinghook = \relax
\fbreak

@+=
// Tree container: tokens are the contents of the nodes a tree walker
// accepts. Visited nodes are remembered so a position can be re-read
// without walking the tree again.
template<> class tok_can:public yacco2::tok_base{
  public:@/
  tok_can (ast_stack& Walker);// fetches position 0 immediately
  ~tok_can();
  yacco2::CAbs_lr1_sym* operator[](yacco2::UINT Pos);
  yacco2::UINT pos();
  yacco2::UINT size();// |MAX_UINT| until the end of the tree has been met
  bool empty();
  void push_back(yacco2::AST& Node);
  void push_back(yacco2::CAbs_lr1_sym& Node);// required by the base, does nothing
  void remove();
  void clear();
  yacco2::ast_stack& container();// the walker, not a token vector
  std::vector* nodes_visited();
  yacco2::AST* ast(yacco2::UINT Pos);// tree node behind the token at |Pos|
  yacco2::INT accept_node_level(yacco2::UINT Pos);
  private:@/
  volatile yacco2::UINT pos_;
  bool have_1st_rec__;
  bool tree_end_reached__;
  yacco2::ast_vector_type nodes_visited_;// accepted nodes in traversal order
  yacco2::ast_accept_node_type accept_node_level_;// walker stack index per accepted node
  yacco2::ast_stack& traverse_;
};

@*2 String |tok_can| implementation.
@+= yacco2::tok_can::tok_can() :tok_base(1) ,pos_(0) ,have_1st_rec__(false) ,file_no__(MAX_USINT) ,line_no__(START_LINE_NO) ,pos_in_line__(START_CHAR_POS) ,string__(std::string()) ,eof_sym_(0) ,real_start_pos_in_line_(START_CHAR_POS) ,eof_pos_(0) ,gps__(0) {} yacco2::tok_can::tok_can(const char* String,CAbs_lr1_sym* GPS)@/ :tok_base(1) ,pos_(0) ,have_1st_rec__(false) ,file_no__(MAX_USINT) ,line_no__(START_LINE_NO) ,pos_in_line__(START_CHAR_POS) ,string__(String) ,eof_sym_(0) ,real_start_pos_in_line_(START_CHAR_POS) ,eof_pos_(0) ,gps__(GPS) { if(GPS == 0) return; line_no__ = GPS->tok_co_ords__.line_no__; pos_in_line__ = GPS->tok_co_ords__.pos_in_line__; file_no__ = GPS->tok_co_ords__.external_file_id__; real_start_pos_in_line_ = pos_in_line__; } void yacco2::tok_can::set_gps(CAbs_lr1_sym* GPS){ gps__ = GPS; if(GPS == 0) return; line_no__ = GPS->tok_co_ords__.line_no__; pos_in_line__ = GPS->tok_co_ords__.pos_in_line__; file_no__ = GPS->tok_co_ords__.external_file_id__; real_start_pos_in_line_ = pos_in_line__; } yacco2::CAbs_lr1_sym* yacco2::tok_can::gps_used(){ return gps__; } yacco2::tok_can::~tok_can(){} bool yacco2::tok_can::empty(){ if(string__.empty() == true) return YES; return NO; } void yacco2::tok_can::reuse_string(const char* Str,CAbs_lr1_sym* GPS){ string__.erase(); string__ += Str; file_no__ = MAX_USINT; line_no__ = START_LINE_NO; pos_in_line__ = START_CHAR_POS; eof_sym_ = 0; real_start_pos_in_line_ = START_CHAR_POS; eof_pos_ = 0; if(GPS == 0) return; set_gps(GPS); } @*2 |Tok_canoperator[]|.\fbreak This is the heart of the container. Three things are of interest: the just-in-time character access, the 2 ``eog'' token symbols added to the end-of-file condition, and how to report errors inside the string relative to the file that provided the string: its contents cannot increment new line with character alignment. Why? When u report an error back to the original file containing the string, it is GPSed to it and not its contents. 
The string's line number stays the same while the line position increments towards the right without regard to the new line character. This allows the container to be handled like its brethern within the grammar context. Note: |map_char_to_raw_char_sym| maintains the line:character segmentation as the string is being read and so must be re-aligned afterwards. The file no reference to the outside source is hardwired using the MAX\_USINT symbol when there is possibly no outside file referenced: eg, internal memory string for the parsing. A bit of a kludge (ahum) as this condition goes against the 0..n declaration for external file numbers. This is watched for when the external file out-of-bounds occurs: reported is ``No external file''.

@+=
// Deliver the token at |Pos|. Characters of |string__| are mapped to raw
// character symbols on demand and remembered in |container__|; a request
// beyond what has been read keeps reading until |Pos| or the end of the
// string is met. At the end the same end token is stored twice and returned.
yacco2::CAbs_lr1_sym* yacco2::tok_can::operator[](yacco2::UINT Pos){
  CAbs_lr1_sym* sym(0);
  // NOTE(review): after the end was met every request, even for an earlier
  // |Pos|, is answered with |eof_sym_|; the file container also tests
  // |Pos >= pos_| here. Confirm this difference is intended.
  if(eof_pos_ == EOF) return eof_sym_;
  fetch_char:@/
  if(have_1st_rec__ == false){
    have_1st_rec__ = true;
    pos_ = 0;
  }else{
    if(Pos <= pos_){// already read: answer from the container
      return container__[Pos];
    }
    ++pos_;
  }
  if(r_w_cnt__ > 1){
    @
  }
  for(;;){// fetch token somewhere in char stream
    char c;
    if(pos_ >= string__.size()){// eof: add two lrk eog
      eof_pos_ = EOF;
      ++pos_;// 2nd eog pos, same token used
      sym = RC__.map_char_to_raw_char_sym(EOF_CHAR_SUB,file_no__,pos_
        ,&line_no__,&pos_in_line__);
      eof_sym_ = sym;
      container__.push_back(*sym);
      container__.push_back(*sym);
      // NOTE(review): this return skips the second |r_w_cnt__| block below;
      // verify nothing acquired in the first block is left held.
      return sym;
    }
    c = string__[pos_];
    convert_char_to_unsigned_value:@/
    unsigned char uc = c;
    UINT slno = line_no__;// saved: the mapping may advance the line number
    sym = RC__.map_char_to_raw_char_sym(uc,file_no__,pos_,&line_no__,&pos_in_line__);
    if(gps__ != 0){// re-align against the proxy token
      line_no__ = slno;
      pos_in_line__=real_start_pos_in_line_+pos_;
    }
    container__.push_back(*sym);
    if(Pos == pos_) break;
    ++pos_;
    continue;
  };
  if(r_w_cnt__ > 1){
    @
  }
  return sym;
}

@*2 |Tok_can|size.

@+=
// Number of characters in the string, not the number of tokens mapped so far.
yacco2::UINT yacco2::tok_can::size(){
  return string__.size();
}

@*2 Balance of sundry routines.
@+= yacco2::UINT yacco2::tok_can::pos(){return pos_;} void yacco2::tok_can::push_back(yacco2::CAbs_lr1_sym& Tok){ container__.push_back(Tok); } void yacco2::tok_can::clear(){container__.clear(); pos_=0; have_1st_rec__=false; file_no__=MAX_USINT; line_no__=START_LINE_NO; pos_in_line__=START_CHAR_POS; string__.clear(); eof_sym_=0; real_start_pos_in_line_=START_CHAR_POS; eof_pos_ = 0; gps__ = 0; } TOKEN_GAGGLE& yacco2::tok_can::container(){ return container__; } void tok_can::remove(){} void yacco2::tok_can::set_string(const char* String){ string__ += String; } std::string* yacco2::tok_can::string_used(){return &string__;}; @*2 External file |tok_can| implementation.\fbreak Removed the ``jit'' approach and now at |open_file| time the complete input is placed into the container. See ``Notes to myself'' on its discussion. @+= yacco2::tok_can::tok_can() :tok_base(1) ,pos_(0) ,have_1st_rec__(false) ,eof_pos_(EOF) ,file_ok_(NO) ,line_no__(START_LINE_NO) ,pos_in_line__(START_CHAR_POS) ,file_name__(std::string()) {} yacco2::tok_can::tok_can(const char* File_name)@/ :tok_base(1) ,pos_(0) ,have_1st_rec__(false) ,eof_pos_(EOF) ,file_ok_(NO) ,line_no__(START_LINE_NO) ,pos_in_line__(START_CHAR_POS) ,file_name__(File_name) { open_file(); } yacco2::tok_can::~tok_can(){ if(file_ok_ == YES) file__.close(); } bool yacco2::tok_can::empty(){ if(have_1st_rec__ == false) return YES; return NO; } @*2 |File_ok|.\fbreak By testing after the ctor has tried to open the file, one can do whatever is appropriate in a bad file situation. Originally a bad file condition was thrown. Now it's more gentle. @+= bool yacco2::tok_can::file_ok(){ return file_ok_; } @*2 |Tok_canoperator[]|.\fbreak This is the heart of the container. Two things are of interest: the just-in-time character access, and the 2 ``eog'' token symbols added to the end-of-file condition. This allows the container to be handled like its brethern within the grammar context. 
Note: |map_char_to_raw_char_sym| maintains the line:character segmentation as the file is being read. @+= yacco2::CAbs_lr1_sym* yacco2::tok_can::operator[](yacco2::UINT Pos){ if(file_ok_ == NO){ char a[BUFFER_SIZE]; @.Err tok\_canoperator[]...@> yacco2::KCHARP msg = "tok_canoperator[] trying to access file that is bad: %s, position %i "; sprintf(a,msg,file_name__.c_str(),Pos); Yacco2_faulty_precondition(a,__FILE__,__LINE__); exit(1); } CAbs_lr1_sym* sym(0); if(eof_pos_ == EOF && Pos >= pos_){ return container__[pos_]; } fetch_char:@/ if(have_1st_rec__ == false){ have_1st_rec__ = true; pos_ = 0; }else{ if(Pos <= pos_){ return container__[Pos]; } ++pos_; } if(r_w_cnt__ > 1){ @ } for(;;){// fetch token somewhere in char stream char c; file__ >> c; if((file__.good() == false) or (file__.eof() == true)){// eof: add two lrk eog eof_pos_ = EOF; ++pos_;// 2nd eog pos, same token used sym = RC__.map_char_to_raw_char_sym(EOF_CHAR_SUB,file_no__,pos_ ,&line_no__,&pos_in_line__); container__.push_back(*sym); container__.push_back(*sym); return sym; } convert_char_to_unsigned_value:@/ unsigned char uc = c; sym = RC__.map_char_to_raw_char_sym(uc,file_no__,pos_,&line_no__,&pos_in_line__); container__.push_back(*sym); if(Pos == pos_) break; ++pos_; continue; }; if(r_w_cnt__ > 1){ @ } return sym; } @*2 |Tok_can|size.\fbreak Due to the just-in-time attitude, the container's size has no meaning. Its size indicates the number of symbols currently in-process and not the total number of characters in the file stream. I guess I could try to use the file system to figure out its size but I'm not sure if this is portable as in the case of line delimiters: DEC versus ASCII. So, just fake it and allow the end-of-file situation deal with it. Use of the ``maximum signed integer'' constant does the trick in faking it as a very big text file. Who in their mind would create 2 billion characters?: ahhh wait for the XML crowd. Now who in hell uses this test? 
My parser does in accessing the token containers by use of the constraint facility testing for possible subscript overflow. @+= yacco2::UINT yacco2::tok_can::size(){ return INT_MAX; } @*2 |tok_can::open_file|.\fbreak This routine allows one to delay the use of an external file by declaring the container without the file name. Before its use, the file name is supplied by the |set_file_name| method and then the |open_file| method called. For example the container could be declared globally but one supplies the file to-be-read as in passing the file name thru the program's main parameter facility. Removed the ``jit'' attitude and now read all its input into the container for speeeeed reasons --- this is not a William Borough's novel. @+= void yacco2::tok_can::open_file(){ CAbs_lr1_sym*sym(0); open_file:@/ file__.open(file_name__.c_str(),std::ios::in); if(file__.is_open())goto filename_opened; else goto filename_bad; filename_opened:{@/ file_ok_= YES; ++yacco2::FILE_CNT__; have_1st_rec__= true; pos_= 0; if(yacco2::FILE_CNT__==1){ std::string empty; yacco2::FILE_TBL__.push_back(empty); } yacco2::STK_FILE_NOS__.push_back(yacco2::FILE_CNT__); file_no__= yacco2::STK_FILE_NOS__.back(); yacco2::FILE_TBL__.push_back(file_name__); eof_pos_= 0; set_dont_skip_any_chars:@/ file__>>std::noskipws; for(;eof_pos_!=EOF;++pos_){ char c; if (file__.good() == true){ file__>>c; } if (file__.eof()==true) goto eoroad; if (file__.fail()==true) goto eoroad; convert_char_to_unsigned_value: unsigned char uc= c; sym= RC__.map_char_to_raw_char_sym(uc,file_no__,pos_,&line_no__,&pos_in_line__); container__.push_back(*sym); } } filename_bad:{@/ eof_pos_= EOF; file_ok_= NO; return; } eoroad:{@/ eof_pos_= EOF; ++pos_; sym= RC__.map_char_to_raw_char_sym(EOF_CHAR_SUB,file_no__,pos_ ,&line_no__,&pos_in_line__); container__.push_back(*sym);// 2 eof added really 2 eogs container__.push_back(*sym); return; } } @*2 |tok_can::close_file|.\fbreak This routine allows one close a file prematurely or to reuse the 
token container for another round of parsing. @+= void yacco2::tok_can::close_file(){ if(file__.is_open()){ if(file_ok_ == YES){ file__.close(); } } file_ok_ = false; } @*2 Balance of sundry routines. @+= yacco2::UINT yacco2::tok_can::pos(){return pos_;} void yacco2::tok_can::push_back(yacco2::CAbs_lr1_sym& Tok){ container__.push_back(Tok); } void yacco2::tok_can::clear(){ container__.clear(); pos_=0; have_1st_rec__=false; eof_pos_=EOF; file_ok_=NO; line_no__=START_LINE_NO; pos_in_line__=START_CHAR_POS; file_name__.clear(); } TOKEN_GAGGLE& yacco2::tok_can::container(){ return container__; } void yacco2::tok_can::remove(){} std::string& yacco2::tok_can::file_name(){return file_name__;} void yacco2::tok_can::set_file_name(const char* File_name){ file_name__ += File_name; } @*2 Tree token container implementation |tok_can|.\fbreak This is your tree container of tokens. A filter mechanism is passed to the template. It is just a set of terminal enumerates with it companion indicator of include or exclude the terminals in the tree traversal within the tree walker. The traversal operator also keeps a subscript marker as to where its traversed. This allows one to interrogate the container for a token without having to re-traverse the tree. Excuse the acronym but it is a just-in-time delivery mechanism. If the subscript is within bounds of the container, it delivers the already traversed tree's token. Out-of-bounds will continue the tree traversal looking for the requested token-by-number. If the token number is not continuous, the container gets filled up with the inbetween tokens found in the traversal before returning the requested terminal. When the end-of-tree has been met, the |PTR_LR1_eog__| terminal is returned. This is in keeping with the other containers. \fbreak \fbreak Optimization: remove jit for all input filled in at ctor.\fbreak This jit optimization is removed due to self modifying of tree nodes. 
Without this the old container that called the self modifying of a tree node contains the old T in its container. So in with the reader mutex and its slow down and out with the speed for self modifying tree nodes. Please read ``Notes to myself'' of item ''Tree Modifying while walking them...'' discussing the ``how tos'' of dealing with dynamic self-modifying tree setting.

@+=
// Bind the container to |Walker| and fetch position 0 straight away.
yacco2::tok_can::
tok_can (yacco2::ast_stack& Walker)@/
  :tok_base(1)@/
  ,pos_(0)@/
  ,have_1st_rec__(false)@/
  ,tree_end_reached__(false)@/
  ,nodes_visited_()@/
  ,accept_node_level_()@/
  ,traverse_(Walker){
  operator[](0);// needed: ensures container has tried to get first T before its use
}
yacco2::tok_can::
~tok_can(){}
// Empty while no tree node has been accepted.
bool yacco2::tok_can::
empty(){
  return nodes_visited_.empty();
}
// Forgets the visited nodes only.
// NOTE(review): |accept_node_level_|, |pos_|, and the two state flags are
// left as they are; confirm that this partial reset is what callers expect.
void yacco2::tok_can::
clear(){nodes_visited_.clear();}

@*2 Tree container dispensor.\fbreak It delivers tokens by the numbers. At present, this number is relative to 0. Ugh!\fbreak \fbreak If the tree node number is within the token container then return it. For token numbers outside the current container, the tree is traversed putting the accepted tokens into the container until either the end-of-tree is reached or the token requested is found. The container of tokens allows one to re-iterate many times over the token stream. It also optimizes the token stream by one-pass-only on the tree. An end-of-tree condition returns the |PTR_LR1_eog__| token back to the user. This is in the same spirit of the other token containers. It allows grammars to be written without any knowledge as to its input token stream.
@+=
// Deliver the token at |Pos|. After the end of the tree only positions below
// |pos_| are answered from the visited nodes; anything else yields
// |PTR_LR1_eog__|. Before that, a position up to |pos_| is answered from the
// visited nodes and a higher one drives the walker, collecting nodes until
// |Pos| or the end of the tree is met.
// The bare control codes below stand for named sections whose names are not
// visible in this chunk; presumably they are the sections defined right
// after this routine.
yacco2::CAbs_lr1_sym* yacco2::tok_can::
operator[](yacco2::UINT Pos){
  AST* t;
  CAbs_lr1_sym* tsym;
  AST* vnode;
  CAbs_lr1_sym* sym;
  ast_base_stack::s_rec* srec;
  if(tree_end_reached__ == true){
    if(Pos < pos_) goto in_bnds;
    if(YACCO2_T__ != 0){
      @;
      yacco2::lrclog << "YACCO2_T__::tok_can token eog: " @/
        << PTR_LR1_eog__<< " pos: " << Pos << __FILE__ << __LINE__<< std::endl;
      @;
    }
    sym = PTR_LR1_eog__;
    goto rtn_fnd_T;
  }
  first_time_accessed:@/
  if(have_1st_rec__ == false){
    have_1st_rec__ = true;
    goto out_bnds;
  }
  determine_where_t_is:
  if(Pos <= pos_){// already in container
    goto in_bnds;
  }
  ++pos_;// next node
  goto out_bnds;
  in_bnds:@/
  @;
  out_bnds:@/
  if(r_w_cnt__ > 1){
    @
  }
  get_tree_rec:{
    @;
    @;
    @;
    if(Pos == pos_) goto rtn_fnd_T;
    ++pos_;
    goto get_tree_rec;// keep filling container until Pos met
  }
  rtn_fnd_T:@/
  if(r_w_cnt__ > 1){
    @;
  }
  return sym;
}

@ Fetch and return token from container instead of tree.\fbreak Prefetch next T and place in container when the current request is on its boundry and parallel readers are occuring.\fbreak Ip constraint: The sequential request always has the T inside its container.\fbreak Random request: Who'll need it? If it happens, the container's suitor count is checked and protected with a mutex.

@=
t = nodes_visited_[Pos];
tsym = AST::content(*t);
if(YACCO2_T__ != 0){
  @;
  yacco2::lrclog << "YACCO2_T__::tok_can in-bnds already in container token: " << tsym->id__ @/
    << " *: " << tsym << " pos: " << Pos << " id: " << tsym->id__<< " enum: " << tsym->enumerated_id__ << FILE_LINE << std::endl;
  yacco2::lrclog << "\t\t::GPS FILE: ";
  EXTERNAL_GPSing(tsym)
  yacco2::lrclog << " GPS LINE: " << tsym->tok_co_ords__.line_no__
    << " GPS CHR POS: " << tsym->tok_co_ords__.pos_in_line__ << FILE_LINE << std::endl;
  @;
}
@;

@ Lookahead T needed? no rtn fnd t.\fbreak Lookahead is only needed when parallel reads are happening. If there is only one reader, it is always safe and can default to ``jit'' access.

@=
if(r_w_cnt__ == 1) return tsym;// no parallel suitors
if(Pos < pos_) return tsym;// not on the edge
@
if(tree_end_reached__ == true){// ure parallel phatom got here before u
}else{
  if(Pos == pos_){// still needed as the other suitor could have looked ahead
    ++pos_;
    @;
    @;
  }
}
@
return tsym;

@ End of traverse reached for lookahead?.

@=
if(tree_end_reached__ != true){// test for other consumer's action
  srec = traverse_.base_stk_.cur_stk_rec_;
  if(srec == 0){// the walker has no current record: the tree is exhausted
    tree_end_reached__ = true;
    if(YACCO2_T__ != 0){
      @;
      yacco2::lrclog << "YACCO2_T__::tok_can token eog: " @/
        << PTR_LR1_eog__<< " pos: " << pos_ << FILE_LINE << std::endl;
      @;
    }
  }else{
    @;
  }
}

@ Traverse tree.

@=
traverse_.exec();

@ End of traverse reached?.

@=
srec = traverse_.base_stk_.cur_stk_rec_;
if(srec == 0){// the walker has no current record: the tree is exhausted
  tree_end_reached__ = true;
  if(YACCO2_T__ != 0){
    @;
    yacco2::lrclog << "YACCO2_T__::tok_can token eog: " @/
      << PTR_LR1_eog__<< " pos: " << pos_ << FILE_LINE << std::endl;
    @;
  }
  sym = PTR_LR1_eog__;// end-of-tree
  goto rtn_fnd_T;
}

@ Put node in container.

@=
vnode = traverse_.base_stk_.cur_stk_rec_->node_;
sym = AST::content(*vnode);
// the two vectors grow together: one level entry per visited node
accept_node_level_.push_back(traverse_.base_stk_.idx_);
nodes_visited_.push_back(vnode);
if(YACCO2_T__ != 0){
  @;
  yacco2::lrclog << "YACCO2_T__::tok_can token: " << sym->id__ @/
    << " *: " << sym << " pos: " << pos_ << " requested pos: " << Pos
    << " node*: " << vnode << " node content*: " << AST::content(*vnode) << FILE_LINE << std::endl;
  @;
}

@*2 Balance of tree container routines.
@+= yacco2::UINT yacco2::tok_can:: pos(){return pos_;} yacco2::UINT yacco2::tok_can:: size(){ if(tree_end_reached__ == true){ return nodes_visited_.size(); } return MAX_UINT; } void yacco2::tok_can:: push_back(AST& Tok_ast){ nodes_visited_.push_back(&Tok_ast); ++pos_; } void yacco2::tok_can::// defed due to template push_back(yacco2::CAbs_lr1_sym& Node){} // but not meaningful in tree context yacco2::ast_stack& yacco2::tok_can:: container(){ return traverse_; } std::vector* yacco2::tok_can:: nodes_visited(){ return &nodes_visited_; } void yacco2::tok_can:: remove(){ nodes_visited_.pop_back(); --pos_; } yacco2::AST* yacco2::tok_can:: ast(yacco2::UINT Pos){ if(Pos > pos_) return 0; return nodes_visited_.operator[](Pos); } yacco2::INT yacco2::tok_can:: accept_node_level(yacco2::UINT Pos){ if((accept_node_level_.size() - 1) < Pos) return No_Token_start_pos; return accept_node_level_.operator[](Pos); }