/*
 Copyright Dave Bone 1998 - 2014
 All Rights Reserved.
 No part of this document may be reproduced without written consent from the author.

 FILE:     esc_seq.lex
 Dates:    25 Juin 2003
 Purpose:  escape sequence
           Eg. \n or octal escape \127 or hex escape \xhhh... where h is a hex digit.
 Modified: 13 June 2005 - full array of escape sequences including \uhhhh, and \Uhhhhhhhh
 Returned: T_esc_seq
           Err_bad_esc - bad escape sequence.
           See Stroustrup - The C++ Programming Language P. 830
*/
/@
@i "/usr/local/yacco2/copyright.w"
@** |esc_seq| Thread.\fbreak
Evaluates escape sequences but does not convert them into their binary form.
The data is passed back as text for the caller to deal with it.
For myself i'm dealing in text format not binary.
The finite state tables are also emitted in c++ text form for a c++ compiler to digest.
The variants cover the normal c++ backslash character literal like n or b,
the octal and hexadecimal variations, and the unicode types.
@/
fsm
(fsm-id "esc_seq.lex",fsm-filename esc_seq,fsm-namespace NS_esc_seq
,fsm-class Cesc_seq {
  user-prefix-declaration
#include "stdlib.h"
  ***
  user-declaration
  public:
    // Checks the current token right after the backslash was shifted.
    // Returns 0 when parsing may continue, otherwise a newly allocated
    // Err_bad_eos symbol.
    yacco2::CAbs_lr1_sym* chk_for_overrun();
    // Escape sequence text built up by the rules; handed to T_esc_seq.
    std::string data_;
    // Hex digits collected by Rhex_no_digit / Rhex_no_letter.
    std::string hex_data_;
    // Octal digits collected by Roctal_no.
    std::string octal_data_;
  ***
  user-implementation
    // Detects an escape that runs into the end of the line / input.
    // When the current token is a raw line feed, raw carriage return,
    // eol, or eog, an Err_bad_eos is created (co-ordinates taken from the
    // start token), the use-all-shift setting is turned off, and the error
    // symbol is returned. For any other token 0 is returned.
    yacco2::CAbs_lr1_sym* Cesc_seq::chk_for_overrun(){
      switch (parser__->current_token()->enumerated_id__){
        case T_Enum::T_raw_lf_: break;
        case T_Enum::T_raw_cr_: break;
        case T_Enum::T_T_eol_: break;
        case T_Enum::T_LR1_eog_: break;
        default: return 0;
      }
      CAbs_lr1_sym* sym = new Err_bad_eos;
      sym->set_rc(*parser__->start_token__,__FILE__,__LINE__);
      parser__->set_use_all_shift_off();
      return sym;
    }
  ***
  op
    parser__->set_use_all_shift_on();
    data_.clear();
    // Reset the digit buffers together with data_. octal_data_ is never
    // cleared by any rule and hex_data_ is not cleared on the \x path, so
    // without this a re-run of this fsm would prepend digits left over from
    // the previous escape sequence.
    hex_data_.clear();
    octal_data_.clear();
  ***
}
,fsm-version "1.0"
,fsm-date "25 Juin 2003"
,fsm-debug "false"
,fsm-comments "C type escape sequence recognizer.")

parallel-parser
(
  parallel-thread-function
    TH_esc_seq
  ***
  parallel-la-boundary
    eolr
  ***
)

@"/usr/local/yacco2/compiler/grammars/yacco2_T_includes.T"

rules{
Resc_seq (
  lhs{
    op
      // Package the accumulated text as a T_esc_seq positioned at the
      // start token and hand it back through RSVP.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      CAbs_lr1_sym* sym = new T_esc_seq(fsm->data_.c_str());
      sym->set_rc(*rule_info__.parser__->start_token__,__FILE__,__LINE__);
      sym->set_line_no_and_pos_in_line(*rule_info__.parser__->start_token__);
      RSVP(sym);
    ***
  }
){
  -> Rslash Rliteral
  -> Rslash Roctal |.| {
    op
      // Octal form: backslash followed by the collected octal digits.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += '\\';
      fsm->data_ += fsm->octal_data_.c_str();
    ***
  }
  -> Rslash Rhex |.| {
    op
      // Rx has already appended the backslash and x; add the hex digits.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += fsm->hex_data_.c_str();
    ***
  }
  -> Rslash Ruhex |.|
  -> Rslash RUhex |.|
}

Rslash () {
  -> "\\" {
    op
      // Stop the parse with the error symbol when the backslash is
      // immediately followed by an end of line / end of input token.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      CAbs_lr1_sym* sym = fsm->chk_for_overrun();
      if(sym == 0) return;
      RSVP(sym);
      rule_info__.parser__->set_stop_parse(true);
    ***
  }
}

Rliteral (){
  -> "n" {
    /@ New line. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'n';
    ***
  }
  -> "t" {
    /@ Horizontal tab. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 't';
    ***
  }
  -> "v" {
    /@ Vertical tab. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'v';
    ***
  }
  -> "b" {
    /@ Backspace. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'b';
    ***
  }
  -> "r" {
    /@ Carriage return. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'r';
    ***
  }
  -> "f" {
    /@ Form feed. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'f';
    ***
  }
  -> "a" {
    /@ Alert or bell. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'a';
    ***
  }
  -> "\\" {
    /@ Backslash --- escaping the escape. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += "\\";
    ***
  }
  -> "?" {
    /@ Question mark. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += '?';
    ***
  }
  -> "'" {
    /@ Single quote. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += "'";
    ***
  }
  -> "\"" {
    /@ Double quote. @/
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += '"';
    ***
  }
  -> |?| {
    op
      // Anything else after the backslash: report Err_bad_esc at the
      // start token and stop the parse.
      CAbs_lr1_sym* sym = new Err_bad_esc;
      sym->set_rc(*rule_info__.parser__->start_token__,__FILE__,__LINE__);
      sym->set_line_no_and_pos_in_line(*rule_info__.parser__->start_token__);
      RSVP(sym);
      rule_info__.parser__->set_stop_parse(true);
    ***
  }
}

Ruhex () {
  -> Ru Rmhex_no Rmhex_no Rcalc_hex_char Rmhex_no Rmhex_no Rcalc_hex_char
}

Ru () {
  -> "u" {
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'u';
    ***
  }
}

RU () {
  -> "U" {
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'U';
    ***
  }
}

Rx () {
  -> "x" {
    op
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      fsm->data_ += "\\";
      fsm->data_ += 'x';
    ***
  }
}

RUhex () {
  -> RU Rmhex_no Rmhex_no Rcalc_hex_char Rmhex_no Rmhex_no Rcalc_hex_char
        Rmhex_no Rmhex_no Rcalc_hex_char Rmhex_no Rmhex_no Rcalc_hex_char
}

Rmhex_no () {
  -> Rhex_no
  -> |?| {
    op
      // A mandatory hex digit is missing: report Err_bad_esc and stop.
      CAbs_lr1_sym* sym = new Err_bad_esc;
      sym->set_rc(*rule_info__.parser__->start_token__,__FILE__,__LINE__);
      RSVP(sym);
      rule_info__.parser__->set_stop_parse(true);
    ***
  }
}

Rcalc_hex_char () {
  -> {
    op
      // Convert the hex digits gathered so far (base 16), keep the low
      // byte, append a backslash, an x and that byte to data_, then empty
      // the hex buffer for the next digit pair.
      // NOTE(review): the appended byte is the raw value rather than hex
      // text, which looks at odds with the "text not binary" statement in
      // the thread description - confirm intent before changing.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      unsigned long usl = strtoul(fsm->hex_data_.c_str(),0,16);
      unsigned char c = usl;
      fsm->data_ += "\\x";
      fsm->data_ += c;
      fsm->hex_data_.clear();
    ***
  }
}

Rhex () {
  -> Rx Rhex_nos
}

Rhex_nos () {
  -> Rmhex_no
  -> Rhex_nos Rhex_no
}

Rhex_no () {
  -> Rhex_no_digit
  -> Rhex_no_letter
}

Rhex_no_digit (
  lhs{
    op
      // Append the id__ of the token one below the parse stack top
      // (presumably the digit just shifted - confirm) to hex_data_.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      size_t pos = rule_info__.parser__->parse_stack__.top_sub__ - 1;
      CAbs_lr1_sym* sym = rule_info__.parser__->get_spec_stack_token(pos);
      fsm->hex_data_ += sym->id__;
    ***
  }
) {
  -> 0  -> 1  -> 2  -> 3  -> 4
  -> 5  -> 6  -> 7  -> 8  -> 9
}

Rhex_no_letter (
  lhs{
    op
      // Append the id__ of the token one below the parse stack top
      // (presumably the letter just shifted - confirm) to hex_data_.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      size_t pos = rule_info__.parser__->parse_stack__.top_sub__ - 1;
      CAbs_lr1_sym* sym = rule_info__.parser__->get_spec_stack_token(pos);
      fsm->hex_data_ += sym->id__;
    ***
  }
) {
  -> a  -> b  -> c  -> d  -> e  -> f
  -> A  -> B  -> C  -> D  -> E  -> F
}

Roctal () {
  -> Roctal_no
  -> Roctal_no Roctal_no
  -> Roctal_no Roctal_no Roctal_no
}

Roctal_no (
  lhs{
    op
      // Append the id__ of the token one below the parse stack top
      // (presumably the octal digit just shifted - confirm) to octal_data_.
      Cesc_seq* fsm = (Cesc_seq*) rule_info__.parser__->fsm_tbl__;
      size_t pos = rule_info__.parser__->parse_stack__.top_sub__ - 1;
      CAbs_lr1_sym* sym = rule_info__.parser__->get_spec_stack_token(pos);
      fsm->octal_data_ += sym->id__;
    ***
  }
) {
  -> 0  -> 1  -> 2  -> 3
  -> 4  -> 5  -> 6  -> 7
}
}// end of rules