HTML Tidy  5.4.0
The HTACG Tidy HTML Project
streamio.h
Go to the documentation of this file.
1 #ifndef __STREAMIO_H__
2 #define __STREAMIO_H__
3 
4 /* streamio.h -- handles character stream I/O
5 
6  (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
7  See tidy.h for the copyright notice.
8 
9  Wrapper around Tidy input source and output sink
10  that calls appropriate interfaces, and applies
11  necessary char encoding transformations: to/from
12  ISO-10646 and/or UTF-8.
13 
14 */
15 
16 #include "forward.h"
17 #include "tidybuffio.h"
18 #include "fileio.h"
19 
20 #ifdef __cplusplus
21 extern "C"
22 {
23 #endif
24 typedef enum
25 {
29 } IOType;
30 
31 /* states for ISO 2022
32 
33  A document in an ISO-2022 based encoding uses ESC sequences called
34  "designators" to switch character sets. The designators defined and
35  used in ISO-2022-JP are:
36 
37  "ESC" + "(" + ? for ISO646 variants
38 
39  "ESC" + "$" + ? and
40  "ESC" + "$" + "(" + ? for multibyte character sets
41 */
42 typedef enum
43 {
50 } ISO2022State;
51 
52 /************************
53 ** Source
54 ************************/
55 
56 enum
57 {
60 };
61 
62 /* Input stream state. Non-raw input is cleaned up. */
63 struct _StreamIn
64 {
65  ISO2022State state; /* FSM for ISO2022 */
67  TidyAllocator *allocator;
71  int tabs;
73  unsigned short curlastpos; /* current last position in lastcols */
74  unsigned short firstlastpos; /* first valid last position in lastcols */
75  int curcol; /* presumably the current column of the read position -- TODO confirm */
76  int curline; /* presumably the current line of the read position -- TODO confirm */
77  int encoding; /* presumably one of the character encoding ids #defined in this header -- TODO confirm */
79 
81 
82 #ifdef TIDY_WIN32_MLANG_SUPPORT
83  void* mlang; /* member exists only when TIDY_WIN32_MLANG_SUPPORT is defined */
84 #endif
85 
86 #ifdef TIDY_STORE_ORIGINAL_TEXT
87  tmbstr otextbuf; /* the three otext* members exist only when TIDY_STORE_ORIGINAL_TEXT is defined */
88  size_t otextsize;
89  uint otextlen;
90 #endif
91 
92  /* Pointer back to document for error reporting */
93  TidyDocImpl* doc;
94 };
95 
96 StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
97 void TY_(freeStreamIn)(StreamIn* in);
98 /* Input stream constructors, one per kind of source: a stdio FILE*, a TidyBuffer*, or a caller-supplied TidyInputSource*. Each also takes the owning document and an encoding id. */
99 StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
100 StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
101 StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
102 /* Reading from an input stream. NOTE(review): the meaning of the returned values is defined by the implementation, not by this header -- confirm there. */
103 int TY_(ReadBOMEncoding)(StreamIn *in);
104 uint TY_(ReadChar)( StreamIn* in );
105 void TY_(UngetChar)( uint c, StreamIn* in );
106 Bool TY_(IsEOF)( StreamIn* in );
107 
108 
109 /************************
110 ** Sink
111 ************************/
112 
114 {
115  int encoding;
116  ISO2022State state; /* for ISO 2022 */
118 
119 #ifdef TIDY_WIN32_MLANG_SUPPORT
120  void* mlang;
121 #endif
122 
125 };
126 
127 StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
128 StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
129 StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
130 /* The three constructors above differ only in destination type (FILE*, TidyBuffer*, TidyOutputSink*); each also takes the document, an encoding id and a newline setting (newln). */
131 StreamOut* TY_(StdErrOutput)(void);
132 /* StreamOut* StdOutOutput(void); */
133 void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
134 /* Writing to an output stream. */
135 void TY_(WriteChar)( uint c, StreamOut* out );
136 void TY_(outBOM)( StreamOut *out );
137 /* Lookups between encoding ids and names. NOTE(review): ids are presumably the character encoding constants #defined in this header -- confirm. */
138 ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
139 ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
140 int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
141 
142 /************************
143 ** Misc
144 ************************/
145 
146 /* Character encoding ids. RAW through IBM858 (0-8) are always defined; the
147 ** ids after them exist only when the matching SUPPORT_* macro is non-zero. */
148 #define RAW 0
149 #define ASCII 1
150 #define LATIN0 2
151 #define LATIN1 3
152 #define UTF8 4
153 #define ISO2022 5
154 #define MACROMAN 6
155 #define WIN1252 7
156 #define IBM858 8
157 
158 #if SUPPORT_UTF16_ENCODINGS
159 #define UTF16LE 9
160 #define UTF16BE 10
161 #define UTF16 11
162 #endif
163 
164 /* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
165 ** (i.e., to Unicode) before being recoded into UTF-8. This may be
166 ** confusing: usually UTF-8 implies ISO 10646 codepoints.
** The numeric ids of BIG5 and SHIFTJIS depend on the build: 12 and 13 when
** UTF-16 support is enabled, otherwise 9 and 10 (the values the UTF-16 ids
** would have taken).
167 */
168 #if SUPPORT_ASIAN_ENCODINGS
169 #if SUPPORT_UTF16_ENCODINGS
170 #define BIG5 12
171 #define SHIFTJIS 13
172 #else
173 #define BIG5 9
174 #define SHIFTJIS 10
175 #endif
176 #endif
177 
178 #ifdef TIDY_WIN32_MLANG_SUPPORT
179 /* hack: Windows code page numbers start at 37, so 36 is used as the id here */
180 #define WIN32MLANG 36
181 #endif
182 
183 
184 /* Function to convert from Windows-1252 to Unicode */
185 uint TY_(DecodeWin1252)(uint c);
186 
187 /* Function to convert from MacRoman to Unicode */
188 uint TY_(DecodeMacRoman)(uint c);
189 
190 #ifdef __cplusplus
191 }
192 #endif
193 
194 
195 /* Use numeric constants as opposed to escape chars (\r, \n)
196 ** to avoid conflicts with Mac compilers that may re-define these.
197 */
198 #define CR 0xD
199 #define LF 0xA
200 /* Default newline configuration, chosen by platform: TidyCR for MAC_OS_CLASSIC, TidyCRLF for _WIN32 or OS2_OS, TidyLF otherwise. */
201 #if defined(MAC_OS_CLASSIC)
202 #define DEFAULT_NL_CONFIG TidyCR
203 #elif defined(_WIN32) || defined(OS2_OS)
204 #define DEFAULT_NL_CONFIG TidyCRLF
205 #else
206 #define DEFAULT_NL_CONFIG TidyLF
207 #endif
208 
209 
210 #endif /* __STREAMIO_H__ */
TidyDocImpl * doc
Definition: streamio.h:93
Definition: streamio.h:59
uint bufpos
Definition: streamio.h:69
Treat buffer as an I/O stream.
tchar * charbuf
Definition: streamio.h:68
ISO2022State state
Definition: streamio.h:65
const tmbchar * ctmbstr
Definition: tidyplatform.h:556
uint nl
Definition: streamio.h:117
unsigned short firstlastpos
Definition: streamio.h:74
int encoding
Definition: streamio.h:77
IOType
Definition: streamio.h:24
Definition: streamio.h:45
Definition: streamio.h:48
ISO2022State
Definition: streamio.h:42
Definition: streamio.h:49
TidyInputSource source
Definition: streamio.h:80
Definition: streamio.h:27
Bool pushed
Definition: streamio.h:66
TidyInputSource - Delivers raw bytes of input.
Definition: tidy.h:572
Definition: streamio.h:63
int curline
Definition: streamio.h:76
Bool
Definition: tidyplatform.h:593
Definition: streamio.h:46
unsigned short curlastpos
Definition: streamio.h:73
Definition: streamio.h:113
Definition: streamio.h:47
int lastcols[LASTPOS_SIZE]
Definition: streamio.h:72
int curcol
Definition: streamio.h:75
does standard C I/O
tmbchar * tmbstr
Definition: tidyplatform.h:555
uint bufsize
Definition: streamio.h:70
Definition: streamio.h:44
unsigned int uint
Definition: tidyplatform.h:525
TidyOutputSink - accepts raw bytes of output.
Definition: tidy.h:613
Definition: streamio.h:58
TidyOutputSink sink
Definition: streamio.h:124
Definition: streamio.h:26
Definition: streamio.h:28
TidyAllocator * allocator
Definition: streamio.h:67
int tabs
Definition: streamio.h:71
#define TY_(str)
Definition: forward.h:23
uint tchar
Definition: tidyplatform.h:552
IOType iotype
Definition: streamio.h:78