1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.fileupload;
18
19 import java.io.UnsupportedEncodingException;
20 import java.util.HashMap;
21 import java.util.Locale;
22 import java.util.Map;
23
24 import org.apache.commons.fileupload.util.mime.MimeUtility;
25
26 /**
27 * A simple parser intended to parse sequences of name/value pairs.
28 *
29 * Parameter values are expected to be enclosed in quotes if they
30 * contain unsafe characters, such as '=' characters or separators.
31 * Parameter values are optional and can be omitted.
32 *
33 * <p>
34 * {@code param1 = value; param2 = "anything goes; really"; param3}
35 * </p>
36 */
37 public class ParameterParser {
38
39 /**
40 * String to be parsed.
41 */
42 private char[] chars;
43
44 /**
45 * Current position in the string.
46 */
47 private int pos;
48
49 /**
50 * Maximum position in the string.
51 */
52 private int len;
53
54 /**
55 * Start of a token.
56 */
57 private int i1;
58
59 /**
60 * End of a token.
61 */
62 private int i2;
63
64 /**
65 * Whether names stored in the map should be converted to lower case.
66 */
67 private boolean lowerCaseNames;
68
69 /**
70 * Default ParameterParser constructor.
71 */
72 public ParameterParser() {
73 }
74
75 /**
76 * A helper method to process the parsed token. This method removes
77 * leading and trailing blanks as well as enclosing quotation marks,
78 * when necessary.
79 *
80 * @param quoted {@code true} if quotation marks are expected,
81 * {@code false} otherwise.
82 * @return the token
83 */
84 private String getToken(final boolean quoted) {
85 // Trim leading white spaces
86 while (i1 < i2 && Character.isWhitespace(chars[i1])) {
87 i1++;
88 }
89 // Trim trailing white spaces
90 while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
91 i2--;
92 }
93 // Strip away quotation marks if necessary
94 if (quoted
95 && i2 - i1 >= 2
96 && chars[i1] == '"'
97 && chars[i2 - 1] == '"') {
98 i1++;
99 i2--;
100 }
101 String result = null;
102 if (i2 > i1) {
103 result = new String(chars, i1, i2 - i1);
104 }
105 return result;
106 }
107
108 /**
109 * Are there any characters left to parse?
110 *
111 * @return {@code true} if there are unparsed characters,
112 * {@code false} otherwise.
113 */
114 private boolean hasChar() {
115 return pos < len;
116 }
117
118 /**
119 * Returns {@code true} if parameter names are to be converted to lower
120 * case when name/value pairs are parsed.
121 *
122 * @return {@code true} if parameter names are to be
123 * converted to lower case when name/value pairs are parsed.
124 * Otherwise returns {@code false}
125 */
126 public boolean isLowerCaseNames() {
127 return lowerCaseNames;
128 }
129
130 /**
131 * Tests if the given character is present in the array of characters.
132 *
133 * @param ch the character to test for presence in the array of characters
134 * @param charray the array of characters to test against
135 * @return {@code true} if the character is present in the array of
136 * characters, {@code false} otherwise.
137 */
138 private boolean isOneOf(final char ch, final char[] charray) {
139 boolean result = false;
140 for (final char element : charray) {
141 if (ch == element) {
142 result = true;
143 break;
144 }
145 }
146 return result;
147 }
148
149 /**
150 * Extracts a map of name/value pairs from the given array of
151 * characters. Names are expected to be unique.
152 *
153 * @param charArray the array of characters that contains a sequence of
154 * name/value pairs
155 * @param separator the name/value pairs separator
156 * @return a map of name/value pairs
157 */
158 public Map<String, String> parse(final char[] charArray, final char separator) {
159 if (charArray == null) {
160 return new HashMap<>();
161 }
162 return parse(charArray, 0, charArray.length, separator);
163 }
164
165 /**
166 * Extracts a map of name/value pairs from the given array of characters. Names are expected to be unique.
167 *
168 * @param charArray the array of characters that contains a sequence of name/value pairs
169 * @param offset the initial offset.
170 * @param length the length.
171 * @param separator the name/value pairs separator
172 * @return a map of name/value pairs
173 */
174 public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
175 if (charArray == null) {
176 return new HashMap<>();
177 }
178 final HashMap<String, String> params = new HashMap<>();
179 chars = charArray.clone();
180 pos = offset;
181 len = length;
182 while (hasChar()) {
183 String paramName = parseToken(new char[] { '=', separator });
184 String paramValue = null;
185 if (hasChar() && charArray[pos] == '=') {
186 pos++; // skip '='
187 paramValue = parseQuotedToken(new char[] { separator });
188 if (paramValue != null) {
189 try {
190 paramValue = RFC2231Utility.hasEncodedValue(paramName) ? RFC2231Utility.decodeText(paramValue) : MimeUtility.decodeText(paramValue);
191 } catch (final UnsupportedEncodingException e) {
192 // let's keep the original value in this case
193 }
194 }
195 }
196 if (hasChar() && charArray[pos] == separator) {
197 pos++; // skip separator
198 }
199 if (paramName != null && !paramName.isEmpty()) {
200 paramName = RFC2231Utility.stripDelimiter(paramName);
201 if (lowerCaseNames) {
202 paramName = paramName.toLowerCase(Locale.ROOT);
203 }
204 params.put(paramName, paramValue);
205 }
206 }
207 return params;
208 }
209
210 /**
211 * Extracts a map of name/value pairs from the given string. Names are
212 * expected to be unique.
213 *
214 * @param str the string that contains a sequence of name/value pairs
215 * @param separator the name/value pairs separator
216 * @return a map of name/value pairs
217 */
218 public Map<String, String> parse(final String str, final char separator) {
219 if (str == null) {
220 return new HashMap<>();
221 }
222 return parse(str.toCharArray(), separator);
223 }
224
225 /**
226 * Extracts a map of name/value pairs from the given string. Names are
227 * expected to be unique. Multiple separators may be specified and
228 * the earliest found in the input string is used.
229 *
230 * @param str the string that contains a sequence of name/value pairs
231 * @param separators the name/value pairs separators
232 * @return a map of name/value pairs
233 */
234 public Map<String, String> parse(final String str, final char[] separators) {
235 if (separators == null || separators.length == 0) {
236 return new HashMap<>();
237 }
238 char separator = separators[0];
239 if (str != null) {
240 int idx = str.length();
241 for (final char separator2 : separators) {
242 final int tmp = str.indexOf(separator2);
243 if (tmp != -1 && tmp < idx) {
244 idx = tmp;
245 separator = separator2;
246 }
247 }
248 }
249 return parse(str, separator);
250 }
251
252 /**
253 * Parses out a token until any of the given terminators
254 * is encountered outside the quotation marks.
255 *
256 * @param terminators the array of terminating characters. Any of these
257 * characters when encountered outside the quotation marks signify the end
258 * of the token
259 *
260 * @return the token
261 */
262 private String parseQuotedToken(final char[] terminators) {
263 char ch;
264 i1 = pos;
265 i2 = pos;
266 boolean quoted = false;
267 boolean charEscaped = false;
268 while (hasChar()) {
269 ch = chars[pos];
270 if (!quoted && isOneOf(ch, terminators)) {
271 break;
272 }
273 if (!charEscaped && ch == '"') {
274 quoted = !quoted;
275 }
276 charEscaped = !charEscaped && ch == '\\';
277 i2++;
278 pos++;
279
280 }
281 return getToken(true);
282 }
283
284 /**
285 * Parses out a token until any of the given terminators
286 * is encountered.
287 *
288 * @param terminators the array of terminating characters. Any of these
289 * characters when encountered signify the end of the token
290 *
291 * @return the token
292 */
293 private String parseToken(final char[] terminators) {
294 char ch;
295 i1 = pos;
296 i2 = pos;
297 while (hasChar()) {
298 ch = chars[pos];
299 if (isOneOf(ch, terminators)) {
300 break;
301 }
302 i2++;
303 pos++;
304 }
305 return getToken(false);
306 }
307
308 /**
309 * Sets the flag if parameter names are to be converted to lower case when
310 * name/value pairs are parsed.
311 *
312 * @param b {@code true} if parameter names are to be
313 * converted to lower case when name/value pairs are parsed.
314 * {@code false} otherwise.
315 */
316 public void setLowerCaseNames(final boolean b) {
317 lowerCaseNames = b;
318 }
319
320 }