pion  5.0.6
http_parser.cpp
1 // ---------------------------------------------------------------------
2 // pion: a Boost C++ framework for building lightweight HTTP interfaces
3 // ---------------------------------------------------------------------
4 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion)
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See http://www.boost.org/LICENSE_1_0.txt
8 //
9 
10 #include <cstdlib>
11 #include <cstring>
12 #include <boost/regex.hpp>
13 #include <boost/assert.hpp>
14 #include <boost/logic/tribool.hpp>
15 #include <boost/algorithm/string.hpp>
16 #include <pion/algorithm.hpp>
17 #include <pion/http/parser.hpp>
18 #include <pion/http/request.hpp>
19 #include <pion/http/response.hpp>
20 #include <pion/http/message.hpp>
21 
22 
23 namespace pion { // begin namespace pion
24 namespace http { // begin namespace http
25 
26 
27 // static members of parser
28 
// Upper bounds applied while individual message elements are accumulated.
// The *_MAX values for the method, resource, query string, header name/value
// and status message are checked in parse_headers(); QUERY_NAME_MAX and
// QUERY_VALUE_MAX are checked in parse_url_encoded().
// NOTE(review): the COOKIE_* limits and DEFAULT_CONTENT_MAX are not referenced
// in the part of the file shown here -- presumably used by the cookie parser
// and as the initial payload limit; confirm against the rest of the class.
29 const boost::uint32_t parser::STATUS_MESSAGE_MAX = 1024; // 1 KB
30 const boost::uint32_t parser::METHOD_MAX = 1024; // 1 KB
31 const boost::uint32_t parser::RESOURCE_MAX = 256 * 1024; // 256 KB
32 const boost::uint32_t parser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB
33 const boost::uint32_t parser::HEADER_NAME_MAX = 1024; // 1 KB
34 const boost::uint32_t parser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB
35 const boost::uint32_t parser::QUERY_NAME_MAX = 1024; // 1 KB
36 const boost::uint32_t parser::QUERY_VALUE_MAX = 1024 * 1024; // 1 MB
37 const boost::uint32_t parser::COOKIE_NAME_MAX = 1024; // 1 KB
38 const boost::uint32_t parser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB
39 const std::size_t parser::DEFAULT_CONTENT_MAX = 1024 * 1024; // 1 MB
// Error-category pointer (starts out NULL) and its companion once-flag.
// NOTE(review): presumably the flag guards one-time creation of the category
// object -- the code that does so is not visible here; confirm.
40 parser::error_category_t * parser::m_error_category_ptr = NULL;
41 boost::once_flag parser::m_instance_flag = BOOST_ONCE_INIT;
42 
43 
44 // parser member functions
45 
// Drives the top-level message state machine over the bytes currently
// available to the parser.
//
// Dispatches on m_message_parse_state repeatedly until one of the stages
// produces a definite result or eof() reports that the input is exhausted.
//
// http_msg: message object that receives the parsed data
// ec: passed through to the individual stages, which use it to report errors
// returns: true once the whole message has been parsed, false if a stage
//          reported an error, indeterminate if more input is required
//
// On return m_bytes_last_read holds the sum of the byte counts reported by
// every stage that ran during this call.
46 boost::tribool parser::parse(http::message& http_msg,
47  boost::system::error_code& ec)
48 {
49  BOOST_ASSERT(! eof() );
50 
51  boost::tribool rc = boost::indeterminate;
52  std::size_t total_bytes_parsed = 0;
53 
// data is arriving after an earlier gap was recorded for this message
54  if(http_msg.has_missing_packets()) {
55  http_msg.set_data_after_missing_packet(true);
56  }
57 
58  do {
59  switch (m_message_parse_state) {
60  // just started parsing the HTTP message
61  case PARSE_START:
62  m_message_parse_state = PARSE_HEADERS;
63  // step through to PARSE_HEADERS
64 
65  // parsing the HTTP headers
66  case PARSE_HEADERS:
67  case PARSE_FOOTERS:
68  rc = parse_headers(http_msg, ec);
69  total_bytes_parsed += m_bytes_last_read;
70  // check if we have finished parsing HTTP headers
71  if (rc == true && m_message_parse_state == PARSE_HEADERS) {
72  // finish_header_parsing() updates m_message_parse_state
73  // We only call this for Headers and not Footers
74  rc = finish_header_parsing(http_msg, ec);
75  }
76  break;
77 
78  // parsing chunked payload content
79  case PARSE_CHUNKS:
80  rc = parse_chunks(http_msg.get_chunk_cache(), ec);
81  total_bytes_parsed += m_bytes_last_read;
82  // check if we have finished parsing all chunks
// (the cached chunks are only merged when no payload handler is installed)
83  if (rc == true && !m_payload_handler) {
84  http_msg.concatenate_chunks();
85 
86  // Handle footers if present
// stay indeterminate so the loop goes on to parse the footers
87  rc = ((m_message_parse_state == PARSE_FOOTERS) ?
88  boost::indeterminate : (boost::tribool)true);
89  }
90  break;
91 
92  // parsing regular payload content with a known length
93  case PARSE_CONTENT:
94  rc = consume_content(http_msg, ec);
95  total_bytes_parsed += m_bytes_last_read;
96  break;
97 
98  // parsing payload content with no length (until EOF)
99  case PARSE_CONTENT_NO_LENGTH:
// NOTE(review): listing line 100 is absent from this extract (the numbering
// jumps from 99 to 101), so the statement that consumes the payload for this
// state is not shown. As listed, this case only adds the previous
// m_bytes_last_read and leaves rc indeterminate -- restore the missing
// statement from the upstream source before building.
101  total_bytes_parsed += m_bytes_last_read;
102  break;
103 
104  // finished parsing the HTTP message
105  case PARSE_END:
106  rc = true;
107  break;
108  }
109  } while ( boost::indeterminate(rc) && ! eof() );
110 
111  // check if we've finished parsing the HTTP message
112  if (rc == true) {
113  m_message_parse_state = PARSE_END;
114  finish(http_msg);
115  } else if(rc == false) {
116  compute_msg_status(http_msg, false);
117  }
118 
119  // update bytes last read (aggregate individual operations for caller)
120  m_bytes_last_read = total_bytes_parsed;
121 
122  return rc;
123 }
124 
125 boost::tribool parser::parse_missing_data(http::message& http_msg,
126  std::size_t len, boost::system::error_code& ec)
127 {
128  static const char MISSING_DATA_CHAR = 'X';
129  boost::tribool rc = boost::indeterminate;
130 
131  http_msg.set_missing_packets(true);
132 
133  switch (m_message_parse_state) {
134 
135  // cannot recover from missing data while parsing HTTP headers
136  case PARSE_START:
137  case PARSE_HEADERS:
138  case PARSE_FOOTERS:
139  set_error(ec, ERROR_MISSING_HEADER_DATA);
140  rc = false;
141  break;
142 
143  // parsing chunked payload content
144  case PARSE_CHUNKS:
145  // parsing chunk data -> we can only recover if data fits into current chunk
146  if (m_chunked_content_parse_state == PARSE_CHUNK
147  && m_bytes_read_in_current_chunk < m_size_of_current_chunk
148  && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
149  {
150  // use dummy content for missing data
151  if (m_payload_handler) {
152  for (std::size_t n = 0; n < len; ++n)
153  m_payload_handler(&MISSING_DATA_CHAR, 1);
154  } else {
155  for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
156  http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
157  }
158 
159  m_bytes_read_in_current_chunk += len;
160  m_bytes_last_read = len;
161  m_bytes_total_read += len;
162  m_bytes_content_read += len;
163 
164  if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
165  m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
166  }
167  } else {
168  // cannot recover from missing data
169  set_error(ec, ERROR_MISSING_CHUNK_DATA);
170  rc = false;
171  }
172  break;
173 
174  // parsing regular payload content with a known length
175  case PARSE_CONTENT:
176  // parsing content (with length) -> we can only recover if data fits into content
177  if (m_bytes_content_remaining == 0) {
178  // we have all of the remaining payload content
179  rc = true;
180  } else if (m_bytes_content_remaining < len) {
181  // cannot recover from missing data
182  set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
183  rc = false;
184  } else {
185 
186  // make sure content buffer is not already full
187  if (m_payload_handler) {
188  for (std::size_t n = 0; n < len; ++n)
189  m_payload_handler(&MISSING_DATA_CHAR, 1);
190  } else if ( (m_bytes_content_read+len) <= m_max_content_length) {
191  // use dummy content for missing data
192  for (std::size_t n = 0; n < len; ++n)
193  http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR;
194  } else {
195  m_bytes_content_read += len;
196  }
197 
198  m_bytes_content_remaining -= len;
199  m_bytes_total_read += len;
200  m_bytes_last_read = len;
201 
202  if (m_bytes_content_remaining == 0)
203  rc = true;
204  }
205  break;
206 
207  // parsing payload content with no length (until EOF)
208  case PARSE_CONTENT_NO_LENGTH:
209  // use dummy content for missing data
210  if (m_payload_handler) {
211  for (std::size_t n = 0; n < len; ++n)
212  m_payload_handler(&MISSING_DATA_CHAR, 1);
213  } else {
214  for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
215  http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
216  }
217  m_bytes_last_read = len;
218  m_bytes_total_read += len;
219  m_bytes_content_read += len;
220  break;
221 
222  // finished parsing the HTTP message
223  case PARSE_END:
224  rc = true;
225  break;
226  }
227 
228  // check if we've finished parsing the HTTP message
229  if (rc == true) {
230  m_message_parse_state = PARSE_END;
231  finish(http_msg);
232  } else if(rc == false) {
233  compute_msg_status(http_msg, false);
234  }
235 
236  return rc;
237 }
238 
239 boost::tribool parser::parse_headers(http::message& http_msg,
240  boost::system::error_code& ec)
241 {
242  //
243  // note that boost::tribool may have one of THREE states:
244  //
245  // false: encountered an error while parsing HTTP headers
246  // true: finished successfully parsing the HTTP headers
247  // indeterminate: parsed bytes, but the HTTP headers are not yet finished
248  //
249  const char *read_start_ptr = m_read_ptr;
250  m_bytes_last_read = 0;
251  while (m_read_ptr < m_read_end_ptr) {
252 
253  if (m_save_raw_headers)
254  m_raw_headers += *m_read_ptr;
255 
256  switch (m_headers_parse_state) {
257  case PARSE_METHOD_START:
258  // we have not yet started parsing the HTTP method string
259  if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace
260  if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
261  set_error(ec, ERROR_METHOD_CHAR);
262  return false;
263  }
264  m_headers_parse_state = PARSE_METHOD;
265  m_method.erase();
266  m_method.push_back(*m_read_ptr);
267  }
268  break;
269 
270  case PARSE_METHOD:
271  // we have started parsing the HTTP method string
272  if (*m_read_ptr == ' ') {
273  m_resource.erase();
274  m_headers_parse_state = PARSE_URI_STEM;
275  } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
276  set_error(ec, ERROR_METHOD_CHAR);
277  return false;
278  } else if (m_method.size() >= METHOD_MAX) {
279  set_error(ec, ERROR_METHOD_SIZE);
280  return false;
281  } else {
282  m_method.push_back(*m_read_ptr);
283  }
284  break;
285 
286  case PARSE_URI_STEM:
287  // we have started parsing the URI stem (or resource name)
288  if (*m_read_ptr == ' ') {
289  m_headers_parse_state = PARSE_HTTP_VERSION_H;
290  } else if (*m_read_ptr == '?') {
291  m_query_string.erase();
292  m_headers_parse_state = PARSE_URI_QUERY;
293  } else if (*m_read_ptr == '\r') {
294  http_msg.set_version_major(0);
295  http_msg.set_version_minor(0);
296  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
297  } else if (*m_read_ptr == '\n') {
298  http_msg.set_version_major(0);
299  http_msg.set_version_minor(0);
300  m_headers_parse_state = PARSE_EXPECTING_CR;
301  } else if (is_control(*m_read_ptr)) {
302  set_error(ec, ERROR_URI_CHAR);
303  return false;
304  } else if (m_resource.size() >= RESOURCE_MAX) {
305  set_error(ec, ERROR_URI_SIZE);
306  return false;
307  } else {
308  m_resource.push_back(*m_read_ptr);
309  }
310  break;
311 
312  case PARSE_URI_QUERY:
313  // we have started parsing the URI query string
314  if (*m_read_ptr == ' ') {
315  m_headers_parse_state = PARSE_HTTP_VERSION_H;
316  } else if (*m_read_ptr == '\r') {
317  http_msg.set_version_major(0);
318  http_msg.set_version_minor(0);
319  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
320  } else if (*m_read_ptr == '\n') {
321  http_msg.set_version_major(0);
322  http_msg.set_version_minor(0);
323  m_headers_parse_state = PARSE_EXPECTING_CR;
324  } else if (is_control(*m_read_ptr)) {
325  set_error(ec, ERROR_QUERY_CHAR);
326  return false;
327  } else if (m_query_string.size() >= QUERY_STRING_MAX) {
328  set_error(ec, ERROR_QUERY_SIZE);
329  return false;
330  } else {
331  m_query_string.push_back(*m_read_ptr);
332  }
333  break;
334 
335  case PARSE_HTTP_VERSION_H:
336  // parsing "HTTP"
337  if (*m_read_ptr == '\r') {
338  // should only happen for requests (no HTTP/VERSION specified)
339  if (! m_is_request) {
340  set_error(ec, ERROR_VERSION_EMPTY);
341  return false;
342  }
343  http_msg.set_version_major(0);
344  http_msg.set_version_minor(0);
345  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
346  } else if (*m_read_ptr == '\n') {
347  // should only happen for requests (no HTTP/VERSION specified)
348  if (! m_is_request) {
349  set_error(ec, ERROR_VERSION_EMPTY);
350  return false;
351  }
352  http_msg.set_version_major(0);
353  http_msg.set_version_minor(0);
354  m_headers_parse_state = PARSE_EXPECTING_CR;
355  } else if (*m_read_ptr != 'H') {
356  set_error(ec, ERROR_VERSION_CHAR);
357  return false;
358  }
359  m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
360  break;
361 
362  case PARSE_HTTP_VERSION_T_1:
363  // parsing "HTTP"
364  if (*m_read_ptr != 'T') {
365  set_error(ec, ERROR_VERSION_CHAR);
366  return false;
367  }
368  m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
369  break;
370 
371  case PARSE_HTTP_VERSION_T_2:
372  // parsing "HTTP"
373  if (*m_read_ptr != 'T') {
374  set_error(ec, ERROR_VERSION_CHAR);
375  return false;
376  }
377  m_headers_parse_state = PARSE_HTTP_VERSION_P;
378  break;
379 
380  case PARSE_HTTP_VERSION_P:
381  // parsing "HTTP"
382  if (*m_read_ptr != 'P') {
383  set_error(ec, ERROR_VERSION_CHAR);
384  return false;
385  }
386  m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
387  break;
388 
389  case PARSE_HTTP_VERSION_SLASH:
390  // parsing slash after "HTTP"
391  if (*m_read_ptr != '/') {
392  set_error(ec, ERROR_VERSION_CHAR);
393  return false;
394  }
395  m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
396  break;
397 
398  case PARSE_HTTP_VERSION_MAJOR_START:
399  // parsing the first digit of the major version number
400  if (!is_digit(*m_read_ptr)) {
401  set_error(ec, ERROR_VERSION_CHAR);
402  return false;
403  }
404  http_msg.set_version_major(*m_read_ptr - '0');
405  m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
406  break;
407 
408  case PARSE_HTTP_VERSION_MAJOR:
409  // parsing the major version number (not first digit)
410  if (*m_read_ptr == '.') {
411  m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
412  } else if (is_digit(*m_read_ptr)) {
413  http_msg.set_version_major( (http_msg.get_version_major() * 10)
414  + (*m_read_ptr - '0') );
415  } else {
416  set_error(ec, ERROR_VERSION_CHAR);
417  return false;
418  }
419  break;
420 
421  case PARSE_HTTP_VERSION_MINOR_START:
422  // parsing the first digit of the minor version number
423  if (!is_digit(*m_read_ptr)) {
424  set_error(ec, ERROR_VERSION_CHAR);
425  return false;
426  }
427  http_msg.set_version_minor(*m_read_ptr - '0');
428  m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
429  break;
430 
431  case PARSE_HTTP_VERSION_MINOR:
432  // parsing the major version number (not first digit)
433  if (*m_read_ptr == ' ') {
434  // ignore trailing spaces after version in request
435  if (! m_is_request) {
436  m_headers_parse_state = PARSE_STATUS_CODE_START;
437  }
438  } else if (*m_read_ptr == '\r') {
439  // should only happen for requests
440  if (! m_is_request) {
441  set_error(ec, ERROR_STATUS_EMPTY);
442  return false;
443  }
444  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
445  } else if (*m_read_ptr == '\n') {
446  // should only happen for requests
447  if (! m_is_request) {
448  set_error(ec, ERROR_STATUS_EMPTY);
449  return false;
450  }
451  m_headers_parse_state = PARSE_EXPECTING_CR;
452  } else if (is_digit(*m_read_ptr)) {
453  http_msg.set_version_minor( (http_msg.get_version_minor() * 10)
454  + (*m_read_ptr - '0') );
455  } else {
456  set_error(ec, ERROR_VERSION_CHAR);
457  return false;
458  }
459  break;
460 
461  case PARSE_STATUS_CODE_START:
462  // parsing the first digit of the response status code
463  if (!is_digit(*m_read_ptr)) {
464  set_error(ec, ERROR_STATUS_CHAR);
465  return false;
466  }
467  m_status_code = (*m_read_ptr - '0');
468  m_headers_parse_state = PARSE_STATUS_CODE;
469  break;
470 
471  case PARSE_STATUS_CODE:
472  // parsing the response status code (not first digit)
473  if (*m_read_ptr == ' ') {
474  m_status_message.erase();
475  m_headers_parse_state = PARSE_STATUS_MESSAGE;
476  } else if (is_digit(*m_read_ptr)) {
477  m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
478  } else if (*m_read_ptr == '\r') {
479  // recover from status message not sent
480  m_status_message.erase();
481  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
482  } else if (*m_read_ptr == '\n') {
483  // recover from status message not sent
484  m_status_message.erase();
485  m_headers_parse_state = PARSE_EXPECTING_CR;
486  } else {
487  set_error(ec, ERROR_STATUS_CHAR);
488  return false;
489  }
490  break;
491 
492  case PARSE_STATUS_MESSAGE:
493  // parsing the response status message
494  if (*m_read_ptr == '\r') {
495  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
496  } else if (*m_read_ptr == '\n') {
497  m_headers_parse_state = PARSE_EXPECTING_CR;
498  } else if (is_control(*m_read_ptr)) {
499  set_error(ec, ERROR_STATUS_CHAR);
500  return false;
501  } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
502  set_error(ec, ERROR_STATUS_CHAR);
503  return false;
504  } else {
505  m_status_message.push_back(*m_read_ptr);
506  }
507  break;
508 
509  case PARSE_EXPECTING_NEWLINE:
510  // we received a CR; expecting a newline to follow
511  if (*m_read_ptr == '\n') {
512  // check if this is a HTTP 0.9 "Simple Request"
513  if (m_is_request && http_msg.get_version_major() == 0) {
514  PION_LOG_DEBUG(m_logger, "HTTP 0.9 Simple-Request found");
515  ++m_read_ptr;
516  m_bytes_last_read = (m_read_ptr - read_start_ptr);
517  m_bytes_total_read += m_bytes_last_read;
518  return true;
519  } else {
520  m_headers_parse_state = PARSE_HEADER_START;
521  }
522  } else if (*m_read_ptr == '\r') {
523  // we received two CR's in a row
524  // assume CR only is (incorrectly) being used for line termination
525  // therefore, the message is finished
526  ++m_read_ptr;
527  m_bytes_last_read = (m_read_ptr - read_start_ptr);
528  m_bytes_total_read += m_bytes_last_read;
529  return true;
530  } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
531  m_headers_parse_state = PARSE_HEADER_WHITESPACE;
532  } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
533  set_error(ec, ERROR_HEADER_CHAR);
534  return false;
535  } else {
536  // assume it is the first character for the name of a header
537  m_header_name.erase();
538  m_header_name.push_back(*m_read_ptr);
539  m_headers_parse_state = PARSE_HEADER_NAME;
540  }
541  break;
542 
543  case PARSE_EXPECTING_CR:
544  // we received a newline without a CR
545  if (*m_read_ptr == '\r') {
546  m_headers_parse_state = PARSE_HEADER_START;
547  } else if (*m_read_ptr == '\n') {
548  // we received two newlines in a row
549  // assume newline only is (incorrectly) being used for line termination
550  // therefore, the message is finished
551  ++m_read_ptr;
552  m_bytes_last_read = (m_read_ptr - read_start_ptr);
553  m_bytes_total_read += m_bytes_last_read;
554  return true;
555  } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
556  m_headers_parse_state = PARSE_HEADER_WHITESPACE;
557  } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
558  set_error(ec, ERROR_HEADER_CHAR);
559  return false;
560  } else {
561  // assume it is the first character for the name of a header
562  m_header_name.erase();
563  m_header_name.push_back(*m_read_ptr);
564  m_headers_parse_state = PARSE_HEADER_NAME;
565  }
566  break;
567 
568  case PARSE_HEADER_WHITESPACE:
569  // parsing whitespace before a header name
570  if (*m_read_ptr == '\r') {
571  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
572  } else if (*m_read_ptr == '\n') {
573  m_headers_parse_state = PARSE_EXPECTING_CR;
574  } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
575  if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
576  set_error(ec, ERROR_HEADER_CHAR);
577  return false;
578  }
579  // assume it is the first character for the name of a header
580  m_header_name.erase();
581  m_header_name.push_back(*m_read_ptr);
582  m_headers_parse_state = PARSE_HEADER_NAME;
583  }
584  break;
585 
586  case PARSE_HEADER_START:
587  // parsing the start of a new header
588  if (*m_read_ptr == '\r') {
589  m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
590  } else if (*m_read_ptr == '\n') {
591  m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
592  } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
593  m_headers_parse_state = PARSE_HEADER_WHITESPACE;
594  } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
595  set_error(ec, ERROR_HEADER_CHAR);
596  return false;
597  } else {
598  // first character for the name of a header
599  m_header_name.erase();
600  m_header_name.push_back(*m_read_ptr);
601  m_headers_parse_state = PARSE_HEADER_NAME;
602  }
603  break;
604 
605  case PARSE_HEADER_NAME:
606  // parsing the name of a header
607  if (*m_read_ptr == ':') {
608  m_header_value.erase();
609  m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
610  } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
611  set_error(ec, ERROR_HEADER_CHAR);
612  return false;
613  } else if (m_header_name.size() >= HEADER_NAME_MAX) {
614  set_error(ec, ERROR_HEADER_NAME_SIZE);
615  return false;
616  } else {
617  // character (not first) for the name of a header
618  m_header_name.push_back(*m_read_ptr);
619  }
620  break;
621 
622  case PARSE_SPACE_BEFORE_HEADER_VALUE:
623  // parsing space character before a header's value
624  if (*m_read_ptr == ' ') {
625  m_headers_parse_state = PARSE_HEADER_VALUE;
626  } else if (*m_read_ptr == '\r') {
627  http_msg.add_header(m_header_name, m_header_value);
628  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
629  } else if (*m_read_ptr == '\n') {
630  http_msg.add_header(m_header_name, m_header_value);
631  m_headers_parse_state = PARSE_EXPECTING_CR;
632  } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
633  set_error(ec, ERROR_HEADER_CHAR);
634  return false;
635  } else {
636  // assume it is the first character for the value of a header
637  m_header_value.push_back(*m_read_ptr);
638  m_headers_parse_state = PARSE_HEADER_VALUE;
639  }
640  break;
641 
642  case PARSE_HEADER_VALUE:
643  // parsing the value of a header
644  if (*m_read_ptr == '\r') {
645  http_msg.add_header(m_header_name, m_header_value);
646  m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
647  } else if (*m_read_ptr == '\n') {
648  http_msg.add_header(m_header_name, m_header_value);
649  m_headers_parse_state = PARSE_EXPECTING_CR;
650  } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) {
651  // RFC 2616, 2.2 basic Rules.
652  // TEXT = <any OCTET except CTLs, but including LWS>
653  // LWS = [CRLF] 1*( SP | HT )
654  //
655  // TODO: parsing of folding LWS in multiple lines headers
656  // doesn't work properly still
657  set_error(ec, ERROR_HEADER_CHAR);
658  return false;
659  } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
660  set_error(ec, ERROR_HEADER_VALUE_SIZE);
661  return false;
662  } else {
663  // character (not first) for the value of a header
664  m_header_value.push_back(*m_read_ptr);
665  }
666  break;
667 
668  case PARSE_EXPECTING_FINAL_NEWLINE:
669  if (*m_read_ptr == '\n') ++m_read_ptr;
670  m_bytes_last_read = (m_read_ptr - read_start_ptr);
671  m_bytes_total_read += m_bytes_last_read;
672  return true;
673 
674  case PARSE_EXPECTING_FINAL_CR:
675  if (*m_read_ptr == '\r') ++m_read_ptr;
676  m_bytes_last_read = (m_read_ptr - read_start_ptr);
677  m_bytes_total_read += m_bytes_last_read;
678  return true;
679  }
680 
681  ++m_read_ptr;
682  }
683 
684  m_bytes_last_read = (m_read_ptr - read_start_ptr);
685  m_bytes_total_read += m_bytes_last_read;
686  return boost::indeterminate;
687 }
688 
// NOTE(review): the line carrying this function's signature (listing line
// 689) is missing from this extract; only the body is shown. The body refers
// to its argument as `http_msg` and uses parser members, so it is a parser
// member function taking the message being parsed -- restore the signature
// from the upstream source (presumably update_message_with_header_data;
// confirm).
//
// Transfers the values collected while parsing the first line into the
// message object and decodes the data derived from them:
//   - requests: method, resource and query string are stored, the query
//     string is split into query pairs, and every "Cookie" header is parsed
//   - responses: status code and status message are stored, and every
//     "Set-Cookie" header is parsed
// Decoding failures are only logged as warnings; they do not abort.
690 {
691  if (is_parsing_request()) {
692 
693  // finish an HTTP request message
694 
// the dynamic_cast to a reference throws std::bad_cast if http_msg is not a request
695  http::request& http_request(dynamic_cast<http::request&>(http_msg));
696  http_request.set_method(m_method);
697  http_request.set_resource(m_resource);
698  http_request.set_query_string(m_query_string);
699 
700  // parse query pairs from the URI query string
701  if (! m_query_string.empty()) {
702  if (! parse_url_encoded(http_request.get_queries(),
703  m_query_string.c_str(),
704  m_query_string.size()))
705  PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
706  }
707 
708  // parse "Cookie" headers in request
709  std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
710  cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE);
// walk every header stored under the Cookie name; the loop stops at the end
// of the equal range or at the end of the container, whichever comes first
711  for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
712  cookie_iterator != http_request.get_headers().end()
713  && cookie_iterator != cookie_pair.second; ++cookie_iterator)
714  {
715  if (! parse_cookie_header(http_request.get_cookies(),
716  cookie_iterator->second, false) )
717  PION_LOG_WARN(m_logger, "Cookie header parsing failed");
718  }
719 
720  } else {
721 
722  // finish an HTTP response message
723 
// the dynamic_cast to a reference throws std::bad_cast if http_msg is not a response
724  http::response& http_response(dynamic_cast<http::response&>(http_msg));
725  http_response.set_status_code(m_status_code);
726  http_response.set_status_message(m_status_message);
727 
728  // parse "Set-Cookie" headers in response
729  std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
730  cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE);
// same traversal as for requests; the last argument of parse_cookie_header is
// true here and false for requests
731  for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
732  cookie_iterator != http_response.get_headers().end()
733  && cookie_iterator != cookie_pair.second; ++cookie_iterator)
734  {
735  if (! parse_cookie_header(http_response.get_cookies(),
736  cookie_iterator->second, true) )
737  PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
738  }
739 
740  }
741 }
742 
// NOTE(review): several listing lines of this function are missing from this
// extract: 743 (first line of the signature), 750-751, 775 (the body of the
// try block) and 825. The function returns a boost::tribool and takes
// `http_msg` and `ec`; presumably it is parser::finish_header_parsing, which
// parse() calls once the headers are complete -- confirm and restore the
// missing lines from the upstream source.
//
// Decides, from the parsed headers, how the payload of the message has to be
// read and sets m_message_parse_state accordingly:
//   - chunked transfer encoding       -> PARSE_CHUNKS
//   - content length implied          -> PARSE_END
//   - Content-Length header, == 0     -> PARSE_END
//   - Content-Length header, > 0      -> PARSE_CONTENT
//   - no length, response             -> PARSE_CONTENT_NO_LENGTH
//   - no length, request              -> PARSE_END
//
// returns: true when there is nothing further to parse (no payload, or
//          m_parse_headers_only is set), false when the content length could
//          not be determined from the header, indeterminate when payload
//          content still has to be read
744  boost::system::error_code& ec)
745 {
746  boost::tribool rc = boost::indeterminate;
747 
// reset the payload counters before the payload mode is chosen
748  m_bytes_content_remaining = m_bytes_content_read = 0;
749  http_msg.set_content_length(0);
// NOTE(review): listing lines 750-751 are absent here.
752 
753  if (http_msg.is_chunked()) {
754 
755  // content is encoded using chunks
756  m_message_parse_state = PARSE_CHUNKS;
757 
758  // return true if parsing headers only
759  if (m_parse_headers_only)
760  rc = true;
761 
762  } else if (http_msg.is_content_length_implied()) {
763 
764  // content length is implied to be zero
765  m_message_parse_state = PARSE_END;
766  rc = true;
767 
768  } else {
769  // content length should be specified in the headers
770 
771  if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) {
772 
773  // message has a content-length header
774  try {
// NOTE(review): listing line 775 (the statement guarded by this try block) is
// absent here; judging from the log message below it updates the message's
// content length -- confirm.
776  } catch (...) {
777  PION_LOG_ERROR(m_logger, "Unable to update content length");
778  set_error(ec, ERROR_INVALID_CONTENT_LENGTH);
779  return false;
780  }
781 
782  // check if content-length header == 0
783  if (http_msg.get_content_length() == 0) {
784  m_message_parse_state = PARSE_END;
785  rc = true;
786  } else {
787  m_message_parse_state = PARSE_CONTENT;
// the remaining-bytes counter keeps the full advertised length ...
788  m_bytes_content_remaining = http_msg.get_content_length();
789 
790  // check if content-length exceeds maximum allowed
// ... while the length stored in the message is capped at the maximum
791  if (m_bytes_content_remaining > m_max_content_length)
792  http_msg.set_content_length(m_max_content_length);
793 
794  if (m_parse_headers_only) {
795  // return true if parsing headers only
796  rc = true;
797  } else {
798  // allocate a buffer for payload content (may be zero-size)
799  http_msg.create_content_buffer();
800  }
801  }
802 
803  } else {
804  // no content-length specified, and the content length cannot
805  // otherwise be determined
806 
807  // only if not a request, read through the close of the connection
808  if (! m_is_request) {
809  // clear the chunk buffers before we start
810  http_msg.get_chunk_cache().clear();
811 
812  // continue reading content until there is no more data
813  m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
814 
815  // return true if parsing headers only
816  if (m_parse_headers_only)
817  rc = true;
818  } else {
// a request without a length is treated as having no payload
819  m_message_parse_state = PARSE_END;
820  rc = true;
821  }
822  }
823  }
824 
// NOTE(review): listing line 825 is absent here.
826 
827  return rc;
828 }
829 
830 bool parser::parse_uri(const std::string& uri, std::string& proto,
831  std::string& host, boost::uint16_t& port,
832  std::string& path, std::string& query)
833 {
834  size_t proto_end = uri.find("://");
835  size_t proto_len = 0;
836 
837  if(proto_end != std::string::npos) {
838  proto = uri.substr(0, proto_end);
839  proto_len = proto_end + 3; // add ://
840  } else {
841  proto.clear();
842  }
843 
844  // find a first slash charact
845  // that indicates the end of the <server>:<port> part
846  size_t server_port_end = uri.find('/', proto_len);
847  if (server_port_end == std::string::npos) {
848  // no path -> use just /
849  path = "/";
850  server_port_end = uri.size();
851  }
852 
853  // copy <server>:<port> into temp string
854  std::string t;
855  t = uri.substr(proto_len, server_port_end - proto_len);
856  size_t port_pos = t.find(':', 0);
857 
858  // assign output host and port parameters
859 
860  host = t.substr(0, port_pos); // if port_pos == npos, copy whole string
861  if(host.length() == 0) {
862  return false;
863  }
864 
865  // parse the port, if it's not empty
866  if(port_pos != std::string::npos) {
867  try {
868  port = boost::lexical_cast<int>(t.substr(port_pos+1));
869  } catch (boost::bad_lexical_cast &) {
870  return false;
871  }
872  } else if (proto == "http" || proto == "HTTP") {
873  port = 80;
874  } else if (proto == "https" || proto == "HTTPS") {
875  port = 443;
876  } else {
877  port = 0;
878  }
879 
880  if (server_port_end < uri.size()) {
881  // copy the rest of the URI into path part
882  path = uri.substr(server_port_end);
883 
884  // split the path and the query string parts
885  size_t query_pos = path.find('?', 0);
886 
887  if(query_pos != std::string::npos) {
888  query = path.substr(query_pos + 1, path.length() - query_pos - 1);
889  path = path.substr(0, query_pos);
890  } else {
891  query.clear();
892  }
893  }
894 
895  return true;
896 }
897 
898 bool parser::parse_url_encoded(ihash_multimap& dict,
899  const char *ptr, const size_t len)
900 {
901  // sanity check
902  if (ptr == NULL || len == 0)
903  return true;
904 
905  // used to track whether we are parsing the name or value
906  enum QueryParseState {
907  QUERY_PARSE_NAME, QUERY_PARSE_VALUE
908  } parse_state = QUERY_PARSE_NAME;
909 
910  // misc other variables used for parsing
911  const char * const end = ptr + len;
912  std::string query_name;
913  std::string query_value;
914 
915  // iterate through each encoded character
916  while (ptr < end) {
917  switch (parse_state) {
918 
919  case QUERY_PARSE_NAME:
920  // parsing query name
921  if (*ptr == '=') {
922  // end of name found (OK if empty)
923  parse_state = QUERY_PARSE_VALUE;
924  } else if (*ptr == '&') {
925  // if query name is empty, just skip it (i.e. "&&")
926  if (! query_name.empty()) {
927  // assume that "=" is missing -- it's OK if the value is empty
928  dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
929  query_name.erase();
930  }
931  } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
932  // ignore linefeeds, carriage return and tabs (normally within POST content)
933  } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
934  // control character detected, or max sized exceeded
935  return false;
936  } else {
937  // character is part of the name
938  query_name.push_back(*ptr);
939  }
940  break;
941 
942  case QUERY_PARSE_VALUE:
943  // parsing query value
944  if (*ptr == '&') {
945  // end of value found (OK if empty)
946  if (! query_name.empty()) {
947  dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
948  query_name.erase();
949  }
950  query_value.erase();
951  parse_state = QUERY_PARSE_NAME;
952  } else if (*ptr == ',') {
953  // end of value found in multi-value list (OK if empty)
954  if (! query_name.empty())
955  dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
956  query_value.erase();
957  } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
958  // ignore linefeeds, carriage return and tabs (normally within POST content)
959  } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
960  // control character detected, or max sized exceeded
961  return false;
962  } else {
963  // character is part of the value
964  query_value.push_back(*ptr);
965  }
966  break;
967  }
968 
969  ++ptr;
970  }
971 
972  // handle last pair in string
973  if (! query_name.empty())
974  dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
975 
976  return true;
977 }
978 
979 bool parser::parse_multipart_form_data(ihash_multimap& dict,
980  const std::string& content_type,
981  const char *ptr, const size_t len)
982 {
983  // sanity check
984  if (ptr == NULL || len == 0)
985  return true;
986 
987  // parse field boundary
988  std::size_t pos = content_type.find("boundary=");
989  if (pos == std::string::npos)
990  return false;
991  const std::string boundary = std::string("--") + content_type.substr(pos+9);
992 
993  // used to track what we are parsing
994  enum MultiPartParseState {
995  MP_PARSE_START,
996  MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF,
997  MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE,
998  MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA
999  } parse_state = MP_PARSE_START;
1000 
1001  // a few variables used for parsing
1002  std::string header_name;
1003  std::string header_value;
1004  std::string field_name;
1005  std::string field_value;
1006  bool found_parameter = false;
1007  bool save_current_field = true;
1008  const char * const end_ptr = ptr + len;
1009 
1010  ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
1011 
1012  while (ptr != NULL && ptr < end_ptr) {
1013  switch (parse_state) {
1014  case MP_PARSE_START:
1015  // start parsing a new field
1016  header_name.clear();
1017  header_value.clear();
1018  field_name.clear();
1019  field_value.clear();
1020  save_current_field = true;
1021  ptr += boundary.size() - 1;
1022  parse_state = MP_PARSE_HEADER_CR;
1023  break;
1024  case MP_PARSE_HEADER_CR:
1025  // expecting CR while parsing headers
1026  if (*ptr == '\r') {
1027  // got it -> look for linefeed
1028  parse_state = MP_PARSE_HEADER_LF;
1029  } else if (*ptr == '\n') {
1030  // got a linefeed? try to ignore and start parsing header
1031  parse_state = MP_PARSE_HEADER_NAME;
1032  } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') {
1033  // end of multipart content
1034  return true;
1035  } else return false;
1036  break;
1037  case MP_PARSE_HEADER_LF:
1038  // expecting LF while parsing headers
1039  if (*ptr == '\n') {
1040  // got it -> start parsing header name
1041  parse_state = MP_PARSE_HEADER_NAME;
1042  } else return false;
1043  break;
1044  case MP_PARSE_HEADER_NAME:
1045  // parsing the name of a header
1046  if (*ptr == '\r' || *ptr == '\n') {
1047  if (header_name.empty()) {
1048  // got CR or LF at beginning; skip to data
1049  parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA);
1050  } else {
1051  // premature CR or LF -> just ignore and start parsing next header
1052  parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
1053  }
1054  } else if (*ptr == ':') {
1055  // done parsing header name -> consume space next
1056  parse_state = MP_PARSE_HEADER_SPACE;
1057  } else {
1058  // one more byte for header name
1059  header_name += *ptr;
1060  }
1061  break;
1062  case MP_PARSE_HEADER_SPACE:
1063  // expecting a space before header value
1064  if (*ptr == '\r') {
1065  // premature CR -> just ignore and start parsing next header
1066  parse_state = MP_PARSE_HEADER_LF;
1067  } else if (*ptr == '\n') {
1068  // premature LF -> just ignore and start parsing next header
1069  parse_state = MP_PARSE_HEADER_NAME;
1070  } else if (*ptr != ' ') {
1071  // not a space -> assume it's a value char
1072  header_value += *ptr;
1073  parse_state = MP_PARSE_HEADER_VALUE;
1074  }
1075  // otherwise just ignore the space(s)
1076  break;
1077  case MP_PARSE_HEADER_VALUE:
1078  // parsing the value of a header
1079  if (*ptr == '\r' || *ptr == '\n') {
1080  // reached the end of the value -> check if it's important
1081  if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
1082  // only keep fields that have a text type or no type
1083  save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
1084  } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
1085  // get current field from content-disposition header
1086  std::size_t name_pos = header_value.find("name=\"");
1087  if (name_pos != std::string::npos) {
1088  for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) {
1089  field_name += header_value[name_pos];
1090  }
1091  }
1092  }
1093  // clear values and start parsing next header
1094  header_name.clear();
1095  header_value.clear();
1096  parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
1097  } else {
1098  // one more byte for header value
1099  header_value += *ptr;
1100  }
1101  break;
1102  case MP_PARSE_HEADER_LAST_LF:
1103  // expecting final linefeed to terminate headers and begin field data
1104  if (*ptr == '\n') {
1105  // got it
1106  if (save_current_field && !field_name.empty()) {
1107  // parse the field if we care & know enough about it
1108  parse_state = MP_PARSE_FIELD_DATA;
1109  } else {
1110  // otherwise skip ahead to next field
1111  parse_state = MP_PARSE_START;
1112  ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
1113  }
1114  } else return false;
1115  break;
1116  case MP_PARSE_FIELD_DATA:
1117  // parsing the value of a field -> find the end of it
1118  const char *field_end_ptr = end_ptr;
1119  const char *next_ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
1120  if (next_ptr) {
1121  // don't include CRLF before next boundary
1122  const char *temp_ptr = next_ptr - 2;
1123  if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n')
1124  field_end_ptr = temp_ptr;
1125  else field_end_ptr = next_ptr;
1126  }
1127  field_value.assign(ptr, field_end_ptr - ptr);
1128  // add the field to the query dictionary
1129  dict.insert( std::make_pair(field_name, field_value) );
1130  found_parameter = true;
1131  // skip ahead to next field
1132  parse_state = MP_PARSE_START;
1133  ptr = next_ptr;
1134  break;
1135  }
1136  // we've already bumped position if MP_PARSE_START
1137  if (parse_state != MP_PARSE_START)
1138  ++ptr;
1139  }
1140 
1141  return found_parameter;
1142 }
1143 
1144 bool parser::parse_cookie_header(ihash_multimap& dict,
1145  const char *ptr, const size_t len,
1146  bool set_cookie_header)
1147 {
1148  // BASED ON RFC 2109
1149  // http://www.ietf.org/rfc/rfc2109.txt
1150  //
1151  // The current implementation ignores cookie attributes which begin with '$'
1152  // (i.e. $Path=/, $Domain=, etc.)
1153 
1154  // used to track what we are parsing
1155  enum CookieParseState {
1156  COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
1157  } parse_state = COOKIE_PARSE_NAME;
1158 
1159  // misc other variables used for parsing
1160  const char * const end = ptr + len;
1161  std::string cookie_name;
1162  std::string cookie_value;
1163  char value_quote_character = '\0';
1164 
1165  // iterate through each character
1166  while (ptr < end) {
1167  switch (parse_state) {
1168 
1169  case COOKIE_PARSE_NAME:
1170  // parsing cookie name
1171  if (*ptr == '=') {
1172  // end of name found (OK if empty)
1173  value_quote_character = '\0';
1174  parse_state = COOKIE_PARSE_VALUE;
1175  } else if (*ptr == ';' || *ptr == ',') {
1176  // ignore empty cookie names since this may occur naturally
1177  // when quoted values are encountered
1178  if (! cookie_name.empty()) {
1179  // value is empty (OK)
1180  if (! is_cookie_attribute(cookie_name, set_cookie_header))
1181  dict.insert( std::make_pair(cookie_name, cookie_value) );
1182  cookie_name.erase();
1183  }
1184  } else if (*ptr != ' ') { // ignore whitespace
1185  // check if control character detected, or max sized exceeded
1186  if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
1187  return false;
1188  // character is part of the name
1189  cookie_name.push_back(*ptr);
1190  }
1191  break;
1192 
1193  case COOKIE_PARSE_VALUE:
1194  // parsing cookie value
1195  if (value_quote_character == '\0') {
1196  // value is not (yet) quoted
1197  if (*ptr == ';' || *ptr == ',') {
1198  // end of value found (OK if empty)
1199  if (! is_cookie_attribute(cookie_name, set_cookie_header))
1200  dict.insert( std::make_pair(cookie_name, cookie_value) );
1201  cookie_name.erase();
1202  cookie_value.erase();
1203  parse_state = COOKIE_PARSE_NAME;
1204  } else if (*ptr == '\'' || *ptr == '"') {
1205  if (cookie_value.empty()) {
1206  // begin quoted value
1207  value_quote_character = *ptr;
1208  } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
1209  // max size exceeded
1210  return false;
1211  } else {
1212  // assume character is part of the (unquoted) value
1213  cookie_value.push_back(*ptr);
1214  }
1215  } else if (*ptr != ' ' || !cookie_value.empty()) { // ignore leading unquoted whitespace
1216  // check if control character detected, or max sized exceeded
1217  if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
1218  return false;
1219  // character is part of the (unquoted) value
1220  cookie_value.push_back(*ptr);
1221  }
1222  } else {
1223  // value is quoted
1224  if (*ptr == value_quote_character) {
1225  // end of value found (OK if empty)
1226  if (! is_cookie_attribute(cookie_name, set_cookie_header))
1227  dict.insert( std::make_pair(cookie_name, cookie_value) );
1228  cookie_name.erase();
1229  cookie_value.erase();
1230  parse_state = COOKIE_PARSE_IGNORE;
1231  } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
1232  // max size exceeded
1233  return false;
1234  } else {
1235  // character is part of the (quoted) value
1236  cookie_value.push_back(*ptr);
1237  }
1238  }
1239  break;
1240 
1241  case COOKIE_PARSE_IGNORE:
1242  // ignore everything until we reach a comma "," or semicolon ";"
1243  if (*ptr == ';' || *ptr == ',')
1244  parse_state = COOKIE_PARSE_NAME;
1245  break;
1246  }
1247 
1248  ++ptr;
1249  }
1250 
1251  // handle last cookie in string
1252  if (! is_cookie_attribute(cookie_name, set_cookie_header))
1253  dict.insert( std::make_pair(cookie_name, cookie_value) );
1254 
1255  return true;
1256 }
1257 
1259  boost::system::error_code& ec)
1260 {
1261  //
1262  // note that boost::tribool may have one of THREE states:
1263  //
1264  // false: encountered an error while parsing message
1265  // true: finished successfully parsing the message
1266  // indeterminate: parsed bytes, but the message is not yet finished
1267  //
1268  const char *read_start_ptr = m_read_ptr;
1269  m_bytes_last_read = 0;
1270  while (m_read_ptr < m_read_end_ptr) {
1271 
1272  switch (m_chunked_content_parse_state) {
1273  case PARSE_CHUNK_SIZE_START:
1274  // we have not yet started parsing the next chunk size
1275  if (is_hex_digit(*m_read_ptr)) {
1276  m_chunk_size_str.erase();
1277  m_chunk_size_str.push_back(*m_read_ptr);
1278  m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
1279  } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
1280  // Ignore leading whitespace. Technically, the standard probably doesn't allow white space here,
1281  // but we'll be flexible, since there's no ambiguity.
1282  break;
1283  } else {
1284  set_error(ec, ERROR_CHUNK_CHAR);
1285  return false;
1286  }
1287  break;
1288 
1289  case PARSE_CHUNK_SIZE:
1290  if (is_hex_digit(*m_read_ptr)) {
1291  m_chunk_size_str.push_back(*m_read_ptr);
1292  } else if (*m_read_ptr == '\x0D') {
1293  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
1294  } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
1295  // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this,
1296  // but we'll be flexible, since there's no ambiguity.
1297  m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
1298  } else if (*m_read_ptr == ';') {
1299  // Following the semicolon we have text which will be ignored till we encounter
1300  // a CRLF
1301  m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE;
1302  } else {
1303  set_error(ec, ERROR_CHUNK_CHAR);
1304  return false;
1305  }
1306  break;
1307 
1308  case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE:
1309  if (*m_read_ptr == '\x0D') {
1310  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
1311  }
1312  break;
1313 
1314  case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
1315  if (*m_read_ptr == '\x0D') {
1316  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
1317  } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
1318  // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this,
1319  // but we'll be flexible, since there's no ambiguity.
1320  break;
1321  } else {
1322  set_error(ec, ERROR_CHUNK_CHAR);
1323  return false;
1324  }
1325  break;
1326 
1327  case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
1328  // We received a CR; expecting LF to follow. We can't be flexible here because
1329  // if we see anything other than LF, we can't be certain where the chunk starts.
1330  if (*m_read_ptr == '\x0A') {
1331  m_bytes_read_in_current_chunk = 0;
1332  m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
1333  if (m_size_of_current_chunk == 0) {
1334  m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK;
1335  } else {
1336  m_chunked_content_parse_state = PARSE_CHUNK;
1337  }
1338  } else {
1339  set_error(ec, ERROR_CHUNK_CHAR);
1340  return false;
1341  }
1342  break;
1343 
1344  case PARSE_CHUNK:
1345  if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
1346  if (m_payload_handler) {
1347  const std::size_t bytes_avail = bytes_available();
1348  const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk;
1349  const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk;
1350  m_payload_handler(m_read_ptr, len);
1351  m_bytes_read_in_current_chunk += len;
1352  if (len > 1) m_read_ptr += (len - 1);
1353  } else if (chunks.size() < m_max_content_length) {
1354  chunks.push_back(*m_read_ptr);
1355  m_bytes_read_in_current_chunk++;
1356  }
1357  }
1358  if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
1359  m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
1360  }
1361  break;
1362 
1363  case PARSE_EXPECTING_CR_AFTER_CHUNK:
1364  // we've read exactly m_size_of_current_chunk bytes since starting the current chunk
1365  if (*m_read_ptr == '\x0D') {
1366  m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
1367  } else {
1368  set_error(ec, ERROR_CHUNK_CHAR);
1369  return false;
1370  }
1371  break;
1372 
1373  case PARSE_EXPECTING_LF_AFTER_CHUNK:
1374  // we received a CR; expecting LF to follow
1375  if (*m_read_ptr == '\x0A') {
1376  m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
1377  } else {
1378  set_error(ec, ERROR_CHUNK_CHAR);
1379  return false;
1380  }
1381  break;
1382 
1383  case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK:
1384  // we've read the final chunk; expecting final CRLF
1385  if (*m_read_ptr == '\x0D') {
1386  m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
1387  } else {
1388  // Packet contains footers; Chunk parsing is commplete
1389  // Footer data contains name value pairs to be added to HTTP Message
1390  m_message_parse_state = PARSE_FOOTERS;
1391  m_headers_parse_state = PARSE_HEADER_START;
1392  m_bytes_last_read = (m_read_ptr - read_start_ptr);
1393  m_bytes_total_read += m_bytes_last_read;
1394  m_bytes_content_read += m_bytes_last_read;
1395  PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
1396  return true;
1397  }
1398  break;
1399 
1400  case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
1401  // we received the final CR; expecting LF to follow
1402  if (*m_read_ptr == '\x0A') {
1403  ++m_read_ptr;
1404  m_bytes_last_read = (m_read_ptr - read_start_ptr);
1405  m_bytes_total_read += m_bytes_last_read;
1406  m_bytes_content_read += m_bytes_last_read;
1407  PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
1408  return true;
1409  } else {
1410  set_error(ec, ERROR_CHUNK_CHAR);
1411  return false;
1412  }
1413  }
1414 
1415  ++m_read_ptr;
1416  }
1417 
1418  m_bytes_last_read = (m_read_ptr - read_start_ptr);
1419  m_bytes_total_read += m_bytes_last_read;
1420  m_bytes_content_read += m_bytes_last_read;
1421  return boost::indeterminate;
1422 }
1423 
1424 boost::tribool parser::consume_content(http::message& http_msg,
1425  boost::system::error_code& ec)
1426 {
1427  size_t content_bytes_to_read;
1428  size_t content_bytes_available = bytes_available();
1429  boost::tribool rc = boost::indeterminate;
1430 
1431  if (m_bytes_content_remaining == 0) {
1432  // we have all of the remaining payload content
1433  return true;
1434  } else {
1435  if (content_bytes_available >= m_bytes_content_remaining) {
1436  // we have all of the remaining payload content
1437  rc = true;
1438  content_bytes_to_read = m_bytes_content_remaining;
1439  } else {
1440  // only some of the payload content is available
1441  content_bytes_to_read = content_bytes_available;
1442  }
1443  m_bytes_content_remaining -= content_bytes_to_read;
1444  }
1445 
1446  // make sure content buffer is not already full
1447  if (m_payload_handler) {
1448  m_payload_handler(m_read_ptr, content_bytes_to_read);
1449  } else if (m_bytes_content_read < m_max_content_length) {
1450  if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
1451  // read would exceed maximum size for content buffer
1452  // copy only enough bytes to fill up the content buffer
1453  memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr,
1454  m_max_content_length - m_bytes_content_read);
1455  } else {
1456  // copy all bytes available
1457  memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
1458  }
1459  }
1460 
1461  m_read_ptr += content_bytes_to_read;
1462  m_bytes_content_read += content_bytes_to_read;
1463  m_bytes_total_read += content_bytes_to_read;
1464  m_bytes_last_read = content_bytes_to_read;
1465 
1466  return rc;
1467 }
1468 
1470 {
1471  if (bytes_available() == 0) {
1472  m_bytes_last_read = 0;
1473  } else {
1474  // note: m_bytes_last_read must be > 0 because of bytes_available() check
1475  m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
1476  if (m_payload_handler) {
1477  m_payload_handler(m_read_ptr, m_bytes_last_read);
1478  m_read_ptr += m_bytes_last_read;
1479  } else {
1480  while (m_read_ptr < m_read_end_ptr) {
1481  if (chunks.size() < m_max_content_length)
1482  chunks.push_back(*m_read_ptr);
1483  ++m_read_ptr;
1484  }
1485  }
1486  m_bytes_total_read += m_bytes_last_read;
1487  m_bytes_content_read += m_bytes_last_read;
1488  }
1489  return m_bytes_last_read;
1490 }
1491 
1492 void parser::finish(http::message& http_msg) const
1493 {
1494  switch (m_message_parse_state) {
1495  case PARSE_START:
1496  http_msg.set_is_valid(false);
1497  http_msg.set_content_length(0);
1498  http_msg.create_content_buffer();
1499  return;
1500  case PARSE_END:
1501  http_msg.set_is_valid(true);
1502  break;
1503  case PARSE_HEADERS:
1504  case PARSE_FOOTERS:
1505  http_msg.set_is_valid(false);
1507  http_msg.set_content_length(0);
1508  http_msg.create_content_buffer();
1509  break;
1510  case PARSE_CONTENT:
1511  http_msg.set_is_valid(false);
1512  if (get_content_bytes_read() < m_max_content_length) // NOTE: we can read more than we have allocated/stored
1514  break;
1515  case PARSE_CHUNKS:
1516  http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
1517  if (!m_payload_handler)
1518  http_msg.concatenate_chunks();
1519  break;
1520  case PARSE_CONTENT_NO_LENGTH:
1521  http_msg.set_is_valid(true);
1522  if (!m_payload_handler)
1523  http_msg.concatenate_chunks();
1524  break;
1525  }
1526 
1527  compute_msg_status(http_msg, http_msg.is_valid());
1528 
1529  if (is_parsing_request() && !m_payload_handler && !m_parse_headers_only) {
1530  // Parse query pairs from post content if content type is x-www-form-urlencoded.
1531  // Type could be followed by parameters (as defined in section 3.6 of RFC 2616)
1532  // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8
1533  http::request& http_request(dynamic_cast<http::request&>(http_msg));
1534  const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE);
1535  if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(),
1536  http::types::CONTENT_TYPE_URLENCODED) == 0)
1537  {
1538  if (! parse_url_encoded(http_request.get_queries(),
1539  http_request.get_content(),
1540  http_request.get_content_length()))
1541  PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)");
1542  } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(),
1543  http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0)
1544  {
1545  if (! parse_multipart_form_data(http_request.get_queries(),
1546  content_type_header,
1547  http_request.get_content(),
1548  http_request.get_content_length()))
1549  PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)");
1550  }
1551  }
1552 }
1553 
1554 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok )
1555 {
1556  http::message::data_status_t st = http::message::STATUS_NONE;
1557 
1558  if(http_msg.has_missing_packets()) {
1559  st = http_msg.has_data_after_missing_packets() ?
1560  http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED;
1561  } else {
1562  st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED;
1563  }
1564 
1565  http_msg.set_status(st);
1566 }
1567 
1569 {
1570  static error_category_t UNIQUE_ERROR_CATEGORY;
1571  m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
1572 }
1573 
1574 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip)
1575 {
1576  // static regex's used to check for ipv4 address
1577  static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
1578 
1584  static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
1585 
1586  // sanity check
1587  if (header.empty())
1588  return false;
1589 
1590  // local variables re-used by while loop
1591  boost::match_results<std::string::const_iterator> m;
1592  std::string::const_iterator start_it = header.begin();
1593 
1594  // search for next ip address within the header
1595  while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
1596  // get ip that matched
1597  std::string ip_str(m[0].first, m[0].second);
1598  // check if public network ip address
1599  if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
1600  // match found!
1601  public_ip = ip_str;
1602  return true;
1603  }
1604  // update search starting position
1605  start_it = m[0].second;
1606  }
1607 
1608  // no matches found
1609  return false;
1610 }
1611 
1612 } // end namespace http
1613 } // end namespace pion
static const boost::uint32_t COOKIE_NAME_MAX
maximum length for a cookie name
Definition: parser.hpp:558
static void create_error_category(void)
creates the unique parser error_category_t
static const std::size_t DEFAULT_CONTENT_MAX
maximum length for HTTP payload content
Definition: parser.hpp:46
boost::tribool parse_missing_data(http::message &http_msg, std::size_t len, boost::system::error_code &ec)
void update_content_length_using_header(void)
sets the length of the payload content using the Content-Length header
Definition: message.hpp:314
static bool parse_cookie_header(ihash_multimap &dict, const char *ptr, const std::size_t len, bool set_cookie_header)
virtual bool is_content_length_implied(void) const =0
should return true if the content length can be implied without headers
void update_message_with_header_data(http::message &http_msg) const
static const boost::uint32_t QUERY_NAME_MAX
maximum length for the name of a query string variable
Definition: parser.hpp:552
static const boost::uint32_t HEADER_VALUE_MAX
maximum length for an HTTP header value
Definition: parser.hpp:549
const char * m_read_end_ptr
points to the end of the read_buffer (last byte + 1)
Definition: parser.hpp:574
void add_header(const std::string &key, const std::string &value)
adds a value for the HTTP header named key
Definition: message.hpp:363
void set_status_code(unsigned int n)
sets the HTTP response status code
Definition: response.hpp:109
static const boost::uint32_t STATUS_MESSAGE_MAX
maximum length for response status message
Definition: parser.hpp:534
static const boost::uint32_t RESOURCE_MAX
maximum length for the resource requested
Definition: parser.hpp:540
logger m_logger
primary logging interface used by this class
Definition: parser.hpp:565
class-specific error category
Definition: parser.hpp:74
void set_is_valid(bool b=true)
sets whether or not the message is valid
Definition: message.hpp:281
std::size_t get_content_bytes_read(void) const
returns the total number of bytes read while parsing the payload content
Definition: parser.hpp:261
void set_version_minor(const boost::uint16_t n)
sets the minor HTTP version number
Definition: message.hpp:296
bool is_valid(void) const
returns true if the message is valid
Definition: message.hpp:166
void set_version_major(const boost::uint16_t n)
sets the major HTTP version number
Definition: message.hpp:290
void set_missing_packets(bool newVal)
set to true when missing packets detected
Definition: message.hpp:273
static void set_error(boost::system::error_code &ec, error_value_t ev)
Definition: parser.hpp:516
void concatenate_chunks(void)
bool eof(void) const
returns true if there are no more bytes available in the read buffer
Definition: parser.hpp:249
boost::tribool parse_headers(http::message &http_msg, boost::system::error_code &ec)
static bool parse_forwarded_for(const std::string &header, std::string &public_ip)
bool has_data_after_missing_packets() const
true if more data seen after the missing packets
Definition: message.hpp:276
const std::string & get_header(const std::string &key) const
returns a value for the header if any are defined; otherwise, an empty string
Definition: message.hpp:213
ihash_multimap & get_headers(void)
returns a reference to the HTTP headers
Definition: message.hpp:218
static const boost::uint32_t QUERY_STRING_MAX
maximum length for the query string
Definition: parser.hpp:543
boost::uint16_t get_version_minor(void) const
returns the minor HTTP version number
Definition: message.hpp:180
void set_content_length(size_t n)
sets the length of the payload content (in bytes)
Definition: message.hpp:302
static void compute_msg_status(http::message &http_msg, bool msg_parsed_ok)
std::vector< char > chunk_cache_t
used to cache chunked data
Definition: message.hpp:64
std::size_t bytes_available(void) const
returns the number of bytes available in the read buffer
Definition: parser.hpp:252
std::size_t consume_content_as_next_chunk(http::message::chunk_cache_t &chunk_buffers)
boost::tribool parse(http::message &http_msg, boost::system::error_code &ec)
Definition: http_parser.cpp:46
const bool m_is_request
true if the message is an HTTP request; false if it is an HTTP response
Definition: parser.hpp:568
bool has_missing_packets() const
true if there were missing packets
Definition: message.hpp:270
void set_method(const std::string &str)
sets the HTTP request method (i.e. GET, POST, PUT)
Definition: request.hpp:87
virtual void finished_parsing_headers(const boost::system::error_code &ec)
Called after we have finished parsing the HTTP message headers.
Definition: parser.hpp:443
static const boost::uint32_t COOKIE_VALUE_MAX
maximum length for the value of a cookie; also used for path and domain
Definition: parser.hpp:561
chunk_cache_t & get_chunk_cache(void)
returns a reference to the chunk cache
Definition: message.hpp:210
bool is_parsing_request(void) const
returns true if the parser is being used to parse an HTTP request
Definition: parser.hpp:276
const char * m_read_ptr
points to the next character to be consumed in the read_buffer
Definition: parser.hpp:571
static std::string url_decode(const std::string &str)
escapes URL-encoded strings (a%20value+with%20spaces)
Definition: algorithm.cpp:153
void set_status_message(const std::string &msg)
sets the HTTP response status message
Definition: response.hpp:115
static bool parse_url_encoded(ihash_multimap &dict, const char *ptr, const std::size_t len)
void finish(http::message &http_msg) const
size_t get_content_length(void) const
returns the length of the payload content (in bytes)
Definition: message.hpp:192
static const boost::uint32_t HEADER_NAME_MAX
maximum length for an HTTP header name
Definition: parser.hpp:546
boost::tribool consume_content(http::message &http_msg, boost::system::error_code &ec)
void update_transfer_encoding_using_header(void)
sets the transfer coding using the Transfer-Encoding header
Definition: message.hpp:326
char * create_content_buffer(void)
Definition: message.hpp:338
static const boost::uint32_t QUERY_VALUE_MAX
maximum length for the value of a query string variable
Definition: parser.hpp:555
data_status_t
defines message data integrity status codes
Definition: message.hpp:87
boost::tribool parse_chunks(http::message::chunk_cache_t &chunk_buffers, boost::system::error_code &ec)
char * get_content(void)
returns a pointer to the payload content, or empty string if there is none
Definition: message.hpp:204
void set_query_string(const std::string &str)
sets the uri-query or query string requested
Definition: request.hpp:102
static bool parse_multipart_form_data(ihash_multimap &dict, const std::string &content_type, const char *ptr, const std::size_t len)
boost::tribool finish_header_parsing(http::message &http_msg, boost::system::error_code &ec)
bool is_chunked(void) const
returns true if the message content is chunked
Definition: message.hpp:195
boost::uint16_t get_version_major(void) const
returns the major HTTP version number
Definition: message.hpp:177
ihash_multimap & get_queries(void)
returns the query parameters
Definition: request.hpp:77
ihash_multimap & get_cookies(void)
returns the cookie parameters
Definition: message.hpp:234
bool has_header(const std::string &key) const
returns true if at least one value for the header is defined
Definition: message.hpp:223
static const boost::uint32_t METHOD_MAX
maximum length for the request method
Definition: parser.hpp:537
void set_resource(const std::string &str)
sets the resource or uri-stem originally requested
Definition: request.hpp:93
static bool parse_uri(const std::string &uri, std::string &proto, std::string &host, boost::uint16_t &port, std::string &path, std::string &query)