pion  5.0.6
algorithm.cpp
1 // ---------------------------------------------------------------------
2 // pion: a Boost C++ framework for building lightweight HTTP interfaces
3 // ---------------------------------------------------------------------
4 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion)
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See http://www.boost.org/LICENSE_1_0.txt
8 //
9 
10 #include <cmath>
11 #include <cstdlib>
12 #include <cstdio>
13 #include <cstring>
14 #include <pion/algorithm.hpp>
15 #include <boost/assert.hpp>
16 
17 // macro to shift bitmask by a single bit
18 #define SHIFT_BITMASK(ptr, mask) if (mask & 0x01) { mask = 0x80; ++ptr; } else mask >>= 1;
19 
20 
21 namespace pion { // begin namespace pion
22 
23 
24 bool algorithm::base64_decode(const std::string &input, std::string &output)
25 {
26  static const char nop = -1;
27  static const char decoding_data[] = {
28  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
29  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
30  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop, 62, nop,nop,nop, 63,
31  52, 53, 54, 55, 56, 57, 58, 59, 60, 61,nop,nop, nop,nop,nop,nop,
32  nop, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
33  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,nop, nop,nop,nop,nop,
34  nop,26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
35  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,nop, nop,nop,nop,nop,
36  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
37  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
38  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
39  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
40  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
41  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
42  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop,
43  nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop
44  };
45 
46  unsigned int input_length=input.size();
47  const char * input_ptr = input.data();
48 
49  // allocate space for output string
50  output.clear();
51  output.reserve(((input_length+2)/3)*4);
52 
53  // for each 4-bytes sequence from the input, extract 4 6-bits sequences by droping first two bits
54  // and regenerate into 3 8-bits sequence
55 
56  for (unsigned int i=0; i<input_length;i++) {
57  char base64code0;
58  char base64code1;
59  char base64code2 = 0; // initialized to 0 to suppress warnings
60  char base64code3;
61 
62  base64code0 = decoding_data[static_cast<int>(input_ptr[i])];
63  if(base64code0==nop) // non base64 character
64  return false;
65  if(!(++i<input_length)) // we need at least two input bytes for first byte output
66  return false;
67  base64code1 = decoding_data[static_cast<int>(input_ptr[i])];
68  if(base64code1==nop) // non base64 character
69  return false;
70 
71  output += ((base64code0 << 2) | ((base64code1 >> 4) & 0x3));
72 
73  if(++i<input_length) {
74  char c = input_ptr[i];
75  if(c =='=') { // padding , end of input
76  BOOST_ASSERT( (base64code1 & 0x0f)==0);
77  return true;
78  }
79  base64code2 = decoding_data[static_cast<int>(input_ptr[i])];
80  if(base64code2==nop) // non base64 character
81  return false;
82 
83  output += ((base64code1 << 4) & 0xf0) | ((base64code2 >> 2) & 0x0f);
84  }
85 
86  if(++i<input_length) {
87  char c = input_ptr[i];
88  if(c =='=') { // padding , end of input
89  BOOST_ASSERT( (base64code2 & 0x03)==0);
90  return true;
91  }
92  base64code3 = decoding_data[static_cast<int>(input_ptr[i])];
93  if(base64code3==nop) // non base64 character
94  return false;
95 
96  output += (((base64code2 << 6) & 0xc0) | base64code3 );
97  }
98 
99  }
100 
101  return true;
102 }
103 
104 bool algorithm::base64_encode(const std::string &input, std::string &output)
105 {
106  static const char encoding_data[] =
107  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
108 
109  unsigned int input_length=input.size();
110  const char * input_ptr = input.data();
111 
112  // allocate space for output string
113  output.clear();
114  output.reserve(((input_length+2)/3)*4);
115 
116  // for each 3-bytes sequence from the input, extract 4 6-bits sequences and encode using
117  // encoding_data lookup table.
118  // if input do not contains enough chars to complete 3-byte sequence,use pad char '='
119  for (unsigned int i=0; i<input_length;i++) {
120  int base64code0=0;
121  int base64code1=0;
122  int base64code2=0;
123  int base64code3=0;
124 
125  base64code0 = (input_ptr[i] >> 2) & 0x3f; // 1-byte 6 bits
126  output += encoding_data[base64code0];
127  base64code1 = (input_ptr[i] << 4 ) & 0x3f; // 1-byte 2 bits +
128 
129  if (++i < input_length) {
130  base64code1 |= (input_ptr[i] >> 4) & 0x0f; // 2-byte 4 bits
131  output += encoding_data[base64code1];
132  base64code2 = (input_ptr[i] << 2) & 0x3f; // 2-byte 4 bits +
133 
134  if (++i < input_length) {
135  base64code2 |= (input_ptr[i] >> 6) & 0x03; // 3-byte 2 bits
136  base64code3 = input_ptr[i] & 0x3f; // 3-byte 6 bits
137  output += encoding_data[base64code2];
138  output += encoding_data[base64code3];
139  } else {
140  output += encoding_data[base64code2];
141  output += '=';
142  }
143  } else {
144  output += encoding_data[base64code1];
145  output += '=';
146  output += '=';
147  }
148  }
149 
150  return true;
151 }
152 
153 std::string algorithm::url_decode(const std::string& str)
154 {
155  char decode_buf[3];
156  std::string result;
157  result.reserve(str.size());
158 
159  for (std::string::size_type pos = 0; pos < str.size(); ++pos) {
160  switch(str[pos]) {
161  case '+':
162  // convert to space character
163  result += ' ';
164  break;
165  case '%':
166  // decode hexadecimal value
167  if (pos + 2 < str.size()) {
168  decode_buf[0] = str[++pos];
169  decode_buf[1] = str[++pos];
170  decode_buf[2] = '\0';
171 
172  char decoded_char = static_cast<char>( strtol(decode_buf, 0, 16) );
173 
174  // decoded_char will be '\0' if strtol can't parse decode_buf as hex
175  // (or if decode_buf == "00", which is also not valid).
176  // In this case, recover from error by not decoding.
177  if (decoded_char == '\0') {
178  result += '%';
179  pos -= 2;
180  } else
181  result += decoded_char;
182  } else {
183  // recover from error by not decoding character
184  result += '%';
185  }
186  break;
187  default:
188  // character does not need to be escaped
189  result += str[pos];
190  }
191  };
192 
193  return result;
194 }
195 
196 std::string algorithm::url_encode(const std::string& str)
197 {
198  char encode_buf[4];
199  std::string result;
200  encode_buf[0] = '%';
201  result.reserve(str.size());
202 
203  // character selection for this algorithm is based on the following url:
204  // http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
205 
206  for (std::string::size_type pos = 0; pos < str.size(); ++pos) {
207  switch(str[pos]) {
208  default:
209  if (str[pos] > 32 && str[pos] < 127) {
210  // character does not need to be escaped
211  result += str[pos];
212  break;
213  }
214  // else pass through to next case
215  case ' ':
216  case '$': case '&': case '+': case ',': case '/': case ':':
217  case ';': case '=': case '?': case '@': case '"': case '<':
218  case '>': case '#': case '%': case '{': case '}': case '|':
219  case '\\': case '^': case '~': case '[': case ']': case '`':
220  // the character needs to be encoded
221  sprintf(encode_buf+1, "%.2X", (unsigned char)(str[pos]));
222  result += encode_buf;
223  break;
224  }
225  };
226 
227  return result;
228 }
229 
230 // TODO
231 //std::string algorithm::xml_decode(const std::string& str)
232 //{
233 //}
234 
235 std::string algorithm::xml_encode(const std::string& str)
236 {
237  std::string result;
238  result.reserve(str.size() + 20); // Assume ~5 characters converted (length increases)
239  const unsigned char *ptr = reinterpret_cast<const unsigned char*>(str.c_str());
240  const unsigned char *end_ptr = ptr + str.size();
241  while (ptr < end_ptr) {
242  // check byte ranges for valid UTF-8
243  // see http://en.wikipedia.org/wiki/UTF-8
244  // also, see http://www.w3.org/TR/REC-xml/#charsets
245  // this implementation is the strictest subset of both
246  if ((*ptr >= 0x20 && *ptr <= 0x7F) || *ptr == 0x9 || *ptr == 0xa || *ptr == 0xd) {
247  // regular ASCII character
248  switch(*ptr) {
249  // Escape special XML characters.
250  case '&':
251  result += "&amp;";
252  break;
253  case '<':
254  result += "&lt;";
255  break;
256  case '>':
257  result += "&gt;";
258  break;
259  case '\"':
260  result += "&quot;";
261  break;
262  case '\'':
263  result += "&apos;";
264  break;
265  default:
266  result += *ptr;
267  }
268  } else if (*ptr >= 0xC2 && *ptr <= 0xDF) {
269  // two-byte sequence
270  if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF) {
271  result += *ptr;
272  result += *(++ptr);
273  } else {
274  // insert replacement char
275  result += 0xef;
276  result += 0xbf;
277  result += 0xbd;
278  }
279  } else if (*ptr >= 0xE0 && *ptr <= 0xEF) {
280  // three-byte sequence
281  if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF
282  && *(ptr+2) >= 0x80 && *(ptr+2) <= 0xBF) {
283  result += *ptr;
284  result += *(++ptr);
285  result += *(++ptr);
286  } else {
287  // insert replacement char
288  result += 0xef;
289  result += 0xbf;
290  result += 0xbd;
291  }
292  } else if (*ptr >= 0xF0 && *ptr <= 0xF4) {
293  // four-byte sequence
294  if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF
295  && *(ptr+2) >= 0x80 && *(ptr+2) <= 0xBF
296  && *(ptr+3) >= 0x80 && *(ptr+3) <= 0xBF) {
297  result += *ptr;
298  result += *(++ptr);
299  result += *(++ptr);
300  result += *(++ptr);
301  } else {
302  // insert replacement char
303  result += 0xef;
304  result += 0xbf;
305  result += 0xbd;
306  }
307  } else {
308  // insert replacement char
309  result += 0xef;
310  result += 0xbf;
311  result += 0xbd;
312  }
313  ++ptr;
314  }
315 
316  return result;
317 }
318 
319 void algorithm::float_from_bytes(long double& value, const unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits)
320 {
321  // get sign of the number from the first bit
322  const int value_sign = (*ptr & 0x80) ? -1 : 1;
323 
324  // build exponent value from bitstream
325  unsigned char mask = 0x80;
326  boost::int16_t exponent = 0;
327  for (size_t n = 0; n < num_exp_bits; ++n) {
328  SHIFT_BITMASK(ptr, mask);
329  exponent *= 2;
330  if (*ptr & mask)
331  exponent += 1;
332  }
333 
334  // build significand from bitstream
335  long double significand = exponent ? 1.0 : 0.0;
336  long double significand_value = 1.0;
337  while (num_fraction_bits) {
338  SHIFT_BITMASK(ptr, mask);
339  significand_value /= 2;
340  if (*ptr & mask)
341  significand += significand_value;
342  --num_fraction_bits;
343  }
344 
345  // calculate final value
346  exponent -= (::pow((long double)2, (int)(num_exp_bits - 1)) - 1);
347  value = value_sign * significand * ::pow((long double)2, exponent);
348 }
349 
350 void algorithm::float_to_bytes(long double value, unsigned char *buf, size_t num_exp_bits, size_t num_fraction_bits)
351 {
352  // first initialize output buffer to zeros
353  unsigned char *ptr = buf;
354  memset(ptr, 0x00, ::ceil(static_cast<float>(num_exp_bits + num_fraction_bits + 1) / 8));
355 
356  // initialize first byte starting with sign of number
357  if (value < 0) {
358  *ptr = 0x80;
359  value *= -1;
360  }
361 
362  // break down numbers >= 1.0 by incrementing the exponent & dividing by 2
363  boost::int16_t exponent = 0;
364  while (value >= 1) {
365  value /= 2;
366  ++exponent;
367  }
368 
369  // skip past exponent bits because we don't know the value yet
370  unsigned char mask = 0x40;
371  for (size_t n = num_exp_bits; n > 0; --n) {
372  if (n >= 8) {
373  ++ptr;
374  n -= 7;
375  } else {
376  SHIFT_BITMASK(ptr, mask);
377  }
378  }
379 
380  // serialize fractional value < 1.0
381  bool got_exponent = false;
382  boost::uint16_t num_bits = 0;
383  while (value && num_bits < num_fraction_bits) {
384  value *= 2;
385  if (got_exponent) {
386  if (value >= 1.0) {
387  *ptr |= mask;
388  value -= 1.0;
389  }
390  SHIFT_BITMASK(ptr, mask);
391  ++num_bits;
392  } else {
393  --exponent;
394  if (value >= 1.0) {
395  value -= 1.0;
396  got_exponent = true;
397  }
398  }
399  }
400 
401  // normalize exponent.
402  // note: we should have a zero exponent if value == 0
403  boost::int32_t high_bit = ::pow((long double)2, (int)(num_exp_bits - 1));
404  if (got_exponent)
405  exponent += (high_bit - 1);
406  else
407  exponent = 0;
408 
409  // serialize exponent bits
410  ptr = buf;
411  mask = 0x80;
412  for (size_t n = 0; n < num_exp_bits; ++n) {
413  SHIFT_BITMASK(ptr, mask);
414  if (exponent >= high_bit) {
415  *ptr |= mask;
416  exponent -= high_bit;
417  }
418  high_bit /= 2;
419  }
420 }
421 
422 } // end namespace pion
static void float_to_bytes(long double value, unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits)
Definition: algorithm.cpp:350
static std::string url_encode(const std::string &str)
encodes strings so that they are safe for URLs (with%20spaces)
Definition: algorithm.cpp:196
static bool base64_decode(std::string const &input, std::string &output)
Definition: algorithm.cpp:24
static std::string xml_encode(const std::string &str)
TODO: escapes XML/HTML-encoded strings (1 < 2)
Definition: algorithm.cpp:235
static std::string url_decode(const std::string &str)
escapes URL-encoded strings (a%20value+with%20spaces)
Definition: algorithm.cpp:153
static bool base64_encode(std::string const &input, std::string &output)
Definition: algorithm.cpp:104
static void float_from_bytes(long double &value, const unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits)
Definition: algorithm.cpp:319