1/* Copyright (c) 2014, Google Inc.
2 *
3 * Permission to use, copy, modify, and/or distribute this software for any
4 * purpose with or without fee is hereby granted, provided that the above
5 * copyright notice and this permission notice appear in all copies.
6 *
7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15#include <stdio.h>
16#include <string.h>
17
18#include <string>
19#include <vector>
20
21#include <gtest/gtest.h>
22
23#include <openssl/base64.h>
24#include <openssl/crypto.h>
25#include <openssl/err.h>
26
27#include "../internal.h"
28#include "../test/test_util.h"
29
30
// encoding_relation describes how the |encoded| member of a test vector
// relates to its |decoded| member.
enum encoding_relation {
  // canonical indicates that the encoding is the expected encoding of the
  // input.
  canonical,
  // valid indicates that the encoding is /a/ valid encoding of the input, but
  // need not be the canonical one.
  valid,
  // invalid indicates that the encoded data is not valid; the decoding tests
  // expect it to be rejected.
  invalid,
};
41
// TestVector pairs a decoded string with one encoding of it. Both strings are
// NUL-terminated; the tests measure them with strlen().
struct TestVector {
  // relation says how |encoded| relates to |decoded|.
  enum encoding_relation relation;
  // decoded is the raw, unencoded input. The invalid vectors in this file
  // leave it as "".
  const char *decoded;
  // encoded is the base64 text handed to the decoders (and, for canonical
  // vectors, expected from the encoders).
  const char *encoded;
};
47
// kTestVectors is the parameter set shared by every Base64Test case. The
// first seven entries are the test vectors from RFC 4648; the rest exercise
// whitespace, padding placement and line wrapping.
//
// Canonical encodings are written in the form the streaming encoder is
// expected to produce, including the '\n' line breaks. Tests of the one-shot
// block functions strip the newlines with |RemoveNewlines| first.
static const TestVector kTestVectors[] = {
    {canonical, "", ""},
    {canonical, "f", "Zg==\n"},
    {canonical, "fo", "Zm8=\n"},
    {canonical, "foo", "Zm9v\n"},
    {canonical, "foob", "Zm9vYg==\n"},
    {canonical, "fooba", "Zm9vYmE=\n"},
    {canonical, "foobar", "Zm9vYmFy\n"},
    // Alternative encodings with extra blank lines and spaces; the streaming
    // decoder is expected to accept these.
    {valid, "foobar", "Zm9vYmFy\n\n"},
    {valid, "foobar", " Zm9vYmFy\n\n"},
    {valid, "foobar", " Z m 9 v Y m F y\n\n"},
    // Malformed inputs: excess or misplaced padding, incomplete groups and a
    // character outside the base64 alphabet.
    {invalid, "", "Zm9vYmFy=\n"},
    {invalid, "", "Zm9vYmFy==\n"},
    {invalid, "", "Zm9vYmFy===\n"},
    {invalid, "", "Z"},
    {invalid, "", "Z\n"},
    {invalid, "", "ab!c"},
    {invalid, "", "ab=c"},
    {invalid, "", "abc"},

    // Padding that is split across a line break.
    {canonical, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA==\n"},
    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA\n==\n"},
    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA=\n=\n"},
    {invalid, "",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA=\n==\n"},
    // Inputs whose encoding spans more than one 64-character line, wrapped
    // canonically, at a different column, or not at all.
    {canonical, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4\neHh4eHh"
     "4eHh4eHh4\n"},
    {canonical,
     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4\neHh4eHh"
     "4eHh4eHh4eHh4eA==\n"},
    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh\n4eHh4eHh"
     "4eHh4eHh4eHh4eA==\n"},
    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e"
     "Hh4eHh4eHh4eA==\n"},
    // Further data after the padding is an error.
    {invalid, "",
     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA=="
     "\neHh4eHh4eHh4eHh4eHh4eHh4\n"},

    // A '-' has traditionally been treated as the end of the data by OpenSSL
    // and anything following would be ignored. BoringSSL does not accept this
    // non-standard extension.
    {invalid, "", "Zm9vYmFy-anythinggoes"},
    {invalid, "", "Zm9vYmFy\n-anythinggoes"},

    // CVE-2015-0292
    {invalid, "",
     "ZW5jb2RlIG1lCg==========================================================="
     "=======\n"},
};
105
// Base64Test is the fixture for the value-parameterized tests in this file;
// each test reads its |TestVector| through GetParam().
class Base64Test : public testing::TestWithParam<TestVector> {};

// Instantiate every Base64Test case once per entry in |kTestVectors|. The
// first macro argument (the instantiation name prefix) is left empty.
// NOTE(review): newer googletest releases spell this macro
// INSTANTIATE_TEST_SUITE_P; confirm against the bundled version before
// changing it.
INSTANTIATE_TEST_CASE_P(, Base64Test, testing::ValuesIn(kTestVectors));
109
// RemoveNewlines returns a copy of the NUL-terminated string |in| in which
// every '\n' byte has been dropped. All other bytes are kept in order.
static std::string RemoveNewlines(const char *in) {
  std::string stripped;

  // Walk the string up to its terminator, skipping newline bytes.
  for (const char *p = in; *p != '\0'; p++) {
    if (*p == '\n') {
      continue;
    }
    stripped += *p;
  }

  return stripped;
}
123
124TEST_P(Base64Test, EncodeBlock) {
125  const TestVector &t = GetParam();
126  if (t.relation != canonical) {
127    return;
128  }
129
130  const size_t decoded_len = strlen(t.decoded);
131  size_t max_encoded_len;
132  ASSERT_TRUE(EVP_EncodedLength(&max_encoded_len, decoded_len));
133
134  std::vector<uint8_t> out_vec(max_encoded_len);
135  uint8_t *out = out_vec.data();
136  size_t len = EVP_EncodeBlock(out, (const uint8_t *)t.decoded, decoded_len);
137
138  std::string encoded(RemoveNewlines(t.encoded));
139  EXPECT_EQ(Bytes(encoded), Bytes(out, len));
140}
141
142TEST_P(Base64Test, DecodeBase64) {
143  const TestVector &t = GetParam();
144  if (t.relation == valid) {
145    // The non-canonical encodings will generally have odd whitespace etc
146    // that |EVP_DecodeBase64| will reject.
147    return;
148  }
149
150  const std::string encoded(RemoveNewlines(t.encoded));
151  std::vector<uint8_t> out_vec(encoded.size());
152  uint8_t *out = out_vec.data();
153
154  size_t len;
155  int ok = EVP_DecodeBase64(out, &len, out_vec.size(),
156                            (const uint8_t *)encoded.data(), encoded.size());
157
158  if (t.relation == invalid) {
159    EXPECT_FALSE(ok);
160  } else if (t.relation == canonical) {
161    ASSERT_TRUE(ok);
162    EXPECT_EQ(Bytes(t.decoded), Bytes(out, len));
163  }
164}
165
166TEST_P(Base64Test, DecodeBlock) {
167  const TestVector &t = GetParam();
168  if (t.relation != canonical) {
169    return;
170  }
171
172  std::string encoded(RemoveNewlines(t.encoded));
173
174  std::vector<uint8_t> out_vec(encoded.size());
175  uint8_t *out = out_vec.data();
176
177  // Test that the padding behavior of the deprecated API is preserved.
178  int ret =
179      EVP_DecodeBlock(out, (const uint8_t *)encoded.data(), encoded.size());
180  ASSERT_GE(ret, 0);
181  // EVP_DecodeBlock should ignore padding.
182  ASSERT_EQ(0, ret % 3);
183  size_t expected_len = strlen(t.decoded);
184  if (expected_len % 3 != 0) {
185    ret -= 3 - (expected_len % 3);
186  }
187  EXPECT_EQ(Bytes(t.decoded), Bytes(out, static_cast<size_t>(ret)));
188}
189
190TEST_P(Base64Test, EncodeDecode) {
191  const TestVector &t = GetParam();
192
193  EVP_ENCODE_CTX ctx;
194  const size_t decoded_len = strlen(t.decoded);
195
196  if (t.relation == canonical) {
197    size_t max_encoded_len;
198    ASSERT_TRUE(EVP_EncodedLength(&max_encoded_len, decoded_len));
199
200    // EVP_EncodeUpdate will output new lines every 64 bytes of output so we
201    // need slightly more than |EVP_EncodedLength| returns. */
202    max_encoded_len += (max_encoded_len + 63) >> 6;
203    std::vector<uint8_t> out_vec(max_encoded_len);
204    uint8_t *out = out_vec.data();
205
206    EVP_EncodeInit(&ctx);
207
208    int out_len;
209    EVP_EncodeUpdate(&ctx, out, &out_len,
210                     reinterpret_cast<const uint8_t *>(t.decoded),
211                     decoded_len);
212    size_t total = out_len;
213
214    EVP_EncodeFinal(&ctx, out + total, &out_len);
215    total += out_len;
216
217    EXPECT_EQ(Bytes(t.encoded), Bytes(out, total));
218  }
219
220  std::vector<uint8_t> out_vec(strlen(t.encoded));
221  uint8_t *out = out_vec.data();
222
223  EVP_DecodeInit(&ctx);
224  int out_len;
225  size_t total = 0;
226  int ret = EVP_DecodeUpdate(&ctx, out, &out_len,
227                             reinterpret_cast<const uint8_t *>(t.encoded),
228                             strlen(t.encoded));
229  if (ret != -1) {
230    total = out_len;
231    ret = EVP_DecodeFinal(&ctx, out + total, &out_len);
232    total += out_len;
233  }
234
235  switch (t.relation) {
236    case canonical:
237    case valid:
238      ASSERT_NE(-1, ret);
239      EXPECT_EQ(Bytes(t.decoded), Bytes(out, total));
240      break;
241
242    case invalid:
243      EXPECT_EQ(-1, ret);
244      break;
245  }
246}
247
248TEST_P(Base64Test, DecodeUpdateStreaming) {
249  const TestVector &t = GetParam();
250  if (t.relation == invalid) {
251    return;
252  }
253
254  const size_t encoded_len = strlen(t.encoded);
255
256  std::vector<uint8_t> out(encoded_len);
257
258  for (size_t chunk_size = 1; chunk_size <= encoded_len; chunk_size++) {
259    SCOPED_TRACE(chunk_size);
260    size_t out_len = 0;
261    EVP_ENCODE_CTX ctx;
262    EVP_DecodeInit(&ctx);
263
264    for (size_t i = 0; i < encoded_len;) {
265      size_t todo = encoded_len - i;
266      if (todo > chunk_size) {
267        todo = chunk_size;
268      }
269
270      int bytes_written;
271      int ret = EVP_DecodeUpdate(
272          &ctx, out.data() + out_len, &bytes_written,
273          reinterpret_cast<const uint8_t *>(t.encoded + i), todo);
274      i += todo;
275
276      switch (ret) {
277        case -1:
278          FAIL() << "EVP_DecodeUpdate failed";
279        case 0:
280          out_len += bytes_written;
281          if (i == encoded_len ||
282              (i + 1 == encoded_len && t.encoded[i] == '\n') ||
283              // If there was an '-' in the input (which means “EOF”) then
284              // this loop will continue to test that |EVP_DecodeUpdate| will
285              // ignore the remainder of the input.
286              strchr(t.encoded, '-') != nullptr) {
287            break;
288          }
289
290          FAIL()
291              << "EVP_DecodeUpdate returned zero before end of encoded data.";
292        case 1:
293          out_len += bytes_written;
294          break;
295        default:
296          FAIL() << "Invalid return value " << ret;
297      }
298    }
299
300    int bytes_written;
301    int ret = EVP_DecodeFinal(&ctx, out.data() + out_len, &bytes_written);
302    ASSERT_NE(ret, -1);
303    out_len += bytes_written;
304
305    EXPECT_EQ(Bytes(t.decoded), Bytes(out.data(), out_len));
306  }
307}
308