1/**
2 * Copyright(c) 2011 Trusted Logic.   All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *  * Neither the name Trusted Logic nor the names of its
15 *    contributors may be used to endorse or promote products derived
16 *    from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30#include "lib_manifest2.h"
31#include <string.h>
32
/* Byte values of the control characters the parser tests for explicitly */
#define CHAR_CR  (uint8_t)0x0D /* carriage return */
#define CHAR_LF  (uint8_t)0x0A /* line feed */
#define CHAR_TAB (uint8_t)0x09 /* horizontal tab */
36
#ifdef LIB_TOOL_IMPLEMENTATION
/* Tool build: parse errors are reported through the exos trace facility */
#include "exos_trace.h"
/*
 * Prefixes the message with the manifest name and the current line number
 * read from pContext, then forwards everything to log_error().
 * msg must be a string literal: it is concatenated with the prefix literal.
 * NOTE(review): when a call site passes no variadic argument, the expansion
 * ends with ", )" which a strict ISO C preprocessor rejects; presumably the
 * tool build uses a compiler that tolerates this - confirm.
 */
#define LOG_ERROR(pContext, msg, ...) log_error("%s - line %d: " msg, pContext->pManifestName, pContext->nLine, __VA_ARGS__)
/*
 * Hands the format string and its variadic arguments to exosTraceVPrintf.
 * Note that the address of the va_list is passed, not the va_list itself.
 * NOTE(review): va_list/va_start/va_end need <stdarg.h>, which is not included
 * here; presumably it comes in through exos_trace.h - confirm.
 */
static void log_error(const char* msg, ...)
{
   va_list arg_list;
   va_start(arg_list, msg);
   exosTraceVPrintf("LIB_MANIFEST2", EXOS_TRACE_ORG_APPLI, K_PRINT_ERROR_LOG, msg, &arg_list);
   va_end(arg_list);
}
#else
/* No error messages on the target */
#ifdef __SYMBIAN32__
/* Named variadic parameter form: LOG_ERROR expands to nothing */
#define LOG_ERROR(pContext...)
#else
/* Standard variadic form: LOG_ERROR expands to nothing */
#define LOG_ERROR(...)
#endif
#endif
55
56void libManifest2InitContext(
57   LIB_MANIFEST2_CONTEXT* pContext)
58{
59   pContext->nOffset = 0;
60   pContext->nLine = 1;
61   pContext->nSectionStartOffset = 0;
62}
63
64
/* Character classes understood by static_checkCharacter() */
#define CHARACTER_NAME_FIRST      1
#define CHARACTER_NAME_SUBSEQUENT 2
#define CHARACTER_SECTION_NAME    3

/**
 * Tells whether a byte is acceptable for the given character class.
 *
 *  - [A-Za-z0-9] is accepted for every class;
 *  - '_', '.' and '-' are accepted for every class except
 *    CHARACTER_NAME_FIRST;
 *  - the space character is accepted for every class except
 *    CHARACTER_NAME_FIRST and CHARACTER_NAME_SUBSEQUENT.
 *
 * @param x      the byte to check
 * @param nType  one of the CHARACTER_xxx constants
 *
 * @return true if the byte is allowed, false otherwise
 */
static bool static_checkCharacter(uint8_t x, uint32_t nType)
{
   const bool bIsLetter = (x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z');
   const bool bIsDigit = (x >= '0' && x <= '9');

   if (bIsLetter || bIsDigit)
   {
      return true;
   }

   switch (x)
   {
      case '_':
      case '.':
      case '-':
         /* Punctuation cannot start a property name */
         return nType != CHARACTER_NAME_FIRST;

      case ' ':
         /* Spaces are never part of a property name */
         return nType != CHARACTER_NAME_FIRST
             && nType != CHARACTER_NAME_SUBSEQUENT;

      default:
         return false;
   }
}
104
/**
 * Compares two section names, ignoring the case of ASCII letters.
 *
 * The comparison ignores bit 0x20 of every byte, which is the bit that
 * separates an upper-case ASCII letter from its lower-case counterpart.
 * This is only a valid case-insensitive comparison because the code assumes
 * both names have already been checked to contain only section-name
 * characters.
 *
 * @param pName1        first name (not zero-terminated)
 * @param nName1Length  length in bytes of the first name
 * @param pName2        second name (not zero-terminated)
 * @param nName2Length  length in bytes of the second name
 *
 * @return true if both names have the same length and match, false otherwise
 */
static bool static_sectionNameEqualCaseInsensitive(
   uint8_t* pName1,
   uint32_t nName1Length,
   uint8_t* pName2,
   uint32_t nName2Length)
{
   uint32_t nIndex = 0;

   if (nName1Length != nName2Length)
   {
      return false;
   }

   while (nIndex < nName1Length)
   {
      /* Bits that differ between the two bytes */
      const uint8_t nDifference = (uint8_t)(pName1[nIndex] ^ pName2[nIndex]);

      /* Any difference outside the case bit (0x20) is a real mismatch */
      if ((nDifference & 0xDF) != 0)
      {
         return false;
      }
      nIndex++;
   }
   return true;
}
130
131static S_RESULT static_libManifest2GetNextItemInternal(
132   LIB_MANIFEST2_CONTEXT* pContext,
133   OUT uint8_t** ppName,
134   OUT uint32_t* pNameLength,
135   OUT uint8_t** ppValue,
136   OUT uint32_t* pValueLength)
137{
138   S_RESULT nResult = S_ERROR_BAD_FORMAT;
139   uint8_t* pCurrent = pContext->pManifestContent + pContext->nOffset;
140   uint8_t* pEnd = pContext->pManifestContent + pContext->nManifestLength;
141   uint8_t* pLastNonWhitespaceChar;
142   uint32_t nCurrentSequenceCount;
143   uint32_t nCurrentChar;
144
145   if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED)
146   {
147      /* Skip leading BOM if we're at the start */
148      if (pCurrent == pContext->pManifestContent)
149      {
150         /* We're at the start. Skip leading BOM if present */
151         /* Note that the UTF-8 encoding of the BOM marker is EF BB BF */
152         if (pContext->nManifestLength >= 3
153             && pCurrent[0] == 0xEF
154             && pCurrent[1] == 0xBB
155             && pCurrent[2] == 0xBF)
156         {
157            pCurrent += 3;
158         }
159      }
160      /* Skip comments and newlines */
161      while (pCurrent < pEnd)
162      {
163         if (*pCurrent == (uint8_t)'#')
164         {
165            /* This is the start of a comment. Skip until end of line or end of file */
166            pCurrent++;
167            while (pCurrent < pEnd && *pCurrent != CHAR_LF && *pCurrent != CHAR_CR)
168            {
169               if (*pCurrent == 0)
170               {
171                  LOG_ERROR(pContext, "NUL character forbidden");
172                  goto error;
173               }
174               pCurrent++;
175            }
176         }
177         else if (*pCurrent == CHAR_CR)
178         {
179            /* Check if a LF follows */
180            pCurrent++;
181            if (pCurrent < pEnd && *pCurrent == CHAR_LF)
182            {
183               pCurrent++;
184            }
185            pContext->nLine++;
186         }
187         else if (*pCurrent == CHAR_LF)
188         {
189            pCurrent++;
190            pContext->nLine++;
191         }
192         else if (*pCurrent == ' ' || *pCurrent == '\t')
193         {
194            /* this is the start of a all-whitespace line */
195            /* NOTE: this is not allowed by the current spec: spec update needed */
196            pCurrent++;
197            while (pCurrent < pEnd)
198            {
199               if (*pCurrent == CHAR_LF || *pCurrent == CHAR_CR)
200               {
201                  /* End-of-line reached */
202                  break;
203               }
204               if (! (*pCurrent == ' ' || *pCurrent == '\t'))
205               {
206                  LOG_ERROR(pContext, "A line starting with whitespaces must contain only whitespaces. Illegal character: 0x%02X", *pCurrent);
207                  goto error;
208               }
209               pCurrent++;
210            }
211         }
212         else
213         {
214            break;
215         }
216      }
217   }
218
219   if (pCurrent >= pEnd)
220   {
221      /* No more properties */
222      nResult = S_ERROR_ITEM_NOT_FOUND;
223      goto error;
224   }
225
226   if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE_WITH_SECTIONS)
227   {
228      if (*pCurrent == '[')
229      {
230         /* This is a section descriptor */
231         pCurrent++;
232         *ppName = pCurrent;
233         *ppValue = NULL;
234         *pValueLength = 0;
235         while (true)
236         {
237            if (pCurrent >= pEnd)
238            {
239               LOG_ERROR(pContext, "EOF reached within a section name");
240               goto error;
241            }
242            if (*pCurrent == ']')
243            {
244               /* End of section name */
245               *pNameLength = pCurrent - *ppName;
246               pCurrent++;
247
248               /* Skip spaces and tabs. Note that this is a deviation from the current spec
249                 (see SWIS). Spec must be updated */
250               while (pCurrent < pEnd)
251               {
252                  if (*pCurrent == ' ' || *pCurrent == '\t')
253                  {
254                     pCurrent++;
255                  }
256                  else if (*pCurrent == CHAR_CR || *pCurrent == CHAR_LF)
257                  {
258                     /* End of line */
259                     break;
260                  }
261                  else
262                  {
263                     LOG_ERROR(pContext, "Non-space character follows a sectino header: 0x02X", *pCurrent);
264                  }
265               }
266               pContext->nOffset = pCurrent - pContext->pManifestContent;
267               pContext->nSectionStartOffset = pContext->nOffset;
268               return S_SUCCESS;
269            }
270            /* Check section name character */
271            if (!static_checkCharacter(*pCurrent, CHARACTER_SECTION_NAME))
272            {
273               LOG_ERROR(pContext, "Invalid character for a section name: 0x%02X", *pCurrent);
274               goto error;
275            }
276            pCurrent++;
277         }
278      }
279
280      if (pContext->nSectionStartOffset == 0)
281      {
282         /* No section has been found yet. This is a bad format */
283         LOG_ERROR(pContext, "Property found outside any section");
284         goto error;
285      }
286   }
287
288   *ppName = pCurrent;
289
290   /* Check first character of name is in [A-Za-z0-9] */
291   if (!static_checkCharacter(*pCurrent, CHARACTER_NAME_FIRST))
292   {
293      LOG_ERROR(pContext, "Invalid first character for a property name: 0x%02X", *pCurrent);
294      goto error;
295   }
296   pCurrent++;
297   pLastNonWhitespaceChar = pCurrent;
298   while (true)
299   {
300      if (pCurrent == pEnd)
301      {
302         LOG_ERROR(pContext, "EOF reached within a property name");
303         goto error;
304      }
305      if (*pCurrent == ':')
306      {
307         /* Colon reached */
308         break;
309      }
310      if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED)
311      {
312         /* In source manifest, allow space characters before the colon.
313            This is a deviation from the spec. Spec must be updated */
314         if (*pCurrent == ' ' || *pCurrent == '\t')
315         {
316            pCurrent++;
317            continue;
318         }
319      }
320      if (!static_checkCharacter(*pCurrent, CHARACTER_NAME_SUBSEQUENT))
321      {
322         LOG_ERROR(pContext, "Invalid character for a property name: 0x%02X", *pCurrent);
323         goto error;
324      }
325      if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED)
326      {
327         /* Even in a source manifest, property name cannot contain spaces! */
328         if (pCurrent != pLastNonWhitespaceChar)
329         {
330            LOG_ERROR(pContext, "Property name cannot contain spaces");
331            goto error;
332         }
333      }
334      pCurrent++;
335      pLastNonWhitespaceChar = pCurrent;
336   }
337   *pNameLength = pLastNonWhitespaceChar - *ppName;
338   pCurrent++;
339   /* Skip spaces and tabs on the right of the colon */
340   while (pCurrent < pEnd && (*pCurrent == ' ' || *pCurrent == '\t'))
341   {
342      pCurrent++;
343   }
344   *ppValue = pCurrent;
345   pLastNonWhitespaceChar = pCurrent-1;
346
347   nCurrentSequenceCount = 0;
348   nCurrentChar = 0;
349
350   while (pCurrent < pEnd)
351   {
352      uint32_t x;
353      x = *pCurrent;
354      if ((x & 0x80) == 0)
355      {
356         if (nCurrentSequenceCount != 0)
357         {
358            /* We were expecting a 10xxxxxx byte: ill-formed UTF-8 */
359            LOG_ERROR(pContext, "Invalid UTF-8 sequence");
360            goto error;
361         }
362         else if (x == 0)
363         {
364            /* The null character is forbidden */
365            LOG_ERROR(pContext, "NUL character forbidden");
366            goto error;
367         }
368         /* We have a well-formed Unicode character */
369         nCurrentChar = x;
370      }
371      else if ((x & 0xC0) == 0xC0)
372      {
373         /* Start of a sequence */
374         if (nCurrentSequenceCount != 0)
375         {
376            /* We were expecting a 10xxxxxx byte: ill-formed UTF-8 */
377            LOG_ERROR(pContext, "Invalid UTF-8 sequence");
378            goto error;
379         }
380         else if ((x & 0xE0) == 0xC0)
381         {
382            /* 1 byte follows */
383            nCurrentChar = x & 0x1F;
384            nCurrentSequenceCount = 1;
385            if ((x & 0x1E) == 0)
386            {
387               /* Illegal UTF-8: overlong encoding of character in the [0x00-0x7F] range
388                  (must use 1-byte encoding, not a 2-byte encoding) */
389               LOG_ERROR(pContext, "Invalid UTF-8 sequence");
390               goto error;
391            }
392         }
393         else if ((x & 0xF0) == 0xE0)
394         {
395            /* 2 bytes follow */
396            nCurrentChar = x & 0x0F;
397            nCurrentSequenceCount = 2;
398         }
399         else if ((x & 0xF8) == 0xF0)
400         {
401            /* 3 bytes follow */
402            nCurrentChar = x & 0x07;
403            nCurrentSequenceCount = 3;
404         }
405         else
406         {
407            /* Illegal start of sequence */
408            LOG_ERROR(pContext, "Invalid UTF-8 sequence");
409            goto error;
410         }
411      }
412      else if ((x & 0xC0) == 0x80)
413      {
414         /* Continuation byte */
415         if (nCurrentSequenceCount == 0)
416         {
417            /* We were expecting a sequence start, not a continuation byte */
418            LOG_ERROR(pContext, "Invalid UTF-8 sequence");
419            goto error;
420         }
421         else
422         {
423            if (nCurrentSequenceCount == 2)
424            {
425               /* We're in a 3-byte sequence, check that we're not using an overlong sequence */
426               if (nCurrentChar == 0 && (x & 0x20) == 0)
427               {
428                  /* The character starts with at least 5 zero bits, so has fewer than 11 bits. It should
429                     have used a 2-byte sequence, not a 3-byte sequence */
430                  LOG_ERROR(pContext, "Invalid UTF-8 sequence");
431                  goto error;
432               }
433            }
434            else if (nCurrentSequenceCount == 3)
435            {
436               if (nCurrentChar == 0 && (x & 0x30) == 0)
437               {
438                  /* The character starts with at least 5 zero bits, so has fewer than 16 bits. It should
439                     have used a 3-byte sequence, not a 4-byte sequence */
440                  LOG_ERROR(pContext, "Invalid UTF-8 sequence");
441                  goto error;
442               }
443            }
444            nCurrentSequenceCount--;
445            nCurrentChar = (nCurrentChar << 6) | (x & 0x3F);
446         }
447      }
448      else
449      {
450         /* Illegal byte */
451         LOG_ERROR(pContext, "Invalid UTF-8 sequence");
452         goto error;
453      }
454      if (nCurrentSequenceCount == 0)
455      {
456         /* nCurrentChar contains the current Unicode character */
457         /* check character */
458         if ((nCurrentChar >= 0xD800 && nCurrentChar < 0xE000) || nCurrentChar >= 0x110000)
459         {
460            /* Illegal code point */
461            LOG_ERROR(pContext, "Invalid UTF-8 code point 0x%X", nCurrentChar);
462            goto error;
463         }
464
465         if (*pCurrent == CHAR_CR)
466         {
467            if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED)
468            {
469               /* Check if a LF follows */
470               pCurrent++;
471               if (pCurrent < pEnd && *pCurrent == CHAR_LF)
472               {
473                  pCurrent++;
474               }
475               pContext->nLine++;
476            }
477            goto end;
478         }
479         else if (*pCurrent == CHAR_LF)
480         {
481            if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED)
482            {
483               pCurrent++;
484               pContext->nLine++;
485            }
486            goto end;
487         }
488      }
489      if (*pCurrent != ' ' && *pCurrent != CHAR_TAB)
490      {
491         /* It's a non-whitespace char */
492         pLastNonWhitespaceChar = pCurrent;
493      }
494      pCurrent++;
495   }
496
497   /* Hit the end of the manifest; Check that we're not in the middle of a sequence */
498   if (nCurrentSequenceCount != 0)
499   {
500      LOG_ERROR(pContext, "File ends in the middle of an UTF-8 sequence");
501      goto error;
502   }
503
504end:
505
506   *pValueLength = pLastNonWhitespaceChar - *ppValue + 1;
507   pContext->nOffset = pCurrent - pContext->pManifestContent;
508
509   return S_SUCCESS;
510
511error:
512   *ppName = NULL;
513   *pNameLength = 0;
514   *ppValue = NULL;
515   *pValueLength = 0;
516   return nResult;
517}
518
519S_RESULT libManifest2GetNextItem(
520   LIB_MANIFEST2_CONTEXT* pContext,
521   OUT uint8_t** ppName,
522   OUT uint32_t* pNameLength,
523   OUT uint8_t** ppValue,
524   OUT uint32_t* pValueLength)
525{
526   if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED)
527   {
528      /* Don't check for duplicates in binary manifests */
529      return static_libManifest2GetNextItemInternal(
530         pContext,
531         ppName,
532         pNameLength,
533         ppValue,
534         pValueLength);
535   }
536   else
537   {
538      uint32_t nOriginalOffset = pContext->nOffset;
539      uint32_t nOffset;
540      uint32_t nLine;
541      uint32_t nSectionStartOffset;
542      S_RESULT nResult;
543      uint8_t* pDupName;
544      uint32_t nDupNameLength;
545      uint8_t* pDupValue;
546      uint32_t nDupValueLength;
547
548      /* First get the item */
549      nResult = static_libManifest2GetNextItemInternal(
550         pContext,
551         ppName,
552         pNameLength,
553         ppValue,
554         pValueLength);
555      if (nResult != S_SUCCESS)
556      {
557         return nResult;
558      }
559      /* Save the state of the parser */
560      nOffset = pContext->nOffset;
561      nLine = pContext->nLine;
562      nSectionStartOffset = pContext->nSectionStartOffset;
563      if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE)
564      {
565         pContext->nOffset = 0;
566      }
567      else if (*ppValue == NULL)
568      {
569         /* The item was a section header. Iterate on all section headers and
570            check for duplicates */
571         pContext->nOffset = 0;
572      }
573      else
574      {
575         if (nSectionStartOffset == 0)
576         {
577            LOG_ERROR(pContext, "Property definition outside any section");
578            goto bad_format;
579         }
580         /* Iterate only on the properties in the section */
581         pContext->nOffset = nSectionStartOffset;
582      }
583      while (pContext->nOffset < nOriginalOffset)
584      {
585         static_libManifest2GetNextItemInternal(
586            pContext,
587            &pDupName,
588            &nDupNameLength,
589            &pDupValue,
590            &nDupValueLength);
591         if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE_WITH_SECTIONS && *ppValue == NULL)
592         {
593            /* Check for duplicate section names */
594            if (pDupValue == NULL
595                &&
596                static_sectionNameEqualCaseInsensitive(
597                   *ppName,
598                   *pNameLength,
599                   pDupName,
600                   nDupNameLength))
601            {
602               pContext->nOffset = nOffset;
603               pContext->nLine = nLine;
604               pContext->nSectionStartOffset = nSectionStartOffset;
605               LOG_ERROR(pContext, "Duplicate section %.*s", nDupNameLength, pDupName);
606               goto bad_format;
607            }
608         }
609         else
610         {
611            /* Check for duplicate property name */
612            if (nDupNameLength == *pNameLength &&
613                memcmp(pDupName, *ppName, nDupNameLength) == 0)
614            {
615               /* Duplicated property */
616               pContext->nOffset = nOffset;
617               pContext->nLine = nLine;
618               pContext->nSectionStartOffset = nSectionStartOffset;
619               LOG_ERROR(pContext,"Duplicate property %.*s", nDupNameLength, pDupName);
620               goto bad_format;
621            }
622         }
623      }
624      /* Everything's fine. restore context and exit  */
625      /* Restore the context */
626      pContext->nOffset = nOffset;
627      pContext->nLine = nLine;
628      pContext->nSectionStartOffset = nSectionStartOffset;
629
630      return S_SUCCESS;
631bad_format:
632      *ppName = NULL;
633      *pNameLength = 0;
634      *ppValue = NULL;
635      *pValueLength = 0;
636      return S_ERROR_BAD_FORMAT;
637   }
638}
639
640
641S_RESULT libManifest2CheckFormat(
642   LIB_MANIFEST2_CONTEXT* pContext,
643   uint32_t* pnItemCount)
644{
645   uint32_t nPropertyCount = 0;
646   uint8_t* pName;
647   uint32_t nNameLength;
648   uint8_t* pValue;
649   uint32_t nValueLength;
650   S_RESULT nResult;
651
652   pContext->nOffset = 0;
653   pContext->nLine = 1;
654   pContext->nSectionStartOffset = 0;
655
656   while (true)
657   {
658      nResult = libManifest2GetNextItem(
659         pContext,
660         &pName,
661         &nNameLength,
662         &pValue,
663         &nValueLength);
664      if (nResult == S_ERROR_ITEM_NOT_FOUND)
665      {
666         if (pnItemCount != NULL)
667         {
668            *pnItemCount = nPropertyCount;
669         }
670         return S_SUCCESS;
671      }
672      if (nResult != S_SUCCESS)
673      {
674         return nResult;
675      }
676      nPropertyCount++;
677   }
678}
679