1# -*- coding: Latin-1 -*-
2"""pefile, Portable Executable reader module
3
4
5All the PE file basic structures are available with their default names
6as attributes of the instance returned.
7
8Processed elements such as the import table are made available with lowercase
9names, to differentiate them from the upper case basic structure names.
10
11pefile has been tested against the limits of valid PE headers, that is, malware.
12Lots of packed malware attempt to abuse the format way beyond its standard use.
13To the best of my knowledge most of the abuses are handled gracefully.
14
15Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>
16
17All rights reserved.
18
19For detailed copyright information see the file COPYING in
20the root of the distribution archive.
21"""
22
23__author__ = 'Ero Carrera'
24__version__ = '1.2.9.1'
25__contact__ = 'ero@dkbza.org'
26
27
28import os
29import struct
30import time
31import math
32import re
33import exceptions
34import string
35import array
36
# Hash constructors used by the section hashing helpers. A name is left as
# None when no implementation for that algorithm could be imported.
sha1, sha256, sha512, md5 = None, None, None, None

try:
    # Preferred source: hashlib supplies all four algorithms.
    import hashlib
    sha1 = hashlib.sha1
    sha256 = hashlib.sha256
    sha512 = hashlib.sha512
    md5 = hashlib.md5
except ImportError:
    # Without hashlib, fall back to the stand-alone 'sha' and 'md5' modules.
    # Only SHA-1 and MD5 are covered on this path; sha256 and sha512 stay None.
    try:
        import sha
        sha1 = sha.new
    except ImportError:
        pass
    try:
        # The import rebinds the module-level name 'md5' to the module itself;
        # the next line replaces it with the module's constructor.
        import md5
        md5 = md5.new
    except ImportError:
        pass
56
57
# Module-level flag, off by default.
# NOTE(review): presumably selects a quicker, partial parse; its consumers
# are outside this part of the file -- confirm.
fast_load = False

# Header signatures. Read as little-endian bytes the values spell
# 'MZ' (0x5A4D), 'NE' (0x454E), 'LE' (0x454C) and 'PE\0\0' (0x00004550).
IMAGE_DOS_SIGNATURE             = 0x5A4D
IMAGE_OS2_SIGNATURE             = 0x454E
IMAGE_OS2_SIGNATURE_LE          = 0x454C
IMAGE_VXD_SIGNATURE             = 0x454C
IMAGE_NT_SIGNATURE              = 0x00004550
IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
# Top bit of a 32-bit / 64-bit value respectively.
IMAGE_ORDINAL_FLAG              = 0x80000000L
IMAGE_ORDINAL_FLAG64            = 0x8000000000000000L
# Optional header magic values for the PE and PE+ variants.
OPTIONAL_HEADER_MAGIC_PE        = 0x10b
OPTIONAL_HEADER_MAGIC_PE_PLUS   = 0x20b
70
71
# Names and indices of the PE data directory entries.
directory_entry_types = [
    ('IMAGE_DIRECTORY_ENTRY_EXPORT',        0),
    ('IMAGE_DIRECTORY_ENTRY_IMPORT',        1),
    ('IMAGE_DIRECTORY_ENTRY_RESOURCE',      2),
    ('IMAGE_DIRECTORY_ENTRY_EXCEPTION',     3),
    ('IMAGE_DIRECTORY_ENTRY_SECURITY',      4),
    ('IMAGE_DIRECTORY_ENTRY_BASERELOC',     5),
    ('IMAGE_DIRECTORY_ENTRY_DEBUG',         6),
    ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT',     7),
    ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR',     8),
    ('IMAGE_DIRECTORY_ENTRY_TLS',           9),
    ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG',   10),
    ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT',  11),
    ('IMAGE_DIRECTORY_ENTRY_IAT',           12),
    ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT',  13),
    ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
    ('IMAGE_DIRECTORY_ENTRY_RESERVED',      15),
]

# Two-way lookup table: index -> name is loaded first, then the
# name -> index pairs are layered on top.
DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types])
DIRECTORY_ENTRY.update(directory_entry_types)
91
92
# File header characteristic flags (one bit each).
image_characteristics = [
    ('IMAGE_FILE_RELOCS_STRIPPED',          0x0001),
    ('IMAGE_FILE_EXECUTABLE_IMAGE',         0x0002),
    ('IMAGE_FILE_LINE_NUMS_STRIPPED',       0x0004),
    ('IMAGE_FILE_LOCAL_SYMS_STRIPPED',      0x0008),
    ('IMAGE_FILE_AGGRESIVE_WS_TRIM',        0x0010),
    ('IMAGE_FILE_LARGE_ADDRESS_AWARE',      0x0020),
    ('IMAGE_FILE_16BIT_MACHINE',            0x0040),
    ('IMAGE_FILE_BYTES_REVERSED_LO',        0x0080),
    ('IMAGE_FILE_32BIT_MACHINE',            0x0100),
    ('IMAGE_FILE_DEBUG_STRIPPED',           0x0200),
    ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP',  0x0400),
    ('IMAGE_FILE_NET_RUN_FROM_SWAP',        0x0800),
    ('IMAGE_FILE_SYSTEM',                   0x1000),
    ('IMAGE_FILE_DLL',                      0x2000),
    ('IMAGE_FILE_UP_SYSTEM_ONLY',           0x4000),
    ('IMAGE_FILE_BYTES_REVERSED_HI',        0x8000),
]

# Two-way lookup table: flag value -> name, then name -> flag value.
IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in image_characteristics])
IMAGE_CHARACTERISTICS.update(image_characteristics)
113
114
115section_characteristics = [
116    ('IMAGE_SCN_CNT_CODE',                  0x00000020),
117    ('IMAGE_SCN_CNT_INITIALIZED_DATA',      0x00000040),
118    ('IMAGE_SCN_CNT_UNINITIALIZED_DATA',    0x00000080),
119    ('IMAGE_SCN_LNK_OTHER',                 0x00000100),
120    ('IMAGE_SCN_LNK_INFO',                  0x00000200),
121    ('IMAGE_SCN_LNK_REMOVE',                0x00000800),
122    ('IMAGE_SCN_LNK_COMDAT',                0x00001000),
123    ('IMAGE_SCN_MEM_FARDATA',               0x00008000),
124    ('IMAGE_SCN_MEM_PURGEABLE',             0x00020000),
125    ('IMAGE_SCN_MEM_16BIT',                 0x00020000),
126    ('IMAGE_SCN_MEM_LOCKED',                0x00040000),
127    ('IMAGE_SCN_MEM_PRELOAD',               0x00080000),
128    ('IMAGE_SCN_ALIGN_1BYTES',              0x00100000),
129    ('IMAGE_SCN_ALIGN_2BYTES',              0x00200000),
130    ('IMAGE_SCN_ALIGN_4BYTES',              0x00300000),
131    ('IMAGE_SCN_ALIGN_8BYTES',              0x00400000),
132    ('IMAGE_SCN_ALIGN_16BYTES',             0x00500000),
133    ('IMAGE_SCN_ALIGN_32BYTES',             0x00600000),
134    ('IMAGE_SCN_ALIGN_64BYTES',             0x00700000),
135    ('IMAGE_SCN_ALIGN_128BYTES',            0x00800000),
136    ('IMAGE_SCN_ALIGN_256BYTES',            0x00900000),
137    ('IMAGE_SCN_ALIGN_512BYTES',            0x00A00000),
138    ('IMAGE_SCN_ALIGN_1024BYTES',           0x00B00000),
139    ('IMAGE_SCN_ALIGN_2048BYTES',           0x00C00000),
140    ('IMAGE_SCN_ALIGN_4096BYTES',           0x00D00000),
141    ('IMAGE_SCN_ALIGN_8192BYTES',           0x00E00000),
142    ('IMAGE_SCN_ALIGN_MASK',                0x00F00000),
143    ('IMAGE_SCN_LNK_NRELOC_OVFL',           0x01000000),
144    ('IMAGE_SCN_MEM_DISCARDABLE',           0x02000000),
145    ('IMAGE_SCN_MEM_NOT_CACHED',            0x04000000),
146    ('IMAGE_SCN_MEM_NOT_PAGED',             0x08000000),
147    ('IMAGE_SCN_MEM_SHARED',                0x10000000),
148    ('IMAGE_SCN_MEM_EXECUTE',               0x20000000),
149    ('IMAGE_SCN_MEM_READ',                  0x40000000),
150    ('IMAGE_SCN_MEM_WRITE',                 0x80000000L) ]
151
152SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
153    section_characteristics]+section_characteristics)
154
155
# Debug directory entry types.
debug_types = [
    ('IMAGE_DEBUG_TYPE_UNKNOWN',        0),
    ('IMAGE_DEBUG_TYPE_COFF',           1),
    ('IMAGE_DEBUG_TYPE_CODEVIEW',       2),
    ('IMAGE_DEBUG_TYPE_FPO',            3),
    ('IMAGE_DEBUG_TYPE_MISC',           4),
    ('IMAGE_DEBUG_TYPE_EXCEPTION',      5),
    ('IMAGE_DEBUG_TYPE_FIXUP',          6),
    ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC',    7),
    ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC',  8),
    ('IMAGE_DEBUG_TYPE_BORLAND',        9),
    ('IMAGE_DEBUG_TYPE_RESERVED10',     10),
]

# Two-way lookup table: type value -> name, then name -> type value.
DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types])
DEBUG_TYPE.update(debug_types)
170
171
# Subsystem identifiers from the optional header.
subsystem_types = [
    ('IMAGE_SUBSYSTEM_UNKNOWN',     0),
    ('IMAGE_SUBSYSTEM_NATIVE',      1),
    ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
    ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
    ('IMAGE_SUBSYSTEM_OS2_CUI',     5),
    ('IMAGE_SUBSYSTEM_POSIX_CUI',   7),
    ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI',  9),
    ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
    ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
    ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER',      12),
    ('IMAGE_SUBSYSTEM_EFI_ROM',     13),
    ('IMAGE_SUBSYSTEM_XBOX',        14),
]

# Two-way lookup table: subsystem value -> name, then name -> value.
SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types])
SUBSYSTEM_TYPE.update(subsystem_types)
187
188
# Machine (CPU) identifiers from the file header.
machine_types = [
    ('IMAGE_FILE_MACHINE_UNKNOWN',  0),
    ('IMAGE_FILE_MACHINE_AM33',     0x1d3),
    ('IMAGE_FILE_MACHINE_AMD64',    0x8664),
    ('IMAGE_FILE_MACHINE_ARM',      0x1c0),
    ('IMAGE_FILE_MACHINE_EBC',      0xebc),
    ('IMAGE_FILE_MACHINE_I386',     0x14c),
    ('IMAGE_FILE_MACHINE_IA64',     0x200),
    # The PE/COFF specification spells this constant M32R. The historical
    # 'MR32' spelling is kept for existing callers and is listed last so
    # that the reverse lookup of 0x9041 still yields the old name.
    ('IMAGE_FILE_MACHINE_M32R',     0x9041),
    ('IMAGE_FILE_MACHINE_MR32',     0x9041),
    ('IMAGE_FILE_MACHINE_MIPS16',   0x266),
    ('IMAGE_FILE_MACHINE_MIPSFPU',  0x366),
    ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
    ('IMAGE_FILE_MACHINE_POWERPC',  0x1f0),
    ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
    ('IMAGE_FILE_MACHINE_R4000',    0x166),
    ('IMAGE_FILE_MACHINE_SH3',      0x1a2),
    ('IMAGE_FILE_MACHINE_SH3DSP',   0x1a3),
    ('IMAGE_FILE_MACHINE_SH4',      0x1a6),
    ('IMAGE_FILE_MACHINE_SH5',      0x1a8),
    ('IMAGE_FILE_MACHINE_THUMB',    0x1c2),
    ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
 ]

# Two-way lookup table: machine value -> name, then name -> machine value.
MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)
213
214
# Base relocation entry types. Value 9 is shared by two names; the
# reverse lookup resolves to the later one.
relocation_types = [
    ('IMAGE_REL_BASED_ABSOLUTE',        0),
    ('IMAGE_REL_BASED_HIGH',            1),
    ('IMAGE_REL_BASED_LOW',             2),
    ('IMAGE_REL_BASED_HIGHLOW',         3),
    ('IMAGE_REL_BASED_HIGHADJ',         4),
    ('IMAGE_REL_BASED_MIPS_JMPADDR',    5),
    ('IMAGE_REL_BASED_SECTION',         6),
    ('IMAGE_REL_BASED_REL',             7),
    ('IMAGE_REL_BASED_MIPS_JMPADDR16',  9),
    ('IMAGE_REL_BASED_IA64_IMM64',      9),
    ('IMAGE_REL_BASED_DIR64',           10),
    ('IMAGE_REL_BASED_HIGH3ADJ',        11),
]

# Two-way lookup table: type value -> name, then name -> type value.
RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types])
RELOCATION_TYPE.update(relocation_types)
230
231
# DllCharacteristics flags from the optional header.
dll_characteristics = [
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
    ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE',      0x0040),
    ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY',   0x0080),
    ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT',         0x0100),
    ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION',      0x0200),
    ('IMAGE_DLL_CHARACTERISTICS_NO_SEH',    0x0400),
    ('IMAGE_DLL_CHARACTERISTICS_NO_BIND',   0x0800),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
    ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER',    0x2000),
    ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000),
]

# Two-way lookup table: flag value -> name, then name -> flag value.
DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics])
DLL_CHARACTERISTICS.update(dll_characteristics)
248
249
# Resource type identifiers (RT_*)
resource_type = [
    ('RT_CURSOR',          1),
    ('RT_BITMAP',          2),
    ('RT_ICON',            3),
    ('RT_MENU',            4),
    ('RT_DIALOG',          5),
    ('RT_STRING',          6),
    ('RT_FONTDIR',         7),
    ('RT_FONT',            8),
    ('RT_ACCELERATOR',     9),
    ('RT_RCDATA',          10),
    ('RT_MESSAGETABLE',    11),
    ('RT_GROUP_CURSOR',    12),
    ('RT_GROUP_ICON',      14),
    ('RT_VERSION',         16),
    ('RT_DLGINCLUDE',      17),
    ('RT_PLUGPLAY',        19),
    ('RT_VXD',             20),
    ('RT_ANICURSOR',       21),
    ('RT_ANIICON',         22),
    ('RT_HTML',            23),
    ('RT_MANIFEST',        24),
]

# Two-way lookup table: type id -> name, then name -> type id.
RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type])
RESOURCE_TYPE.update(resource_type)
275
276
# Primary language identifiers (LANG_*). Croatian and Serbian share
# 0x1a; the reverse lookup resolves to the later entry.
lang = [
    ('LANG_NEUTRAL',       0x00),
    ('LANG_INVARIANT',     0x7f),
    ('LANG_AFRIKAANS',     0x36),
    ('LANG_ALBANIAN',      0x1c),
    ('LANG_ARABIC',        0x01),
    ('LANG_ARMENIAN',      0x2b),
    ('LANG_ASSAMESE',      0x4d),
    ('LANG_AZERI',         0x2c),
    ('LANG_BASQUE',        0x2d),
    ('LANG_BELARUSIAN',    0x23),
    ('LANG_BENGALI',       0x45),
    ('LANG_BULGARIAN',     0x02),
    ('LANG_CATALAN',       0x03),
    ('LANG_CHINESE',       0x04),
    ('LANG_CROATIAN',      0x1a),
    ('LANG_CZECH',         0x05),
    ('LANG_DANISH',        0x06),
    ('LANG_DIVEHI',        0x65),
    ('LANG_DUTCH',         0x13),
    ('LANG_ENGLISH',       0x09),
    ('LANG_ESTONIAN',      0x25),
    ('LANG_FAEROESE',      0x38),
    ('LANG_FARSI',         0x29),
    ('LANG_FINNISH',       0x0b),
    ('LANG_FRENCH',        0x0c),
    ('LANG_GALICIAN',      0x56),
    ('LANG_GEORGIAN',      0x37),
    ('LANG_GERMAN',        0x07),
    ('LANG_GREEK',         0x08),
    ('LANG_GUJARATI',      0x47),
    ('LANG_HEBREW',        0x0d),
    ('LANG_HINDI',         0x39),
    ('LANG_HUNGARIAN',     0x0e),
    ('LANG_ICELANDIC',     0x0f),
    ('LANG_INDONESIAN',    0x21),
    ('LANG_ITALIAN',       0x10),
    ('LANG_JAPANESE',      0x11),
    ('LANG_KANNADA',       0x4b),
    ('LANG_KASHMIRI',      0x60),
    ('LANG_KAZAK',         0x3f),
    ('LANG_KONKANI',       0x57),
    ('LANG_KOREAN',        0x12),
    ('LANG_KYRGYZ',        0x40),
    ('LANG_LATVIAN',       0x26),
    ('LANG_LITHUANIAN',    0x27),
    ('LANG_MACEDONIAN',    0x2f),
    ('LANG_MALAY',         0x3e),
    ('LANG_MALAYALAM',     0x4c),
    ('LANG_MANIPURI',      0x58),
    ('LANG_MARATHI',       0x4e),
    ('LANG_MONGOLIAN',     0x50),
    ('LANG_NEPALI',        0x61),
    ('LANG_NORWEGIAN',     0x14),
    ('LANG_ORIYA',         0x48),
    ('LANG_POLISH',        0x15),
    ('LANG_PORTUGUESE',    0x16),
    ('LANG_PUNJABI',       0x46),
    ('LANG_ROMANIAN',      0x18),
    ('LANG_RUSSIAN',       0x19),
    ('LANG_SANSKRIT',      0x4f),
    ('LANG_SERBIAN',       0x1a),
    ('LANG_SINDHI',        0x59),
    ('LANG_SLOVAK',        0x1b),
    ('LANG_SLOVENIAN',     0x24),
    ('LANG_SPANISH',       0x0a),
    ('LANG_SWAHILI',       0x41),
    ('LANG_SWEDISH',       0x1d),
    ('LANG_SYRIAC',        0x5a),
    ('LANG_TAMIL',         0x49),
    ('LANG_TATAR',         0x44),
    ('LANG_TELUGU',        0x4a),
    ('LANG_THAI',          0x1e),
    ('LANG_TURKISH',       0x1f),
    ('LANG_UKRAINIAN',     0x22),
    ('LANG_URDU',          0x20),
    ('LANG_UZBEK',         0x43),
    ('LANG_VIETNAMESE',    0x2a),
    ('LANG_GAELIC',        0x3c),
    ('LANG_MALTESE',       0x3a),
    ('LANG_MAORI',         0x28),
    ('LANG_RHAETO_ROMANCE',0x17),
    ('LANG_SAAMI',         0x3b),
    ('LANG_SORBIAN',       0x2e),
    ('LANG_SUTU',          0x30),
    ('LANG_TSONGA',        0x31),
    ('LANG_TSWANA',        0x32),
    ('LANG_VENDA',         0x33),
    ('LANG_XHOSA',         0x34),
    ('LANG_ZULU',          0x35),
    ('LANG_ESPERANTO',     0x8f),
    ('LANG_WALON',         0x90),
    ('LANG_CORNISH',       0x91),
    ('LANG_WELSH',         0x92),
    ('LANG_BRETON',        0x93),
]

# Two-way lookup table: name -> id is loaded first, then id -> name.
LANG = dict(lang)
LANG.update([(e[1], e[0]) for e in lang])
375
376
# Sublanguage identifiers (SUBLANG_*). The numeric ids are reused across
# languages, so the reverse lookup returns the last name listed for an id.
sublang = [
    ('SUBLANG_NEUTRAL',                        0x00),
    ('SUBLANG_DEFAULT',                        0x01),
    ('SUBLANG_SYS_DEFAULT',                    0x02),
    ('SUBLANG_ARABIC_SAUDI_ARABIA',            0x01),
    ('SUBLANG_ARABIC_IRAQ',                    0x02),
    ('SUBLANG_ARABIC_EGYPT',                   0x03),
    ('SUBLANG_ARABIC_LIBYA',                   0x04),
    ('SUBLANG_ARABIC_ALGERIA',                 0x05),
    ('SUBLANG_ARABIC_MOROCCO',                 0x06),
    ('SUBLANG_ARABIC_TUNISIA',                 0x07),
    ('SUBLANG_ARABIC_OMAN',                    0x08),
    ('SUBLANG_ARABIC_YEMEN',                   0x09),
    ('SUBLANG_ARABIC_SYRIA',                   0x0a),
    ('SUBLANG_ARABIC_JORDAN',                  0x0b),
    ('SUBLANG_ARABIC_LEBANON',                 0x0c),
    ('SUBLANG_ARABIC_KUWAIT',                  0x0d),
    ('SUBLANG_ARABIC_UAE',                     0x0e),
    ('SUBLANG_ARABIC_BAHRAIN',                 0x0f),
    ('SUBLANG_ARABIC_QATAR',                   0x10),
    ('SUBLANG_AZERI_LATIN',                    0x01),
    ('SUBLANG_AZERI_CYRILLIC',                 0x02),
    ('SUBLANG_CHINESE_TRADITIONAL',            0x01),
    ('SUBLANG_CHINESE_SIMPLIFIED',             0x02),
    ('SUBLANG_CHINESE_HONGKONG',               0x03),
    ('SUBLANG_CHINESE_SINGAPORE',              0x04),
    ('SUBLANG_CHINESE_MACAU',                  0x05),
    ('SUBLANG_DUTCH',                          0x01),
    ('SUBLANG_DUTCH_BELGIAN',                  0x02),
    ('SUBLANG_ENGLISH_US',                     0x01),
    ('SUBLANG_ENGLISH_UK',                     0x02),
    ('SUBLANG_ENGLISH_AUS',                    0x03),
    ('SUBLANG_ENGLISH_CAN',                    0x04),
    ('SUBLANG_ENGLISH_NZ',                     0x05),
    ('SUBLANG_ENGLISH_EIRE',                   0x06),
    ('SUBLANG_ENGLISH_SOUTH_AFRICA',           0x07),
    ('SUBLANG_ENGLISH_JAMAICA',                0x08),
    ('SUBLANG_ENGLISH_CARIBBEAN',              0x09),
    ('SUBLANG_ENGLISH_BELIZE',                 0x0a),
    ('SUBLANG_ENGLISH_TRINIDAD',               0x0b),
    ('SUBLANG_ENGLISH_ZIMBABWE',               0x0c),
    ('SUBLANG_ENGLISH_PHILIPPINES',            0x0d),
    ('SUBLANG_FRENCH',                         0x01),
    ('SUBLANG_FRENCH_BELGIAN',                 0x02),
    ('SUBLANG_FRENCH_CANADIAN',                0x03),
    ('SUBLANG_FRENCH_SWISS',                   0x04),
    ('SUBLANG_FRENCH_LUXEMBOURG',              0x05),
    ('SUBLANG_FRENCH_MONACO',                  0x06),
    ('SUBLANG_GERMAN',                         0x01),
    ('SUBLANG_GERMAN_SWISS',                   0x02),
    ('SUBLANG_GERMAN_AUSTRIAN',                0x03),
    ('SUBLANG_GERMAN_LUXEMBOURG',              0x04),
    ('SUBLANG_GERMAN_LIECHTENSTEIN',           0x05),
    ('SUBLANG_ITALIAN',                        0x01),
    ('SUBLANG_ITALIAN_SWISS',                  0x02),
    ('SUBLANG_KASHMIRI_SASIA',                 0x02),
    ('SUBLANG_KASHMIRI_INDIA',                 0x02),
    ('SUBLANG_KOREAN',                         0x01),
    ('SUBLANG_LITHUANIAN',                     0x01),
    ('SUBLANG_MALAY_MALAYSIA',                 0x01),
    ('SUBLANG_MALAY_BRUNEI_DARUSSALAM',        0x02),
    ('SUBLANG_NEPALI_INDIA',                   0x02),
    ('SUBLANG_NORWEGIAN_BOKMAL',               0x01),
    ('SUBLANG_NORWEGIAN_NYNORSK',              0x02),
    ('SUBLANG_PORTUGUESE',                     0x02),
    ('SUBLANG_PORTUGUESE_BRAZILIAN',           0x01),
    ('SUBLANG_SERBIAN_LATIN',                  0x02),
    ('SUBLANG_SERBIAN_CYRILLIC',               0x03),
    ('SUBLANG_SPANISH',                        0x01),
    ('SUBLANG_SPANISH_MEXICAN',                0x02),
    ('SUBLANG_SPANISH_MODERN',                 0x03),
    ('SUBLANG_SPANISH_GUATEMALA',              0x04),
    ('SUBLANG_SPANISH_COSTA_RICA',             0x05),
    ('SUBLANG_SPANISH_PANAMA',                 0x06),
    ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC',     0x07),
    ('SUBLANG_SPANISH_VENEZUELA',              0x08),
    ('SUBLANG_SPANISH_COLOMBIA',               0x09),
    ('SUBLANG_SPANISH_PERU',                   0x0a),
    ('SUBLANG_SPANISH_ARGENTINA',              0x0b),
    ('SUBLANG_SPANISH_ECUADOR',                0x0c),
    ('SUBLANG_SPANISH_CHILE',                  0x0d),
    ('SUBLANG_SPANISH_URUGUAY',                0x0e),
    ('SUBLANG_SPANISH_PARAGUAY',               0x0f),
    ('SUBLANG_SPANISH_BOLIVIA',                0x10),
    ('SUBLANG_SPANISH_EL_SALVADOR',            0x11),
    ('SUBLANG_SPANISH_HONDURAS',               0x12),
    ('SUBLANG_SPANISH_NICARAGUA',              0x13),
    ('SUBLANG_SPANISH_PUERTO_RICO',            0x14),
    ('SUBLANG_SWEDISH',                        0x01),
    ('SUBLANG_SWEDISH_FINLAND',                0x02),
    ('SUBLANG_URDU_PAKISTAN',                  0x01),
    ('SUBLANG_URDU_INDIA',                     0x02),
    ('SUBLANG_UZBEK_LATIN',                    0x01),
    ('SUBLANG_UZBEK_CYRILLIC',                 0x02),
    ('SUBLANG_DUTCH_SURINAM',                  0x03),
    ('SUBLANG_ROMANIAN',                       0x01),
    ('SUBLANG_ROMANIAN_MOLDAVIA',              0x02),
    ('SUBLANG_RUSSIAN',                        0x01),
    ('SUBLANG_RUSSIAN_MOLDAVIA',               0x02),
    ('SUBLANG_CROATIAN',                       0x01),
    ('SUBLANG_LITHUANIAN_CLASSIC',             0x02),
    ('SUBLANG_GAELIC',                         0x01),
    ('SUBLANG_GAELIC_SCOTTISH',                0x02),
    ('SUBLANG_GAELIC_MANX',                    0x03),
]

# Two-way lookup table: name -> id is loaded first, then id -> name.
SUBLANG = dict(sublang)
SUBLANG.update([(e[1], e[0]) for e in sublang])
484
485
class UnicodeStringWrapperPostProcessor:
    """Helper for deciding whether a string is plain Unicode or Pascal.

    Each instance wraps the RVA of one candidate string. Comparing that
    RVA with the RVA of the following string (see the ask_* methods)
    helps decide which of the two encodings is in use.
    """

    def __init__(self, pe, rva_ptr):
        """Remember the owning PE object and the RVA of the string.

        pe must provide get_data(rva, length) and get_string_u_at_rva();
        those are the only members used by this class.
        """
        self.pe = pe
        self.rva_ptr = rva_ptr
        self.string = None


    def get_rva(self):
        """Get the RVA of the string."""

        return self.rva_ptr


    def __str__(self):
        """Return the escaped ASCII representation of the string.

        Characters outside string.printable are rendered as \\xNN escapes.
        An empty string is returned when nothing has been rendered yet.
        """

        def convert_char(char):
            if char in string.printable:
                return char
            return r'\x%02x' % ord(char)

        if self.string:
            return ''.join([convert_char(c) for c in self.string])

        return ''


    def invalidate(self):
        """Intended to mark the instance as holding no known string type.

        NOTE: rebinding the local name 'self' has no effect on the object
        or on any reference a caller holds, so this is effectively a no-op.
        It is kept unchanged for interface compatibility.
        """

        self = None


    def render_pascal_16(self):
        """Read the string as a Pascal string with a 16-bit length prefix.

        The characters start two bytes past the RVA and the length word
        is passed on as the maximum length to read.
        """

        self.string = self.pe.get_string_u_at_rva(
            self.rva_ptr+2,
            max_length=self.__get_pascal_16_length())


    def ask_pascal_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.

        Such RVA could indicate the natural end of the string and will be checked
        with the possible length contained in the first word.

        Returns True (and stores the length in self.length) when the
        length word matches the number of 16-bit units up to next_rva_ptr.
        """

        length = self.__get_pascal_16_length()

        # Floor division: identical to '/' on Python 2 integers.
        if length == (next_rva_ptr - (self.rva_ptr+2)) // 2:
            self.length = length
            return True

        return False


    def __get_pascal_16_length(self):
        """Return the 16-bit word stored at the string's own RVA."""

        return self.__get_word_value_at_rva(self.rva_ptr)


    def __get_word_value_at_rva(self, rva):
        """Return the little-endian 16-bit word at 'rva'.

        False is returned when the data cannot be read or fewer than
        two bytes are available.
        """

        try:
            # Read at the requested RVA. Previously the argument was
            # ignored and self.rva_ptr was always used, so callers asking
            # about other addresses got the wrong word.
            data = self.pe.get_data(rva, 2)
        except PEFormatError:
            return False

        if len(data)<2:
            return False

        return struct.unpack('<H', data)[0]


    def ask_unicode_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.

        Such RVA could indicate the natural end of the string and will be checked
        to see if there's a Unicode NULL character there.

        Returns True (and stores the byte length in self.length) when the
        word just before next_rva_ptr is zero.
        """

        if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
            self.length = next_rva_ptr - self.rva_ptr
            return True

        return False


    def render_unicode_16(self):
        """Read the string at the RVA as a plain 16-bit Unicode string."""

        self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
588
589
class PEFormatError(Exception):
    """Generic PE format error exception.

    The description is available as the 'value' attribute and, like for
    any standard exception, as the single element of 'args'.
    """

    def __init__(self, value):
        # Initialise the base class so that 'args' is populated; without
        # this, generic handlers and pickling see an empty exception.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return repr(self.value)
598
599
600class Dump:
601    """Convenience class for dumping the PE information."""
602
603    def __init__(self):
604        self.text = ''
605
606
607    def add_lines(self, txt, indent=0):
608        """Adds a list of lines.
609
610        The list can be indented with the optional argument 'indent'.
611        """
612        for line in txt:
613            self.add_line(line, indent)
614
615
616    def add_line(self, txt, indent=0):
617        """Adds a line.
618
619        The line can be indented with the optional argument 'indent'.
620        """
621
622        self.add(txt+'\n', indent)
623
624
625    def add(self, txt, indent=0):
626        """Adds some text, no newline will be appended.
627
628        The text can be indented with the optional argument 'indent'.
629        """
630
631        if isinstance(txt, unicode):
632            s = []
633            for c in txt:
634                try:
635                    s.append(str(c))
636                except UnicodeEncodeError, e:
637                    s.append(repr(c))
638
639            txt = ''.join(s)
640
641        self.text += ' '*indent+txt
642
643
644    def add_header(self, txt):
645        """Adds a header element."""
646
647        self.add_line('-'*10+txt+'-'*10+'\n')
648
649
650    def add_newline(self):
651        """Adds a newline."""
652
653        self.text += '\n'
654
655
656    def get_text(self):
657        """Get the text in its current state."""
658
659        return self.text
660
661
662
class Structure:
    """Prepare structure object to extract members from data.

    Format is a two-item sequence: the structure's default name followed
    by a list of element definitions. Each definition is a string of the
    form 'type,name[,alias...]', where 'type' is a struct-module format
    code and every name becomes an attribute once data has been unpacked.
    """


    def __init__(self, format, name=None, file_offset=None):
        """Build the struct format string and the list of field names.

        format:      (default_name, element_definitions) as described above
        name:        optional name overriding the one in 'format'
        file_offset: optional offset of the structure within the file
        """
        # Format is forced little endian, for big endian non Intel platforms
        self.__format__ = '<'
        self.__keys__ = []
        self.__format_length__ = 0
        self.__set_format__(format[1])
        self._all_zeroes = False
        self.__unpacked_data_elms__ = None
        self.__file_offset__ = file_offset
        if name:
            self.name = name
        else:
            self.name = format[0]


    def __get_format__(self):
        """Return the struct-module format string of the structure."""
        return self.__format__


    def get_file_offset(self):
        """Return the file offset associated with the structure."""
        return self.__file_offset__

    def set_file_offset(self, offset):
        """Set the file offset associated with the structure."""
        self.__file_offset__ = offset

    def all_zeroes(self):
        """Returns true if the unpacked data was all zeroes."""

        return self._all_zeroes


    def __set_format__(self, format):
        """Parse the element definitions into format string and key lists.

        Definitions without a comma are ignored. A name that was already
        used by an earlier element gets a numeric suffix ('_N', N being
        the number of existing names starting with it) so that it does
        not overwrite the earlier attribute.
        """

        for elm in format:
            if ',' not in elm:
                continue

            elm_type, elm_name = elm.split(',', 1)
            self.__format__ += elm_type

            # __keys__ holds one list of names per element, so it has to
            # be flattened before a name can be looked up in it. Testing
            # the name against the list of lists never matched, which left
            # duplicate field names silently overwriting each other.
            known_names = [k for keys in self.__keys__ for k in keys]

            names = []
            for elm_name in elm_name.split(','):
                if elm_name in known_names:
                    occ_count = len(
                        [k for k in known_names if k.startswith(elm_name)])
                    elm_name = elm_name+'_'+str(occ_count)
                names.append(elm_name)
            # Some PE header structures have unions on them, so a certain
            # value might have different names, so each key has a list of
            # all the possible members referring to the data.
            self.__keys__.append(names)

        self.__format_length__ = struct.calcsize(self.__format__)


    def sizeof(self):
        """Return size of the structure."""

        return self.__format_length__


    def __unpack__(self, data):
        """Unpack 'data' and expose every field as an attribute.

        Extra trailing data is ignored. PEFormatError is raised when
        'data' is shorter than the structure.
        """

        if len(data)>self.__format_length__:
            data = data[:self.__format_length__]

        # OC Patch:
        # Some malware have incorrect header lengths.
        # Fail gracefully if this occurs
        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
        #
        elif len(data)<self.__format_length__:
            raise PEFormatError('Data length less than expected header length.')


        if data.count(chr(0)) == len(data):
            self._all_zeroes = True

        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
        for i, value in enumerate(self.__unpacked_data_elms__):
            # Every alias of a union member receives the same value.
            for key in self.__keys__[i]:
                setattr(self, key, value)


    def __pack__(self):
        """Pack the current attribute values back into a string.

        For elements with several names (unions) the first name whose
        value differs from the originally unpacked one is used; when none
        differs the unchanged value is written.
        """

        new_values = []

        for i, old_val in enumerate(self.__unpacked_data_elms__):

            for key in self.__keys__[i]:
                new_val = getattr(self, key)

                # In the case of Unions, when the first changed value
                # is picked the loop is exited
                if new_val != old_val:
                    break

            new_values.append(new_val)

        return struct.pack(self.__format__, *new_values)


    def __str__(self):
        return '\n'.join( self.dump() )

    def __repr__(self):
        return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))


    def dump(self, indentation=0):
        """Returns the structure's representation as a list of lines.

        The first line is the structure name in brackets, followed by one
        'name: value' line per field name. Integers are shown in
        hexadecimal; 'TimeDateStamp' and 'dwTimeStamp' additionally get
        the UTC time they encode. The 'indentation' argument is accepted
        but not used.
        """

        dump = []

        dump.append('[%s]' % self.name)

        # Refer to the __set_format__ method for an explanation
        # of the following construct.
        for keys in self.__keys__:
            for key in keys:

                val = getattr(self, key)
                if isinstance(val, (int, long)):
                    val_str = '0x%-8X' % (val)
                    if key == 'TimeDateStamp' or key == 'dwTimeStamp':
                        try:
                            val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
                        except ValueError:
                            val_str += ' [INVALID TIME]'
                else:
                    # Drop NUL padding from string fields.
                    val_str = str(val).replace('\0', '')

                dump.append('%-30s %s' % (key+':', val_str))

        return dump
809
810
811
class SectionStructure(Structure):
    """Convenience section handling class.

    Besides the unpacked section header fields, the methods rely on a
    'data' attribute holding the section's raw contents (see set_data).
    """

    def get_data(self, start, length=None):
        """Get data chunk from a section.

        Allows to query data from the section by passing the
        addresses where the PE file would be loaded by default.
        It is then possible to retrieve code and data by its real
        addresses as it would be if loaded.

        start:  RVA of the first byte wanted
        length: number of bytes wanted; None means up to the end of the
                section data. A length of 0 yields an empty result.
        """

        offset = start - self.VirtualAddress

        # Compare against None explicitly: a requested length of zero is
        # a valid request and must not fall through to "everything".
        if length is not None:
            end = offset+length
        else:
            end = len(self.data)

        return self.data[offset:end]


    def get_rva_from_offset(self, offset):
        """Translate a file offset within the section into an RVA."""
        return offset - self.PointerToRawData + self.VirtualAddress


    def get_offset_from_rva(self, rva):
        """Translate an RVA within the section into a file offset."""
        return (rva - self.VirtualAddress) + self.PointerToRawData


    def contains_offset(self, offset):
        """Check whether the section contains the file offset provided."""

        if not self.PointerToRawData:
           # bss and other sections containing only uninitialized data must have 0
           # and do not take space in the file
           return False
        # Both bounds are file offsets. The upper bound used to be derived
        # from VirtualAddress, which mixed RVAs with file offsets.
        return (self.PointerToRawData <= offset <
                self.PointerToRawData + self.SizeOfRawData)


    def contains_rva(self, rva):
        """Check whether the section contains the address provided."""

        # PECOFF documentation v8 says:
        # The total size of the section when loaded into memory.
        # If this value is greater than SizeOfRawData, the section is zero-padded.
        # This field is valid only for executable images and should be set to zero
        # for object files.

        if len(self.data) < self.SizeOfRawData:
            # Less data available than the header claims: fall back to
            # the virtual size alone.
            size = self.Misc_VirtualSize
        else:
            size = max(self.SizeOfRawData, self.Misc_VirtualSize)

        return self.VirtualAddress <= rva < self.VirtualAddress + size

    def contains(self, rva):
        """Deprecated alias of contains_rva()."""
        return self.contains_rva(rva)


    def set_data(self, data):
        """Set the data belonging to the section."""

        self.data = data


    def get_entropy(self):
        """Calculate and return the entropy for the section."""

        return self.entropy_H( self.data )


    def get_hash_sha1(self):
        """Get the SHA-1 hex-digest of the section's data.

        Returns None when no SHA-1 implementation is available.
        """

        if sha1 is not None:
            return sha1( self.data ).hexdigest()


    def get_hash_sha256(self):
        """Get the SHA-256 hex-digest of the section's data.

        Returns None when no SHA-256 implementation is available.
        """

        if sha256 is not None:
            return sha256( self.data ).hexdigest()


    def get_hash_sha512(self):
        """Get the SHA-512 hex-digest of the section's data.

        Returns None when no SHA-512 implementation is available.
        """

        if sha512 is not None:
            return sha512( self.data ).hexdigest()


    def get_hash_md5(self):
        """Get the MD5 hex-digest of the section's data.

        Returns None when no MD5 implementation is available.
        """

        if md5 is not None:
            return md5( self.data ).hexdigest()


    def entropy_H(self, data):
        """Calculate the entropy of a chunk of data.

        The result is in bits per byte (base-2 logarithm over the byte
        frequencies); 0.0 is returned for empty data.
        """

        if len(data) == 0:
            return 0.0

        # One counter per possible byte value.
        occurences = array.array('L', [0]*256)

        for x in data:
            occurences[ord(x)] += 1

        entropy = 0
        for x in occurences:
            if x:
                p_x = float(x) / len(data)
                entropy -= p_x*math.log(p_x, 2)

        return entropy
931
932
933
class DataContainer:
    """Generic data container.

    Every keyword argument given to the constructor becomes an attribute
    of the instance, named after the keyword.
    """

    def __init__(self, **args):
        for attribute_name in args:
            setattr(self, attribute_name, args[attribute_name])
940
941
942
class ImportDescData(DataContainer):
    """Holds import descriptor information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR structure
    """
950
class ImportData(DataContainer):
    """Holds an imported symbol's information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    ordinal:    Ordinal of the symbol
    name:       Name of the symbol
    bound:      If the symbol is bound, this contains
                the address.
    """
959
class ExportDirData(DataContainer):
    """Holds export directory information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)
    """
966
class ExportData(DataContainer):
    """Holds an exported symbol's information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise.
    """
978
979
class ResourceDirData(DataContainer):
    """Holds resource directory information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of entries (ResourceDirEntryData instances)
    """
986
class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                available at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no further lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.
    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """
1006
class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       Primary language ID
    sublang:    Sublanguage ID
    """
1014
class DebugData(DataContainer):
    """Holds debug information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_DEBUG_DIRECTORY structure
    """
1020
class BaseRelocationData(DataContainer):
    """Holds base relocation information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """
1027
class RelocationData(DataContainer):
    """Holds relocation information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    type:       Type of relocation
                The type string can be obtained with
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """
1036
class TlsData(DataContainer):
    """Holds TLS information.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_TLS_DIRECTORY structure
    """
1042
class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.

    This directory entry provides information on the DLLs
    this PE file has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """
1060
class BoundImportRefData(DataContainer):
    """Holds bound import forwarder reference data.

    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.

    Attributes are set from the keyword arguments given to the
    constructor (see DataContainer).

    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """
1070
1071
1072class PE:
1073    """A Portable Executable representation.
1074
1075    This class provides access to most of the information in a PE file.
1076
1077    It expects to be supplied the name of the file to load or PE data
1078    to process and an optional argument 'fast_load' (False by default)
1079    which controls whether to load all the directories information,
1080    which can be quite time consuming.
1081
1082    pe = pefile.PE('module.dll')
1083    pe = pefile.PE(name='module.dll')
1084
1085    would load 'module.dll' and process it. If the data would be already
1086    available in a buffer the same could be achieved with:
1087
1088    pe = pefile.PE(data=module_dll_data)
1089
    A default for "fast_load" can be set at module level, for instance
    with "pefile.fast_load = True". That will make all subsequent
    instances skip loading the whole PE structure. The "full_load" method
    can be used to parse the missing data at a later stage.
1095
1096    Basic headers information will be available in the attributes:
1097
1098    DOS_HEADER
1099    NT_HEADERS
1100    FILE_HEADER
1101    OPTIONAL_HEADER
1102
    All of them will contain among their attributes the members of the
    corresponding structures as defined in WINNT.H

    The raw data corresponding to the header (from the beginning of the
    file up to the start of the first section) will be available in the
    instance's attribute 'header' as a string.
1109
1110    The sections will be available as a list in the 'sections' attribute.
1111    Each entry will contain as attributes all the structure's members.
1112
1113    Directory entries will be available as attributes (if they exist):
1114    (no other entries are processed at this point)
1115
1116    DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
1117    DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
1118    DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
1119    DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
1120    DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
1121    DIRECTORY_ENTRY_TLS
    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportDescData instances)
1123
1124    The following dictionary attributes provide ways of mapping different
1125    constants. They will accept the numeric value and return the string
1126    representation and the opposite, feed in the string and get the
1127    numeric constant:
1128
1129    DIRECTORY_ENTRY
1130    IMAGE_CHARACTERISTICS
1131    SECTION_CHARACTERISTICS
1132    DEBUG_TYPE
1133    SUBSYSTEM_TYPE
1134    MACHINE_TYPE
1135    RELOCATION_TYPE
1136    RESOURCE_TYPE
1137    LANG
1138    SUBLANG
1139    """
1140
1141    #
1142    # Format specifications for PE structures.
1143    #
1144
1145    __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
1146        ('H,e_magic', 'H,e_cblp', 'H,e_cp',
1147        'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
1148        'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
1149        'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
1150        'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
1151        'L,e_lfanew'))
1152
1153    __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
1154        ('H,Machine', 'H,NumberOfSections',
1155        'L,TimeDateStamp', 'L,PointerToSymbolTable',
1156        'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
1157        'H,Characteristics'))
1158
1159    __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
1160        ('L,VirtualAddress', 'L,Size'))
1161
1162
1163    __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
1164        ('H,Magic', 'B,MajorLinkerVersion',
1165        'B,MinorLinkerVersion', 'L,SizeOfCode',
1166        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
1167        'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
1168        'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
1169        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
1170        'H,MajorImageVersion', 'H,MinorImageVersion',
1171        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
1172        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
1173        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
1174        'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
1175        'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
1176        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
1177
1178
1179    __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
1180        ('H,Magic', 'B,MajorLinkerVersion',
1181        'B,MinorLinkerVersion', 'L,SizeOfCode',
1182        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
1183        'L,AddressOfEntryPoint', 'L,BaseOfCode',
1184        'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
1185        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
1186        'H,MajorImageVersion', 'H,MinorImageVersion',
1187        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
1188        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
1189        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
1190        'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
1191        'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
1192        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
1193
1194
1195    __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))
1196
1197    __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
1198        ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
1199        'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
1200        'L,PointerToRelocations', 'L,PointerToLinenumbers',
1201        'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
1202        'L,Characteristics'))
1203
1204    __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
1205        ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
1206        'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))
1207
1208    __IMAGE_IMPORT_DESCRIPTOR_format__ =  ('IMAGE_IMPORT_DESCRIPTOR',
1209        ('L,OriginalFirstThunk,Characteristics',
1210        'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))
1211
1212    __IMAGE_EXPORT_DIRECTORY_format__ =  ('IMAGE_EXPORT_DIRECTORY',
1213        ('L,Characteristics',
1214        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
1215        'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
1216        'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))
1217
1218    __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
1219        ('L,Characteristics',
1220        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
1221        'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))
1222
1223    __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
1224        ('L,Name',
1225        'L,OffsetToData'))
1226
1227    __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
1228        ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
1229
1230    __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
1231        ('H,Length', 'H,ValueLength', 'H,Type' ))
1232
1233    __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
1234        ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
1235         'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
1236         'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))
1237
1238    __StringFileInfo_format__ = ( 'StringFileInfo',
1239        ('H,Length', 'H,ValueLength', 'H,Type' ))
1240
1241    __StringTable_format__ = ( 'StringTable',
1242        ('H,Length', 'H,ValueLength', 'H,Type' ))
1243
1244    __String_format__ = ( 'String',
1245        ('H,Length', 'H,ValueLength', 'H,Type' ))
1246
1247    __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))
1248
1249    __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
1250        ('L,ForwarderString,Function,Ordinal,AddressOfData',))
1251
1252    __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
1253        ('Q,ForwarderString,Function,Ordinal,AddressOfData',))
1254
1255    __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
1256        ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
1257        'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
1258        'L,PointerToRawData'))
1259
1260    __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
1261        ('L,VirtualAddress', 'L,SizeOfBlock') )
1262
1263    __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
1264        ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
1265        'L,AddressOfIndex', 'L,AddressOfCallBacks',
1266        'L,SizeOfZeroFill', 'L,Characteristics' ) )
1267
1268    __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
1269        ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
1270        'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
1271        'L,SizeOfZeroFill', 'L,Characteristics' ) )
1272
1273    __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
1274        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))
1275
1276    __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
1277        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )
1278
1279
1280    def __init__(self, name=None, data=None, fast_load=None):
1281
1282        self.sections = []
1283
1284        self.__warnings = []
1285
1286        self.PE_TYPE = None
1287
1288        if  not name and not data:
1289            return
1290
1291        # This list will keep track of all the structures created.
1292        # That will allow for an easy iteration through the list
1293        # in order to save the modifications made
1294        self.__structures__ = []
1295
1296        if not fast_load:
1297            fast_load = globals()['fast_load']
1298        self.__parse__(name, data, fast_load)
1299
1300
1301
1302    def __unpack_data__(self, format, data, file_offset):
1303        """Apply structure format to raw data.
1304
1305        Returns and unpacked structure object if successful, None otherwise.
1306        """
1307
1308        structure = Structure(format, file_offset=file_offset)
1309        #if len(data) < structure.sizeof():
1310        #    return None
1311
1312        try:
1313            structure.__unpack__(data)
1314        except PEFormatError, err:
1315            self.__warnings.append(
1316                'Corrupt header "%s" at file offset %d. Exception: %s' % (
1317                    format[0], file_offset, str(err))  )
1318            return None
1319
1320        self.__structures__.append(structure)
1321
1322        return structure
1323
1324
1325
1326    def __parse__(self, fname, data, fast_load):
1327        """Parse a Portable Executable file.
1328
1329        Loads a PE file, parsing all its structures and making them available
1330        through the instance's attributes.
1331        """
1332
1333        if fname:
1334            fd = file(fname, 'rb')
1335            self.__data__ = fd.read()
1336            fd.close()
1337        elif data:
1338            self.__data__ = data
1339
1340
1341        self.DOS_HEADER = self.__unpack_data__(
1342            self.__IMAGE_DOS_HEADER_format__,
1343            self.__data__, file_offset=0)
1344
1345        if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
1346            raise PEFormatError('DOS Header magic not found.')
1347
1348        # OC Patch:
1349        # Check for sane value in e_lfanew
1350        #
1351        if self.DOS_HEADER.e_lfanew > len(self.__data__):
1352            raise PEFormatError('Invalid e_lfanew value, probably not a PE file')
1353
1354        nt_headers_offset = self.DOS_HEADER.e_lfanew
1355
1356        self.NT_HEADERS = self.__unpack_data__(
1357            self.__IMAGE_NT_HEADERS_format__,
1358            self.__data__[nt_headers_offset:],
1359            file_offset = nt_headers_offset)
1360
1361        # We better check the signature right here, before the file screws
1362        # around with sections:
1363        # OC Patch:
1364        # Some malware will cause the Signature value to not exist at all
1365        if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
1366            raise PEFormatError('NT Headers not found.')
1367
1368        if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
1369            raise PEFormatError('Invalid NT Headers signature.')
1370
1371        self.FILE_HEADER = self.__unpack_data__(
1372            self.__IMAGE_FILE_HEADER_format__,
1373            self.__data__[nt_headers_offset+4:],
1374            file_offset = nt_headers_offset+4)
1375        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
1376
1377        if not self.FILE_HEADER:
1378            raise PEFormatError('File Header missing')
1379
1380        # Set the image's flags according the the Characteristics member
1381        self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
1382
1383        optional_header_offset =    \
1384            nt_headers_offset+4+self.FILE_HEADER.sizeof()
1385
1386        # Note: location of sections can be controlled from PE header:
1387        sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
1388
1389        self.OPTIONAL_HEADER = self.__unpack_data__(
1390            self.__IMAGE_OPTIONAL_HEADER_format__,
1391            self.__data__[optional_header_offset:],
1392            file_offset = optional_header_offset)
1393
1394        # According to solardesigner's findings for his
1395        # Tiny PE project, the optional header does not
1396        # need fields beyond "Subsystem" in order to be
1397        # loadable by the Windows loader (given that zeroes
1398        # are acceptable values and the header is loaded
1399        # in a zeroed memory page)
1400        # If trying to parse a full Optional Header fails
1401        # we try to parse it again with some 0 padding
1402        #
1403        MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
1404
1405        if ( self.OPTIONAL_HEADER is None and
1406            len(self.__data__[optional_header_offset:])
1407                >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
1408
1409            # Add enough zeroes to make up for the unused fields
1410            #
1411            padding_length = 128
1412
1413            # Create padding
1414            #
1415            padded_data = self.__data__[optional_header_offset:] + (
1416                '\0' * padding_length)
1417
1418            self.OPTIONAL_HEADER = self.__unpack_data__(
1419                self.__IMAGE_OPTIONAL_HEADER_format__,
1420                padded_data,
1421                file_offset = optional_header_offset)
1422
1423
1424        # Check the Magic in the OPTIONAL_HEADER and set the PE file
1425        # type accordingly
1426        #
1427        if self.OPTIONAL_HEADER is not None:
1428
1429            if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
1430
1431                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
1432
1433            elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
1434
1435                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
1436
1437                self.OPTIONAL_HEADER = self.__unpack_data__(
1438                    self.__IMAGE_OPTIONAL_HEADER64_format__,
1439                    self.__data__[optional_header_offset:],
1440                    file_offset = optional_header_offset)
1441
1442                # Again, as explained above, we try to parse
1443                # a reduced form of the Optional Header which
1444                # is still valid despite not including all
1445                # structure members
1446                #
1447                MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4
1448
1449                if ( self.OPTIONAL_HEADER is None and
1450                    len(self.__data__[optional_header_offset:])
1451                        >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
1452
1453                    padding_length = 128
1454                    padded_data = self.__data__[optional_header_offset:] + (
1455                        '\0' * padding_length)
1456                    self.OPTIONAL_HEADER = self.__unpack_data__(
1457                        self.__IMAGE_OPTIONAL_HEADER64_format__,
1458                        padded_data,
1459                        file_offset = optional_header_offset)
1460
1461
1462        if not self.FILE_HEADER:
1463            raise PEFormatError('File Header missing')
1464
1465
1466        # OC Patch:
1467        # Die gracefully if there is no OPTIONAL_HEADER field
1468        # 975440f5ad5e2e4a92c4d9a5f22f75c1
1469        if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
1470            raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
1471
1472        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
1473
1474        # Set the Dll Characteristics flags according the the DllCharacteristics member
1475        self.set_flags(
1476            self.OPTIONAL_HEADER,
1477            self.OPTIONAL_HEADER.DllCharacteristics,
1478            dll_characteristics_flags)
1479
1480
1481        self.OPTIONAL_HEADER.DATA_DIRECTORY = []
1482        #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
1483        offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
1484
1485
1486        self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
1487        self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
1488
1489
1490        # The NumberOfRvaAndSizes is sanitized to stay within
1491        # reasonable limits so can be casted to an int
1492        #
1493        if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
1494            self.__warnings.append(
1495                'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
1496                'Normal values are never larger than 0x10, the value is: 0x%x' %
1497                self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
1498
1499        for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
1500
1501            if len(self.__data__[offset:]) == 0:
1502                break
1503
1504            if len(self.__data__[offset:]) < 8:
1505                data = self.__data__[offset:]+'\0'*8
1506            else:
1507                data = self.__data__[offset:]
1508
1509            dir_entry = self.__unpack_data__(
1510                self.__IMAGE_DATA_DIRECTORY_format__,
1511                data,
1512                file_offset = offset)
1513
1514            if dir_entry is None:
1515                break
1516
1517            # Would fail if missing an entry
1518            # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
1519            try:
1520                dir_entry.name = DIRECTORY_ENTRY[i]
1521            except (KeyError, AttributeError):
1522                break
1523
1524            offset += dir_entry.sizeof()
1525
1526            self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
1527
1528            # If the offset goes outside the optional header,
1529            # the loop is broken, regardless of how many directories
1530            # NumberOfRvaAndSizes says there are
1531            #
1532            # We assume a normally sized optional header, hence that we do
1533            # a sizeof() instead of reading SizeOfOptionalHeader.
1534            # Then we add a default number of drectories times their size,
1535            # if we go beyond that, we assume the number of directories
1536            # is wrong and stop processing
1537            if offset >= (optional_header_offset +
1538                self.OPTIONAL_HEADER.sizeof() + 8*16) :
1539
1540                break
1541
1542
1543        offset = self.parse_sections(sections_offset)
1544
1545        # OC Patch:
1546        # There could be a problem if there are no raw data sections
1547        # greater than 0
1548        # fc91013eb72529da005110a3403541b6 example
1549        # Should this throw an exception in the minimum header offset
1550        # can't be found?
1551        #
1552        rawDataPointers = [
1553            s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
1554
1555        if len(rawDataPointers) > 0:
1556            lowest_section_offset = min(rawDataPointers)
1557        else:
1558            lowest_section_offset = None
1559
1560        if not lowest_section_offset or lowest_section_offset<offset:
1561            self.header = self.__data__[:offset]
1562        else:
1563            self.header = self.__data__[:lowest_section_offset]
1564
1565
1566        # Check whether the entry point lies within a section
1567        #
1568        if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
1569
1570            # Check whether the entry point lies within the file
1571            #
1572            ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
1573            if ep_offset > len(self.__data__):
1574
1575                self.__warnings.append(
1576                    'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
1577                    'AddressOfEntryPoint: 0x%x' %
1578                    self.OPTIONAL_HEADER.AddressOfEntryPoint )
1579
1580        else:
1581
1582            self.__warnings.append(
1583                'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
1584                'AddressOfEntryPoint: 0x%x' %
1585                self.OPTIONAL_HEADER.AddressOfEntryPoint )
1586
1587
1588        if not fast_load:
1589            self.parse_data_directories()
1590
1591
1592    def get_warnings(self):
1593        """Return the list of warnings.
1594
1595        Non-critical problems found when parsing the PE file are
1596        appended to a list of warnings. This method returns the
1597        full list.
1598        """
1599
1600        return self.__warnings
1601
1602
1603    def show_warnings(self):
1604        """Print the list of warnings.
1605
1606        Non-critical problems found when parsing the PE file are
1607        appended to a list of warnings. This method prints the
1608        full list to standard output.
1609        """
1610
1611        for warning in self.__warnings:
1612            print '>', warning
1613
1614
1615    def full_load(self):
1616        """Process the data directories.
1617
1618        This mathod will load the data directories which might not have
1619        been loaded if the "fast_load" option was used.
1620        """
1621
1622        self.parse_data_directories()
1623
1624
1625    def write(self, filename=None):
1626        """Write the PE file.
1627
1628        This function will process all headers and components
1629        of the PE file and include all changes made (by just
1630        assigning to attributes in the PE objects) and write
1631        the changes back to a file whose name is provided as
1632        an argument. The filename is optional.
1633        The data to be written to the file will be returned
1634        as a 'str' object.
1635        """
1636
1637        file_data = list(self.__data__)
1638        for struct in self.__structures__:
1639
1640            struct_data = list(struct.__pack__())
1641            offset = struct.get_file_offset()
1642
1643            file_data[offset:offset+len(struct_data)] = struct_data
1644
1645        if hasattr(self, 'VS_VERSIONINFO'):
1646            if hasattr(self, 'FileInfo'):
1647                for entry in self.FileInfo:
1648                    if hasattr(entry, 'StringTable'):
1649                        for st_entry in entry.StringTable:
1650                            for key, entry in st_entry.entries.items():
1651
1652                                offsets = st_entry.entries_offsets[key]
1653                                lengths = st_entry.entries_lengths[key]
1654
1655                                if len( entry ) > lengths[1]:
1656
1657                                    uc = zip(
1658                                            list(entry[:lengths[1]]), ['\0'] * lengths[1] )
1659                                    l = list()
1660                                    map(l.extend, uc)
1661
1662                                    file_data[
1663                                        offsets[1] : offsets[1] + lengths[1]*2 ] = l
1664
1665                                else:
1666
1667                                    uc = zip(
1668                                            list(entry), ['\0'] * len(entry) )
1669                                    l = list()
1670                                    map(l.extend, uc)
1671
1672                                    file_data[
1673                                        offsets[1] : offsets[1] + len(entry)*2 ] = l
1674
1675                                    remainder = lengths[1] - len(entry)
1676                                    file_data[
1677                                        offsets[1] + len(entry)*2 :
1678                                        offsets[1] + lengths[1]*2 ] = [
1679                                            u'\0' ] * remainder*2
1680
1681        new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )
1682
1683        if filename:
1684            f = file(filename, 'wb+')
1685            f.write(new_file_data)
1686            f.close()
1687
1688        return new_file_data
1689
1690
1691
1692    def parse_sections(self, offset):
1693        """Fetch the PE file sections.
1694
1695        The sections will be readily available in the "sections" attribute.
1696        Its attributes will contain all the section information plus "data"
1697        a buffer containing the section's data.
1698
1699        The "Characteristics" member will be processed and attributes
1700        representing the section characteristics (with the 'IMAGE_SCN_'
1701        string trimmed from the constant's names) will be added to the
1702        section instance.
1703
1704        Refer to the SectionStructure class for additional info.
1705        """
1706
1707        self.sections = []
1708
1709        for i in xrange(self.FILE_HEADER.NumberOfSections):
1710            section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
1711            if not section:
1712                break
1713            section_offset = offset + section.sizeof() * i
1714            section.set_file_offset(section_offset)
1715            section.__unpack__(self.__data__[section_offset:])
1716            self.__structures__.append(section)
1717
1718            if section.SizeOfRawData > len(self.__data__):
1719                self.__warnings.append(
1720                    ('Error parsing section %d. ' % i) +
1721                    'SizeOfRawData is larger than file.')
1722
1723            if section.PointerToRawData > len(self.__data__):
1724                self.__warnings.append(
1725                    ('Error parsing section %d. ' % i) +
1726                    'PointerToRawData points beyond the end of the file.')
1727
1728            if section.Misc_VirtualSize > 0x10000000:
1729                self.__warnings.append(
1730                    ('Suspicious value found parsing section %d. ' % i) +
1731                    'VirtualSize is extremely large > 256MiB.')
1732
1733            if section.VirtualAddress > 0x10000000:
1734                self.__warnings.append(
1735                    ('Suspicious value found parsing section %d. ' % i) +
1736                    'VirtualAddress is beyond 0x10000000.')
1737
1738            #
1739            # Some packer used a non-aligned PointerToRawData in the sections,
1740            # which causes several common tools not to load the section data
1741            # properly as they blindly read from the indicated offset.
1742            # It seems that Windows will round the offset down to the largest
1743            # offset multiple of FileAlignment which is smaller than
1744            # PointerToRawData. The following code will do the same.
1745            #
1746
1747            #alignment = self.OPTIONAL_HEADER.FileAlignment
1748            section_data_start = section.PointerToRawData
1749
1750            if ( self.OPTIONAL_HEADER.FileAlignment != 0 and
1751                (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
1752                self.__warnings.append(
1753                    ('Error parsing section %d. ' % i) +
1754                    'Suspicious value for FileAlignment in the Optional Header. ' +
1755                    'Normally the PointerToRawData entry of the sections\' structures ' +
1756                    'is a multiple of FileAlignment, this might imply the file ' +
1757                    'is trying to confuse tools which parse this incorrectly')
1758
1759            section_data_end = section_data_start+section.SizeOfRawData
1760            section.set_data(self.__data__[section_data_start:section_data_end])
1761
1762            section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
1763
1764            # Set the section's flags according the the Characteristics member
1765            self.set_flags(section, section.Characteristics, section_flags)
1766
1767            if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False)  and
1768                section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
1769
1770                self.__warnings.append(
1771                    ('Suspicious flags set for section %d. ' % i) +
1772                    'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
1773                    'This might indicate a packed executable.')
1774
1775            self.sections.append(section)
1776
1777        if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
1778            return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
1779        else:
1780            return offset
1781
1782
1783    def retrieve_flags(self, flag_dict, flag_filter):
1784        """Read the flags from a dictionary and return them in a usable form.
1785
1786        Will return a list of (flag, value) for all flags in "flag_dict"
1787        matching the filter "flag_filter".
1788        """
1789
1790        return [(f[0], f[1]) for f in flag_dict.items() if
1791                isinstance(f[0], str) and f[0].startswith(flag_filter)]
1792
1793
1794    def set_flags(self, obj, flag_field, flags):
1795        """Will process the flags and set attributes in the object accordingly.
1796
1797        The object "obj" will gain attritutes named after the flags provided in
1798        "flags" and valued True/False, matching the results of applyin each
1799        flag value from "flags" to flag_field.
1800        """
1801
1802        for flag in flags:
1803            if flag[1] & flag_field:
1804                setattr(obj, flag[0], True)
1805            else:
1806                setattr(obj, flag[0], False)
1807
1808
1809
1810    def parse_data_directories(self):
1811        """Parse and process the PE file's data directories."""
1812
1813        directory_parsing = (
1814            ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
1815            ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
1816            ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
1817            ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
1818            ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
1819            ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
1820            ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
1821            ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
1822
1823        for entry in directory_parsing:
1824            # OC Patch:
1825            #
1826            try:
1827                dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
1828                    DIRECTORY_ENTRY[entry[0]]]
1829            except IndexError:
1830                break
1831            if dir_entry.VirtualAddress:
1832                value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
1833                if value:
1834                    setattr(self, entry[0][6:], value)
1835
1836
1837    def parse_directory_bound_imports(self, rva, size):
1838        """"""
1839
1840        bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
1841        bnd_descr_size = bnd_descr.sizeof()
1842        start = rva
1843
1844        bound_imports = []
1845        while True:
1846
1847            bnd_descr = self.__unpack_data__(
1848                self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
1849                   self.__data__[rva:rva+bnd_descr_size],
1850                   file_offset = rva)
1851            if bnd_descr is None:
1852                # If can't parse directory then silently return.
1853                # This directory does not necesarily have to be valid to
1854                # still have a valid PE file
1855
1856                self.__warnings.append(
1857                    'The Bound Imports directory exists but can\'t be parsed.')
1858
1859                return
1860
1861            if bnd_descr.all_zeroes():
1862                break
1863
1864            rva += bnd_descr.sizeof()
1865
1866            forwarder_refs = []
1867            for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
1868                # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
1869                # IMAGE_BOUND_FORWARDER_REF have the same size.
1870                bnd_frwd_ref = self.__unpack_data__(
1871                    self.__IMAGE_BOUND_FORWARDER_REF_format__,
1872                    self.__data__[rva:rva+bnd_descr_size],
1873                    file_offset = rva)
1874                # OC Patch:
1875                if not bnd_frwd_ref:
1876                    raise PEFormatError(
1877                        "IMAGE_BOUND_FORWARDER_REF cannot be read")
1878                rva += bnd_frwd_ref.sizeof()
1879
1880                name_str =  self.get_string_from_data(
1881                    start+bnd_frwd_ref.OffsetModuleName, self.__data__)
1882
1883                if not name_str:
1884                    break
1885                forwarder_refs.append(BoundImportRefData(
1886                    struct = bnd_frwd_ref,
1887                    name = name_str))
1888
1889            name_str = self.get_string_from_data(
1890                start+bnd_descr.OffsetModuleName, self.__data__)
1891
1892            if not name_str:
1893                break
1894            bound_imports.append(
1895                BoundImportDescData(
1896                    struct = bnd_descr,
1897                    name = name_str,
1898                    entries = forwarder_refs))
1899
1900        return bound_imports
1901
1902
1903    def parse_directory_tls(self, rva, size):
1904        """"""
1905
1906        if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
1907            format = self.__IMAGE_TLS_DIRECTORY_format__
1908
1909        elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
1910            format = self.__IMAGE_TLS_DIRECTORY64_format__
1911
1912        tls_struct = self.__unpack_data__(
1913            format,
1914            self.get_data(rva),
1915            file_offset = self.get_offset_from_rva(rva))
1916
1917        if not tls_struct:
1918            return None
1919
1920        return TlsData( struct = tls_struct )
1921
1922
1923    def parse_relocations_directory(self, rva, size):
1924        """"""
1925
1926        rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
1927        rlc_size = rlc.sizeof()
1928        end = rva+size
1929
1930        relocations = []
1931        while rva<end:
1932
1933            # OC Patch:
1934            # Malware that has bad rva entries will cause an error.
1935            # Just continue on after an exception
1936            #
1937            try:
1938                rlc = self.__unpack_data__(
1939                    self.__IMAGE_BASE_RELOCATION_format__,
1940                    self.get_data(rva, rlc_size),
1941                    file_offset = self.get_offset_from_rva(rva) )
1942            except PEFormatError:
1943                self.__warnings.append(
1944                    'Invalid relocation information. Can\'t read ' +
1945                    'data at RVA: 0x%x' % rva)
1946                rlc = None
1947
1948            if not rlc:
1949                break
1950
1951            reloc_entries = self.parse_relocations(
1952                rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
1953
1954            relocations.append(
1955                BaseRelocationData(
1956                    struct = rlc,
1957                    entries = reloc_entries))
1958
1959            if not rlc.SizeOfBlock:
1960                break
1961            rva += rlc.SizeOfBlock
1962
1963        return relocations
1964
1965
1966    def parse_relocations(self, data_rva, rva, size):
1967        """"""
1968
1969        data = self.get_data(data_rva, size)
1970
1971        entries = []
1972        for idx in xrange(len(data)/2):
1973            word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
1974            reloc_type = (word>>12)
1975            reloc_offset = (word&0x0fff)
1976            entries.append(
1977                RelocationData(
1978                    type = reloc_type,
1979                    rva = reloc_offset+rva))
1980
1981        return entries
1982
1983
1984    def parse_debug_directory(self, rva, size):
1985        """"""
1986
1987        dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
1988        dbg_size = dbg.sizeof()
1989
1990        debug = []
1991        for idx in xrange(size/dbg_size):
1992            try:
1993                data = self.get_data(rva+dbg_size*idx, dbg_size)
1994            except PEFormatError, e:
1995                self.__warnings.append(
1996                    'Invalid debug information. Can\'t read ' +
1997                    'data at RVA: 0x%x' % rva)
1998                return None
1999
2000            dbg = self.__unpack_data__(
2001                self.__IMAGE_DEBUG_DIRECTORY_format__,
2002                data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
2003
2004            if not dbg:
2005                return None
2006
2007            debug.append(
2008                DebugData(
2009                    struct = dbg))
2010
2011        return debug
2012
2013
    def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
        """Parse the resources directory.

        Given the rva of the resources directory, it will process all
        its entries.

        The root will have the corresponding member of its structure,
        IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
        entries in the directory.

        Those entries will have, correspondingly, all the structure's
        members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
        "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
        representing upper layers of the tree. This one will also have
        an 'entries' attribute, pointing to the 3rd, and last, level.
        Another directory with more entries. Those last entries will
        have a new attribute (both 'leaf' or 'data_entry' can be used to
        access it). This structure finally points to the resource data.
        All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
        are available as its attributes.

        Arguments:
            rva       RVA of the directory table to parse.
            size      Not used by this method.
            base_rva  RVA which the offsets stored in the entries are
                      relative to. Defaults to "rva"; recursive calls pass
                      it along unchanged.
            level     Nesting depth of this directory, 0 for the outermost
                      call. Version information is only looked for at
                      level 0.

        Returns a ResourceDirData instance holding the directory structure
        and the list of entries parsed, or None (after logging a warning)
        if the directory table can't be read or unpacked. Parsing of the
        entries stops at the first one which can't be processed; the
        entries parsed up to that point are kept.
        """

        # OC Patch:
        # Remember the RVA of this directory, it's needed further down to
        # detect an entry pointing back to the directory containing it.
        original_rva = rva

        if base_rva is None:
            base_rva = rva

        # Not used anywhere else in this method.
        resources_section = self.get_section_by_rva(rva)

        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, e:
            self.__warnings.append(
                'Invalid resources directory. Can\'t read ' +
                'directory data at RVA: 0x%x' % rva)
            return None

        # Get the resource directory structure, that is, the header
        # of the table preceding the actual entries
        #
        resource_dir = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
        if resource_dir is None:
            # If can't parse resources directory then silently return.
            # This directory does not necessarily have to be valid to
            # still have a valid PE file
            self.__warnings.append(
                'Invalid resources directory. Can\'t parse ' +
                'directory data at RVA: 0x%x' % rva)
            return None

        dir_entries = []

        # Advance the rva to the position immediately following the directory
        # table header and pointing to the first entry in the table
        #
        rva += resource_dir.sizeof()

        number_of_entries = (
            resource_dir.NumberOfNamedEntries +
            resource_dir.NumberOfIdEntries )

        # Name wrappers created for the named entries; they are only
        # rendered once all the entries have been walked (see the end of
        # the method).
        strings_to_postprocess = list()

        for idx in xrange(number_of_entries):

            res = self.parse_resource_entry(rva)
            if res is None:
                self.__warnings.append(
                    'Error parsing the resources directory, ' +
                    'Entry %d is invalid, RVA = 0x%x. ' %
                    (idx, rva) )
                break


            entry_name = None
            entry_id = None

            # If all named entries have been processed, only Id ones
            # remain

            if idx >= resource_dir.NumberOfNamedEntries:
                entry_id = res.Name
            else:
                # The name is stored at an offset relative to the base RVA
                ustr_offset = base_rva+res.NameOffset
                try:
                    #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                    strings_to_postprocess.append(entry_name)

                except PEFormatError, excp:
                    # The entry is kept, with no name, and parsing goes on
                    self.__warnings.append(
                        'Error parsing the resources directory, ' +
                        'attempting to read entry name. ' +
                        'Can\'t read unicode string at offset 0x%x' %
                        (ustr_offset) )


            if res.DataIsDirectory:
                # OC Patch:
                #
                # One trick malware can do is to recursively reference
                # the next directory. This causes hilarity to ensue when
                # trying to parse everything correctly.
                # If the original RVA given to this function is equal to
                # the next one to parse, we assume that it's a trick.
                # Instead of raising a PEFormatError this would skip some
                # reasonable data so we just break.
                #
                # Note that only an entry pointing straight back to the
                # directory being parsed is detected by this check.
                #
                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
                if original_rva == (base_rva + res.OffsetToDirectory):

                    break

                else:
                    # Recurse into the subdirectory, one level deeper
                    entry_directory = self.parse_resources_directory(
                        base_rva+res.OffsetToDirectory,
                        base_rva=base_rva, level = level+1)

                # A subdirectory which could not be parsed ends the
                # processing of this directory
                if not entry_directory:
                    break
                dir_entries.append(
                    ResourceDirEntryData(
                        struct = res,
                        name = entry_name,
                        id = entry_id,
                        directory = entry_directory))

            else:
                # Leaf entry: it points to a data entry structure.
                # NOTE: the local name "struct" shadows the "struct" module
                # within this method.
                struct = self.parse_resource_data_entry(
                    base_rva + res.OffsetToDirectory)

                if struct:
                    # The language and sublanguage are taken from the two
                    # lowest bytes of the entry's Name field
                    entry_data = ResourceDataEntryData(
                        struct = struct,
                        lang = res.Name & 0xff,
                        sublang = (res.Name>>8) & 0xff)

                    dir_entries.append(
                        ResourceDirEntryData(
                            struct = res,
                            name = entry_name,
                            id = entry_id,
                            data = entry_data))

                else:
                    break



            # Check if this entry contains version information
            #
            if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
                # Reaching this point means the entry just processed was
                # appended to dir_entries (every failure above breaks out
                # of the loop).
                if len(dir_entries)>0:
                    last_entry = dir_entries[-1]

                rt_version_struct = None
                try:
                    # Follow the first entry of each of the two nested
                    # directories down to the data entry structure
                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
                except:
                    # Maybe a malformed directory structure...?
                    # Lets ignore it
                    pass

                if rt_version_struct is not None:
                    self.parse_version_information(rt_version_struct)

            # Move on to the next entry of the table
            rva += res.sizeof()


        # The sorted list of string RVAs is built here but not used
        # afterwards in this method.
        string_rvas = [s.get_rva() for s in strings_to_postprocess]
        string_rvas.sort()

        # Have every deferred entry name rendered (render_pascal_16)
        for idx, s in enumerate(strings_to_postprocess):
            s.render_pascal_16()


        resource_directory_data = ResourceDirData(
            struct = resource_dir,
            entries = dir_entries)

        return resource_directory_data
2200
2201
2202    def parse_resource_data_entry(self, rva):
2203        """Parse a data entry from the resources directory."""
2204
2205        try:
2206            # If the RVA is invalid all would blow up. Some EXEs seem to be
2207            # specially nasty and have an invalid RVA.
2208            data = self.get_data(rva)
2209        except PEFormatError, excp:
2210            self.__warnings.append(
2211                'Error parsing a resource directory data entry, ' +
2212                'the RVA is invalid: 0x%x' % ( rva ) )
2213            return None
2214
2215        data_entry = self.__unpack_data__(
2216            self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
2217            file_offset = self.get_offset_from_rva(rva) )
2218
2219        return data_entry
2220
2221
2222    def parse_resource_entry(self, rva):
2223        """Parse a directory entry from the resources directory."""
2224
2225        resource = self.__unpack_data__(
2226            self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
2227            file_offset = self.get_offset_from_rva(rva) )
2228
2229        if resource is None:
2230            return None
2231
2232        #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
2233        resource.NameOffset = resource.Name & 0x7FFFFFFFL
2234
2235        resource.__pad = resource.Name & 0xFFFF0000L
2236        resource.Id = resource.Name & 0x0000FFFFL
2237
2238        resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
2239        resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
2240
2241        return resource
2242
2243
2244    def parse_version_information(self, version_struct):
2245        """Parse version information structure.
2246
2247        The date will be made available in three attributes of the PE object.
2248
2249        VS_VERSIONINFO     will contain the first three fields of the main structure:
2250            'Length', 'ValueLength', and 'Type'
2251
2252        VS_FIXEDFILEINFO    will hold the rest of the fields, accessible as sub-attributes:
2253            'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
2254            'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
2255            'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
2256
2257        FileInfo    is a list of all StringFileInfo and VarFileInfo structures.
2258
2259        StringFileInfo structures will have a list as an attribute named 'StringTable'
2260        containing all the StringTable structures. Each of those structures contains a
2261        dictionary 'entries' with all the key/value version information string pairs.
2262
2263        VarFileInfo structures will have a list as an attribute named 'Var' containing
2264        all Var structures. Each Var structure will have a dictionary as an attribute
2265        named 'entry' which will contain the name and value of the Var.
2266        """
2267
2268
2269        # Retrieve the data for the version info resource
2270        #
2271        start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
2272        raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
2273
2274
2275        # Map the main structure and the subsequent string
2276        #
2277        versioninfo_struct = self.__unpack_data__(
2278            self.__VS_VERSIONINFO_format__, raw_data,
2279            file_offset = start_offset )
2280
2281        if versioninfo_struct is None:
2282            return
2283
2284        ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
2285        try:
2286            versioninfo_string = self.get_string_u_at_rva( ustr_offset )
2287        except PEFormatError, excp:
2288            self.__warnings.append(
2289                'Error parsing the version information, ' +
2290                'attempting to read VS_VERSION_INFO string. Can\'t ' +
2291                'read unicode string at offset 0x%x' % (
2292                ustr_offset ) )
2293
2294            versioninfo_string = None
2295
2296        # If the structure does not contain the expected name, it's assumed to be invalid
2297        #
2298        if versioninfo_string != u'VS_VERSION_INFO':
2299
2300            self.__warnings.append('Invalid VS_VERSION_INFO block')
2301            return
2302
2303
2304        # Set the PE object's VS_VERSIONINFO to this one
2305        #
2306        self.VS_VERSIONINFO = versioninfo_struct
2307
2308        # The the Key attribute to point to the unicode string identifying the structure
2309        #
2310        self.VS_VERSIONINFO.Key = versioninfo_string
2311
2312
2313        # Process the fixed version information, get the offset and structure
2314        #
2315        fixedfileinfo_offset = self.dword_align(
2316            versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
2317            version_struct.OffsetToData)
2318        fixedfileinfo_struct = self.__unpack_data__(
2319            self.__VS_FIXEDFILEINFO_format__,
2320            raw_data[fixedfileinfo_offset:],
2321            file_offset = start_offset+fixedfileinfo_offset )
2322
2323        if not fixedfileinfo_struct:
2324            return
2325
2326
2327        # Set the PE object's VS_FIXEDFILEINFO to this one
2328        #
2329        self.VS_FIXEDFILEINFO = fixedfileinfo_struct
2330
2331
2332        # Start parsing all the StringFileInfo and VarFileInfo structures
2333        #
2334
2335        # Get the first one
2336        #
2337        stringfileinfo_offset = self.dword_align(
2338            fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
2339            version_struct.OffsetToData)
2340        original_stringfileinfo_offset = stringfileinfo_offset
2341
2342
2343        # Set the PE object's attribute that will contain them all.
2344        #
2345        self.FileInfo = list()
2346
2347
2348        while True:
2349
2350            # Process the StringFileInfo/VarFileInfo struct
2351            #
2352            stringfileinfo_struct = self.__unpack_data__(
2353                self.__StringFileInfo_format__,
2354                raw_data[stringfileinfo_offset:],
2355                file_offset = start_offset+stringfileinfo_offset )
2356
2357            if stringfileinfo_struct is None:
2358                self.__warnings.append(
2359                    'Error parsing StringFileInfo/VarFileInfo struct' )
2360                return None
2361
2362            # Get the subsequent string defining the structure.
2363            #
2364            ustr_offset = ( version_struct.OffsetToData +
2365                stringfileinfo_offset + versioninfo_struct.sizeof() )
2366            try:
2367                stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
2368            except PEFormatError, excp:
2369                self.__warnings.append(
2370                    'Error parsing the version information, ' +
2371                    'attempting to read StringFileInfo string. Can\'t ' +
2372                    'read unicode string at offset 0x%x' %  ( ustr_offset ) )
2373                break
2374
2375            # Set such string as the Key attribute
2376            #
2377            stringfileinfo_struct.Key = stringfileinfo_string
2378
2379
2380            # Append the structure to the PE object's list
2381            #
2382            self.FileInfo.append(stringfileinfo_struct)
2383
2384
2385            # Parse a StringFileInfo entry
2386            #
2387            if stringfileinfo_string == u'StringFileInfo':
2388
2389                if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
2390
2391                    stringtable_offset = self.dword_align(
2392                        stringfileinfo_offset + stringfileinfo_struct.sizeof() +
2393                            2*(len(stringfileinfo_string)+1),
2394                        version_struct.OffsetToData)
2395
2396                    stringfileinfo_struct.StringTable = list()
2397
2398                    # Process the String Table entries
2399                    #
2400                    while True:
2401                        stringtable_struct = self.__unpack_data__(
2402                            self.__StringTable_format__,
2403                            raw_data[stringtable_offset:],
2404                            file_offset = start_offset+stringtable_offset )
2405
2406                        if not stringtable_struct:
2407                            break
2408
2409                        ustr_offset = ( version_struct.OffsetToData + stringtable_offset +
2410                            stringtable_struct.sizeof() )
2411                        try:
2412                            stringtable_string = self.get_string_u_at_rva( ustr_offset )
2413                        except PEFormatError, excp:
2414                            self.__warnings.append(
2415                                'Error parsing the version information, ' +
2416                                'attempting to read StringTable string. Can\'t ' +
2417                                'read unicode string at offset 0x%x' % ( ustr_offset ) )
2418                            break
2419
2420                        stringtable_struct.LangID = stringtable_string
2421                        stringtable_struct.entries = dict()
2422                        stringtable_struct.entries_offsets = dict()
2423                        stringtable_struct.entries_lengths = dict()
2424                        stringfileinfo_struct.StringTable.append(stringtable_struct)
2425
2426                        entry_offset = self.dword_align(
2427                            stringtable_offset + stringtable_struct.sizeof() +
2428                                2*(len(stringtable_string)+1),
2429                            version_struct.OffsetToData)
2430
2431                        # Process all entries in the string table
2432                        #
2433
2434                        while entry_offset < stringtable_offset + stringtable_struct.Length:
2435
2436                            string_struct = self.__unpack_data__(
2437                                self.__String_format__, raw_data[entry_offset:],
2438                                file_offset = start_offset+entry_offset )
2439
2440                            if not string_struct:
2441                                break
2442
2443                            ustr_offset = ( version_struct.OffsetToData + entry_offset +
2444                                string_struct.sizeof() )
2445                            try:
2446                                key = self.get_string_u_at_rva( ustr_offset )
2447                                key_offset = self.get_offset_from_rva( ustr_offset )
2448                            except PEFormatError, excp:
2449                                self.__warnings.append(
2450                                    'Error parsing the version information, ' +
2451                                    'attempting to read StringTable Key string. Can\'t ' +
2452                                    'read unicode string at offset 0x%x' % ( ustr_offset ) )
2453                                break
2454
2455                            value_offset = self.dword_align(
2456                                2*(len(key)+1) + entry_offset + string_struct.sizeof(),
2457                                version_struct.OffsetToData)
2458
2459                            ustr_offset = version_struct.OffsetToData + value_offset
2460                            try:
2461                                value = self.get_string_u_at_rva( ustr_offset,
2462                                    max_length = string_struct.ValueLength )
2463                                value_offset = self.get_offset_from_rva( ustr_offset )
2464                            except PEFormatError, excp:
2465                                self.__warnings.append(
2466                                    'Error parsing the version information, ' +
2467                                    'attempting to read StringTable Value string. ' +
2468                                    'Can\'t read unicode string at offset 0x%x' % (
2469                                    ustr_offset ) )
2470                                break
2471
2472                            if string_struct.Length == 0:
2473                                entry_offset = stringtable_offset + stringtable_struct.Length
2474                            else:
2475                                entry_offset = self.dword_align(
2476                                    string_struct.Length+entry_offset, version_struct.OffsetToData)
2477
2478                            key_as_char = []
2479                            for c in key:
2480                                if ord(c)>128:
2481                                    key_as_char.append('\\x%02x' %ord(c))
2482                                else:
2483                                    key_as_char.append(c)
2484
2485                            key_as_char = ''.join(key_as_char)
2486
2487                            setattr(stringtable_struct, key_as_char, value)
2488                            stringtable_struct.entries[key] = value
2489                            stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
2490                            stringtable_struct.entries_lengths[key] = (len(key), len(value))
2491
2492
2493                        stringtable_offset = self.dword_align(
2494                            stringtable_struct.Length + stringtable_offset,
2495                            version_struct.OffsetToData)
2496                        if stringtable_offset >= stringfileinfo_struct.Length:
2497                            break
2498
2499            # Parse a VarFileInfo entry
2500            #
2501            elif stringfileinfo_string == u'VarFileInfo':
2502
2503                varfileinfo_struct = stringfileinfo_struct
2504                varfileinfo_struct.name = 'VarFileInfo'
2505
2506                if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
2507
2508                    var_offset = self.dword_align(
2509                        stringfileinfo_offset + varfileinfo_struct.sizeof() +
2510                            2*(len(stringfileinfo_string)+1),
2511                        version_struct.OffsetToData)
2512
2513                    varfileinfo_struct.Var = list()
2514
2515                    # Process all entries
2516                    #
2517
2518                    while True:
2519                        var_struct = self.__unpack_data__(
2520                            self.__Var_format__,
2521                            raw_data[var_offset:],
2522                            file_offset = start_offset+var_offset )
2523
2524                        if not var_struct:
2525                            break
2526
2527                        ustr_offset = ( version_struct.OffsetToData + var_offset +
2528                            var_struct.sizeof() )
2529                        try:
2530                            var_string = self.get_string_u_at_rva( ustr_offset )
2531                        except PEFormatError, excp:
2532                            self.__warnings.append(
2533                                'Error parsing the version information, ' +
2534                                'attempting to read VarFileInfo Var string. ' +
2535                                'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
2536                            break
2537
2538
2539                        varfileinfo_struct.Var.append(var_struct)
2540
2541                        varword_offset = self.dword_align(
2542                            2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
2543                            version_struct.OffsetToData)
2544                        orig_varword_offset = varword_offset
2545
2546                        while varword_offset < orig_varword_offset + var_struct.ValueLength:
2547                            word1 = self.get_word_from_data(
2548                                raw_data[varword_offset:varword_offset+2], 0)
2549                            word2 = self.get_word_from_data(
2550                                raw_data[varword_offset+2:varword_offset+4], 0)
2551                            varword_offset += 4
2552
2553                            var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}
2554
2555                        var_offset = self.dword_align(
2556                            var_offset+var_struct.Length, version_struct.OffsetToData)
2557
2558                        if var_offset <= var_offset+var_struct.Length:
2559                            break
2560
2561
2562
2563            # Increment and align the offset
2564            #
2565            stringfileinfo_offset = self.dword_align(
2566                stringfileinfo_struct.Length+stringfileinfo_offset,
2567                version_struct.OffsetToData)
2568
2569            # Check if all the StringFileInfo and VarFileInfo items have been processed
2570            #
2571            if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
2572                break
2573
2574
2575
2576    def parse_export_directory(self, rva, size):
2577        """Parse the export directory.
2578
2579        Given the rva of the export directory, it will process all
2580        its entries.
2581
2582        The exports will be made available through a list "exports"
2583        containing a tuple with the following elements:
2584
2585            (ordinal, symbol_address, symbol_name)
2586
2587        And also through a dicionary "exports_by_ordinal" whose keys
2588        will be the ordinals and the values tuples of the from:
2589
2590            (symbol_address, symbol_name)
2591
2592        The symbol addresses are relative, not absolute.
2593        """
2594
2595        try:
2596            export_dir =  self.__unpack_data__(
2597                self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
2598                file_offset = self.get_offset_from_rva(rva) )
2599        except PEFormatError:
2600            self.__warnings.append(
2601                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
2602            return
2603
2604        if not export_dir:
2605            return
2606
2607        try:
2608            address_of_names = self.get_data(
2609                export_dir.AddressOfNames, export_dir.NumberOfNames*4)
2610            address_of_name_ordinals = self.get_data(
2611                export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
2612            address_of_functions = self.get_data(
2613                export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
2614        except PEFormatError:
2615            self.__warnings.append(
2616                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
2617            return
2618
2619        exports = []
2620
2621        for i in xrange(export_dir.NumberOfNames):
2622
2623
2624            symbol_name = self.get_string_at_rva(
2625                self.get_dword_from_data(address_of_names, i))
2626
2627            symbol_ordinal = self.get_word_from_data(
2628                address_of_name_ordinals, i)
2629
2630
2631            if symbol_ordinal*4<len(address_of_functions):
2632                symbol_address = self.get_dword_from_data(
2633                    address_of_functions, symbol_ordinal)
2634            else:
2635                # Corrupt? a bad pointer... we assume it's all
2636                # useless, no exports
2637                return None
2638
2639            # If the funcion's rva points within the export directory
2640            # it will point to a string with the forwarded symbol's string
2641            # instead of pointing the the function start address.
2642
2643            if symbol_address>=rva and symbol_address<rva+size:
2644                forwarder_str = self.get_string_at_rva(symbol_address)
2645            else:
2646                forwarder_str = None
2647
2648
2649            exports.append(
2650                ExportData(
2651                    ordinal = export_dir.Base+symbol_ordinal,
2652                    address = symbol_address,
2653                    name = symbol_name,
2654                    forwarder = forwarder_str))
2655
2656        ordinals = [exp.ordinal for exp in exports]
2657
2658        for idx in xrange(export_dir.NumberOfFunctions):
2659
2660            if not idx+export_dir.Base in ordinals:
2661                symbol_address = self.get_dword_from_data(
2662                    address_of_functions,
2663                    idx)
2664
2665                #
2666                # Checking for forwarder again.
2667                #
2668                if symbol_address>=rva and symbol_address<rva+size:
2669                    forwarder_str = self.get_string_at_rva(symbol_address)
2670                else:
2671                    forwarder_str = None
2672
2673                exports.append(
2674                    ExportData(
2675                        ordinal = export_dir.Base+idx,
2676                        address = symbol_address,
2677                        name = None,
2678                        forwarder = forwarder_str))
2679
2680        return ExportDirData(
2681                struct = export_dir,
2682                symbols = exports)
2683
2684
2685    def dword_align(self, offset, base):
2686        offset += base
2687        return (offset+3) - ((offset+3)%4) - base
2688
2689
2690
2691    def parse_delay_import_directory(self, rva, size):
2692        """Walk and parse the delay import directory."""
2693
2694        import_descs =  []
2695        while True:
2696            try:
2697                # If the RVA is invalid all would blow up. Some PEs seem to be
2698                # specially nasty and have an invalid RVA.
2699                data = self.get_data(rva)
2700            except PEFormatError, e:
2701                self.__warnings.append(
2702                    'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
2703                break
2704
2705            import_desc =  self.__unpack_data__(
2706                self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
2707                data, file_offset = self.get_offset_from_rva(rva) )
2708
2709
2710            # If the structure is all zeores, we reached the end of the list
2711            if not import_desc or import_desc.all_zeroes():
2712                break
2713
2714
2715            rva += import_desc.sizeof()
2716
2717            try:
2718                import_data =  self.parse_imports(
2719                    import_desc.pINT,
2720                    import_desc.pIAT,
2721                    None)
2722            except PEFormatError, e:
2723                self.__warnings.append(
2724                    'Error parsing the Delay import directory. ' +
2725                    'Invalid import data at RVA: 0x%x' % ( rva ) )
2726                break
2727
2728            if not import_data:
2729                continue
2730
2731
2732            dll = self.get_string_at_rva(import_desc.szName)
2733            if dll:
2734                import_descs.append(
2735                    ImportDescData(
2736                        struct = import_desc,
2737                        imports = import_data,
2738                        dll = dll))
2739
2740        return import_descs
2741
2742
2743
2744    def parse_import_directory(self, rva, size):
2745        """Walk and parse the import directory."""
2746
2747        import_descs =  []
2748        while True:
2749            try:
2750                # If the RVA is invalid all would blow up. Some EXEs seem to be
2751                # specially nasty and have an invalid RVA.
2752                data = self.get_data(rva)
2753            except PEFormatError, e:
2754                self.__warnings.append(
2755                    'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
2756                break
2757
2758            import_desc =  self.__unpack_data__(
2759                self.__IMAGE_IMPORT_DESCRIPTOR_format__,
2760                data, file_offset = self.get_offset_from_rva(rva) )
2761
2762            # If the structure is all zeores, we reached the end of the list
2763            if not import_desc or import_desc.all_zeroes():
2764                break
2765
2766            rva += import_desc.sizeof()
2767
2768            try:
2769                import_data =  self.parse_imports(
2770                    import_desc.OriginalFirstThunk,
2771                    import_desc.FirstThunk,
2772                    import_desc.ForwarderChain)
2773            except PEFormatError, excp:
2774                self.__warnings.append(
2775                    'Error parsing the Import directory. ' +
2776                    'Invalid Import data at RVA: 0x%x' % ( rva ) )
2777                break
2778                #raise excp
2779
2780            if not import_data:
2781                continue
2782
2783            dll = self.get_string_at_rva(import_desc.Name)
2784            if dll:
2785                import_descs.append(
2786                    ImportDescData(
2787                        struct = import_desc,
2788                        imports = import_data,
2789                        dll = dll))
2790
2791        return import_descs
2792
2793
2794
2795    def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
2796        """Parse the imported symbols.
2797
2798        It will fill a list, which will be avalable as the dictionary
2799        attribute "imports". Its keys will be the DLL names and the values
2800        all the symbols imported from that object.
2801        """
2802
2803        imported_symbols = []
2804        imports_section = self.get_section_by_rva(first_thunk)
2805        if not imports_section:
2806            raise PEFormatError, 'Invalid/corrupt imports.'
2807
2808
2809        # Import Lookup Table. Contains ordinals or pointers to strings.
2810        ilt = self.get_import_table(original_first_thunk)
2811        # Import Address Table. May have identical content to ILT if
2812        # PE file is not bounded, Will contain the address of the
2813        # imported symbols once the binary is loaded or if it is already
2814        # bound.
2815        iat = self.get_import_table(first_thunk)
2816
2817        # OC Patch:
2818        # Would crash if iat or ilt had None type
2819        if not iat and not ilt:
2820            raise PEFormatError(
2821                'Invalid Import Table information. ' +
2822                'Both ILT and IAT appear to be broken.')
2823
2824        if not iat and ilt:
2825            table = ilt
2826        elif iat and not ilt:
2827            table = iat
2828        elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
2829            table = ilt
2830        elif (ilt and len(ilt))==0 and (iat and len(iat)):
2831            table = iat
2832        else:
2833            return None
2834
2835        for idx in xrange(len(table)):
2836
2837            imp_ord = None
2838            imp_hint = None
2839            imp_name = None
2840            hint_name_table_rva = None
2841
2842            if table[idx].AddressOfData:
2843
2844                if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
2845                    ordinal_flag = IMAGE_ORDINAL_FLAG
2846                elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
2847                    ordinal_flag = IMAGE_ORDINAL_FLAG64
2848
2849                # If imported by ordinal, we will append the ordinal number
2850                #
2851                if table[idx].AddressOfData & ordinal_flag:
2852                    import_by_ordinal = True
2853                    imp_ord = table[idx].AddressOfData & 0xffff
2854                    imp_name = None
2855                else:
2856                    import_by_ordinal = False
2857                    try:
2858                        hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
2859                        data = self.get_data(hint_name_table_rva, 2)
2860                        # Get the Hint
2861                        imp_hint = self.get_word_from_data(data, 0)
2862                        imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
2863                    except PEFormatError, e:
2864                        pass
2865
2866            imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
2867
2868            if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
2869                imp_bound = iat[idx].AddressOfData
2870            else:
2871                imp_bound = None
2872
2873            if imp_name != '' and (imp_ord or imp_name):
2874                imported_symbols.append(
2875                    ImportData(
2876                        import_by_ordinal = import_by_ordinal,
2877                        ordinal = imp_ord,
2878                        hint = imp_hint,
2879                        name = imp_name,
2880                        bound = imp_bound,
2881                        address = imp_address,
2882                        hint_name_table_rva = hint_name_table_rva))
2883
2884        return imported_symbols
2885
2886
2887
2888    def get_import_table(self, rva):
2889
2890        table = []
2891
2892        while True and rva:
2893            try:
2894                data = self.get_data(rva)
2895            except PEFormatError, e:
2896                self.__warnings.append(
2897                    'Error parsing the import table. ' +
2898                    'Invalid data at RVA: 0x%x' % ( rva ) )
2899                return None
2900
2901            if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
2902                format = self.__IMAGE_THUNK_DATA_format__
2903            elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
2904                format = self.__IMAGE_THUNK_DATA64_format__
2905
2906            thunk_data = self.__unpack_data__(
2907                format, data, file_offset=self.get_offset_from_rva(rva) )
2908
2909            if not thunk_data or thunk_data.all_zeroes():
2910                break
2911
2912            rva += thunk_data.sizeof()
2913
2914            table.append(thunk_data)
2915
2916        return table
2917
2918
2919    def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
2920        """Returns the data corresponding to the memory layout of the PE file.
2921
2922        The data includes the PE header and the sections loaded at offsets
2923        corresponding to their relative virtual addresses. (the VirtualAddress
2924        section header member).
2925        Any offset in this data corresponds to the absolute memory address
2926        ImageBase+offset.
2927
2928        The optional argument 'max_virtual_address' provides with means of limiting
2929        which section are processed.
2930        Any section with their VirtualAddress beyond this value will be skipped.
2931        Normally, sections with values beyond this range are just there to confuse
2932        tools. It's a common trick to see in packed executables.
2933
2934        If the 'ImageBase' optional argument is supplied, the file's relocations
2935        will be applied to the image by calling the 'relocate_image()' method.
2936        """
2937
2938        # Collect all sections in one code block
2939        data = self.header
2940        for section in self.sections:
2941
2942            # Miscellanous integrity tests.
2943            # Some packer will set these to bogus values to
2944            # make tools go nuts.
2945            #
2946            if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
2947                continue
2948
2949            if section.SizeOfRawData > len(self.__data__):
2950                continue
2951
2952            if section.PointerToRawData > len(self.__data__):
2953                continue
2954
2955            if section.VirtualAddress >= max_virtual_address:
2956                continue
2957
2958            padding_length = section.VirtualAddress - len(data)
2959
2960            if padding_length>0:
2961                data += '\0'*padding_length
2962            elif padding_length<0:
2963                data = data[:padding_length]
2964
2965            data += section.data
2966
2967        return data
2968
2969
2970    def get_data(self, rva, length=None):
2971        """Get data regardless of the section where it lies on.
2972
2973        Given a rva and the size of the chunk to retrieve, this method
2974        will find the section where the data lies and return the data.
2975        """
2976
2977        s = self.get_section_by_rva(rva)
2978
2979        if not s:
2980            if rva<len(self.header):
2981                if length:
2982                    end = rva+length
2983                else:
2984                    end = None
2985                return self.header[rva:end]
2986
2987            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
2988
2989        return s.get_data(rva, length)
2990
2991
2992    def get_rva_from_offset(self, offset):
2993        """Get the rva corresponding to this file offset. """
2994
2995        s = self.get_section_by_offset(offset)
2996        if not s:
2997            raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
2998        return s.get_rva_from_offset(offset)
2999
3000    def get_offset_from_rva(self, rva):
3001        """Get the file offset corresponding to this rva.
3002
3003        Given a rva , this method will find the section where the
3004        data lies and return the offset within the file.
3005        """
3006
3007        s = self.get_section_by_rva(rva)
3008        if not s:
3009
3010            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
3011
3012        return s.get_offset_from_rva(rva)
3013
3014
3015    def get_string_at_rva(self, rva):
3016        """Get an ASCII string located at the given address."""
3017
3018        s = self.get_section_by_rva(rva)
3019        if not s:
3020            if rva<len(self.header):
3021                return self.get_string_from_data(rva, self.header)
3022            return None
3023
3024        return self.get_string_from_data(rva-s.VirtualAddress, s.data)
3025
3026
3027    def get_string_from_data(self, offset, data):
3028        """Get an ASCII string from within the data."""
3029
3030        # OC Patch
3031        b = None
3032
3033        try:
3034            b = data[offset]
3035        except IndexError:
3036            return ''
3037
3038        s = ''
3039        while ord(b):
3040            s += b
3041            offset += 1
3042            try:
3043                b = data[offset]
3044            except IndexError:
3045                break
3046
3047        return s
3048
3049
3050    def get_string_u_at_rva(self, rva, max_length = 2**16):
3051        """Get an Unicode string located at the given address."""
3052
3053        try:
3054            # If the RVA is invalid all would blow up. Some EXEs seem to be
3055            # specially nasty and have an invalid RVA.
3056            data = self.get_data(rva, 2)
3057        except PEFormatError, e:
3058            return None
3059
3060        #length = struct.unpack('<H', data)[0]
3061
3062        s = u''
3063        for idx in xrange(max_length):
3064            try:
3065                uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
3066            except struct.error:
3067                break
3068
3069            if unichr(uchr) == u'\0':
3070                break
3071            s += unichr(uchr)
3072
3073        return s
3074
3075
3076    def get_section_by_offset(self, offset):
3077        """Get the section containing the given file offset."""
3078
3079        sections = [s for s in self.sections if s.contains_offset(offset)]
3080
3081        if sections:
3082            return sections[0]
3083
3084        return None
3085
3086
3087    def get_section_by_rva(self, rva):
3088        """Get the section containing the given address."""
3089
3090        sections = [s for s in self.sections if s.contains_rva(rva)]
3091
3092        if sections:
3093            return sections[0]
3094
3095        return None
3096
    def __str__(self):
        """Return the same text dump_info() produces."""
        return self.dump_info()
3099
3100
    def print_info(self):
        """Print all the PE header information in a human readable form.

        The text printed is the one returned by dump_info().
        """
        print self.dump_info()
3104
3105
3106    def dump_info(self, dump=None):
3107        """Dump all the PE header information into human readable string."""
3108
3109
3110        if dump is None:
3111            dump = Dump()
3112
3113        warnings = self.get_warnings()
3114        if warnings:
3115            dump.add_header('Parsing Warnings')
3116            for warning in warnings:
3117                dump.add_line(warning)
3118                dump.add_newline()
3119
3120
3121        dump.add_header('DOS_HEADER')
3122        dump.add_lines(self.DOS_HEADER.dump())
3123        dump.add_newline()
3124
3125        dump.add_header('NT_HEADERS')
3126        dump.add_lines(self.NT_HEADERS.dump())
3127        dump.add_newline()
3128
3129        dump.add_header('FILE_HEADER')
3130        dump.add_lines(self.FILE_HEADER.dump())
3131
3132        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
3133
3134        dump.add('Flags: ')
3135        flags = []
3136        for flag in image_flags:
3137            if getattr(self.FILE_HEADER, flag[0]):
3138                flags.append(flag[0])
3139        dump.add_line(', '.join(flags))
3140        dump.add_newline()
3141
3142        if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
3143            dump.add_header('OPTIONAL_HEADER')
3144            dump.add_lines(self.OPTIONAL_HEADER.dump())
3145
3146        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
3147
3148        dump.add('DllCharacteristics: ')
3149        flags = []
3150        for flag in dll_characteristics_flags:
3151            if getattr(self.OPTIONAL_HEADER, flag[0]):
3152                flags.append(flag[0])
3153        dump.add_line(', '.join(flags))
3154        dump.add_newline()
3155
3156
3157        dump.add_header('PE Sections')
3158
3159        section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
3160
3161        for section in self.sections:
3162            dump.add_lines(section.dump())
3163            dump.add('Flags: ')
3164            flags = []
3165            for flag in section_flags:
3166                if getattr(section, flag[0]):
3167                    flags.append(flag[0])
3168            dump.add_line(', '.join(flags))
3169            dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
3170            if md5 is not None:
3171                dump.add_line('MD5     hash: %s' % section.get_hash_md5() )
3172            if sha1 is not None:
3173                dump.add_line('SHA-1   hash: %s' % section.get_hash_sha1() )
3174            if sha256 is not None:
3175                dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
3176            if sha512 is not None:
3177                dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
3178            dump.add_newline()
3179
3180
3181
3182        if (hasattr(self, 'OPTIONAL_HEADER') and
3183            hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
3184
3185            dump.add_header('Directories')
3186            for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
3187                directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
3188                dump.add_lines(directory.dump())
3189            dump.add_newline()
3190
3191
3192        if hasattr(self, 'VS_VERSIONINFO'):
3193            dump.add_header('Version Information')
3194            dump.add_lines(self.VS_VERSIONINFO.dump())
3195            dump.add_newline()
3196
3197            if hasattr(self, 'VS_FIXEDFILEINFO'):
3198                dump.add_lines(self.VS_FIXEDFILEINFO.dump())
3199                dump.add_newline()
3200
3201            if hasattr(self, 'FileInfo'):
3202                for entry in self.FileInfo:
3203                    dump.add_lines(entry.dump())
3204                    dump.add_newline()
3205
3206                    if hasattr(entry, 'StringTable'):
3207                        for st_entry in entry.StringTable:
3208                            [dump.add_line('  '+line) for line in st_entry.dump()]
3209                            dump.add_line('  LangID: '+st_entry.LangID)
3210                            dump.add_newline()
3211                            for str_entry in st_entry.entries.items():
3212                                dump.add_line('    '+str_entry[0]+': '+str_entry[1])
3213                        dump.add_newline()
3214
3215                    elif hasattr(entry, 'Var'):
3216                        for var_entry in entry.Var:
3217                            if hasattr(var_entry, 'entry'):
3218                                [dump.add_line('  '+line) for line in var_entry.dump()]
3219                                dump.add_line(
3220                                    '    ' + var_entry.entry.keys()[0] +
3221                                    ': ' + var_entry.entry.values()[0])
3222
3223                        dump.add_newline()
3224
3225
3226
3227        if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
3228            dump.add_header('Exported symbols')
3229            dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
3230            dump.add_newline()
3231            dump.add_line('%-10s   %-10s  %s' % ('Ordinal', 'RVA', 'Name'))
3232            for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
3233                dump.add('%-10d 0x%08Xh    %s' % (
3234                    export.ordinal, export.address, export.name))
3235                if export.forwarder:
3236                    dump.add_line(' forwarder: %s' % export.forwarder)
3237                else:
3238                    dump.add_newline()
3239
3240            dump.add_newline()
3241
3242        if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
3243            dump.add_header('Imported symbols')
3244            for module in self.DIRECTORY_ENTRY_IMPORT:
3245                dump.add_lines(module.struct.dump())
3246                dump.add_newline()
3247                for symbol in module.imports:
3248
3249                    if symbol.import_by_ordinal is True:
3250                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
3251                            module.dll, str(symbol.ordinal)))
3252                    else:
3253                        dump.add('%s.%s Hint[%s]' % (
3254                            module.dll, symbol.name, str(symbol.hint)))
3255
3256                    if symbol.bound:
3257                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
3258                    else:
3259                        dump.add_newline()
3260                dump.add_newline()
3261
3262
3263        if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
3264            dump.add_header('Bound imports')
3265            for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
3266
3267                dump.add_lines(bound_imp_desc.struct.dump())
3268                dump.add_line('DLL: %s' % bound_imp_desc.name)
3269                dump.add_newline()
3270
3271                for bound_imp_ref in bound_imp_desc.entries:
3272                    dump.add_lines(bound_imp_ref.struct.dump(), 4)
3273                    dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
3274                    dump.add_newline()
3275
3276
3277        if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
3278            dump.add_header('Delay Imported symbols')
3279            for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
3280
3281                dump.add_lines(module.struct.dump())
3282                dump.add_newline()
3283
3284                for symbol in module.imports:
3285                    if symbol.import_by_ordinal is True:
3286                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
3287                            module.dll, str(symbol.ordinal)))
3288                    else:
3289                        dump.add('%s.%s Hint[%s]' % (
3290                            module.dll, symbol.name, str(symbol.hint)))
3291
3292                    if symbol.bound:
3293                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
3294                    else:
3295                        dump.add_newline()
3296                dump.add_newline()
3297
3298
3299        if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
3300            dump.add_header('Resource directory')
3301
3302            dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
3303
3304            for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
3305
3306                if resource_type.name is not None:
3307                    dump.add_line('Name: [%s]' % resource_type.name, 2)
3308                else:
3309                    dump.add_line('Id: [0x%X] (%s)' % (
3310                        resource_type.struct.Id, RESOURCE_TYPE.get(
3311                            resource_type.struct.Id, '-')),
3312                        2)
3313
3314                dump.add_lines(resource_type.struct.dump(), 2)
3315
3316                if hasattr(resource_type, 'directory'):
3317
3318                    dump.add_lines(resource_type.directory.struct.dump(), 4)
3319
3320                    for resource_id in resource_type.directory.entries:
3321
3322                        if resource_id.name is not None:
3323                            dump.add_line('Name: [%s]' % resource_id.name, 6)
3324                        else:
3325                            dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
3326
3327                        dump.add_lines(resource_id.struct.dump(), 6)
3328
3329                        if hasattr(resource_id, 'directory'):
3330                            dump.add_lines(resource_id.directory.struct.dump(), 8)
3331
3332                            for resource_lang in resource_id.directory.entries:
3333                            #    dump.add_line('\\--- LANG [%d,%d][%s]' % (
3334                            #        resource_lang.data.lang,
3335                            #        resource_lang.data.sublang,
3336                            #        LANG[resource_lang.data.lang]), 8)
3337                                dump.add_lines(resource_lang.struct.dump(), 10)
3338                                dump.add_lines(resource_lang.data.struct.dump(), 12)
3339                dump.add_newline()
3340
3341            dump.add_newline()
3342
3343
3344        if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and
3345             self.DIRECTORY_ENTRY_TLS and
3346             self.DIRECTORY_ENTRY_TLS.struct ):
3347
3348            dump.add_header('TLS')
3349            dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
3350            dump.add_newline()
3351
3352
3353        if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
3354            dump.add_header('Debug information')
3355            for dbg in self.DIRECTORY_ENTRY_DEBUG:
3356                dump.add_lines(dbg.struct.dump())
3357                try:
3358                    dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
3359                except KeyError:
3360                    dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
3361                dump.add_newline()
3362
3363
3364        if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
3365            dump.add_header('Base relocations')
3366            for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
3367                dump.add_lines(base_reloc.struct.dump())
3368                for reloc in base_reloc.entries:
3369                    try:
3370                        dump.add_line('%08Xh %s' % (
3371                            reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
3372                    except KeyError:
3373                        dump.add_line('0x%08X 0x%x(Unknown)' % (
3374                            reloc.rva, reloc.type), 4)
3375                dump.add_newline()
3376
3377
3378        return dump.get_text()
3379
3380    # OC Patch
3381    def get_physical_by_rva(self, rva):
3382        """Gets the physical address in the PE file from an RVA value."""
3383        try:
3384            return self.get_offset_from_rva(rva)
3385        except Exception:
3386            return None
3387
3388
3389    ##
3390    # Double-Word get/set
3391    ##
3392
3393    def get_data_from_dword(self, dword):
3394        """Return a four byte string representing the double word value. (little endian)."""
3395        return struct.pack('<L', dword)
3396
3397
3398    def get_dword_from_data(self, data, offset):
3399        """Convert four bytes of data to a double word (little endian)
3400
3401        'offset' is assumed to index into a dword array. So setting it to
3402        N will return a dword out of the data sarting at offset N*4.
3403
3404        Returns None if the data can't be turned into a double word.
3405        """
3406
3407        if (offset+1)*4 > len(data):
3408            return None
3409
3410        return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
3411
3412
3413    def get_dword_at_rva(self, rva):
3414        """Return the double word value at the given RVA.
3415
3416        Returns None if the value can't be read, i.e. the RVA can't be mapped
3417        to a file offset.
3418        """
3419
3420        try:
3421            return self.get_dword_from_data(self.get_data(rva)[:4], 0)
3422        except PEFormatError:
3423            return None
3424
3425
3426    def get_dword_from_offset(self, offset):
3427        """Return the double word value at the given file offset. (little endian)"""
3428
3429        if offset+4 > len(self.__data__):
3430            return None
3431
3432        return self.get_dword_from_data(self.__data__[offset:offset+4], 0)
3433
3434
3435    def set_dword_at_rva(self, rva, dword):
3436        """Set the double word value at the file offset corresponding to the given RVA."""
3437        return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
3438
3439
3440    def set_dword_at_offset(self, offset, dword):
3441        """Set the double word value at the given file offset."""
3442        return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
3443
3444
3445
3446    ##
3447    # Word get/set
3448    ##
3449
3450    def get_data_from_word(self, word):
3451        """Return a two byte string representing the word value. (little endian)."""
3452        return struct.pack('<H', word)
3453
3454
3455    def get_word_from_data(self, data, offset):
3456        """Convert two bytes of data to a word (little endian)
3457
3458        'offset' is assumed to index into a word array. So setting it to
3459        N will return a dword out of the data sarting at offset N*2.
3460
3461        Returns None if the data can't be turned into a word.
3462        """
3463
3464        if (offset+1)*2 > len(data):
3465            return None
3466
3467        return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]
3468
3469
3470    def get_word_at_rva(self, rva):
3471        """Return the word value at the given RVA.
3472
3473        Returns None if the value can't be read, i.e. the RVA can't be mapped
3474        to a file offset.
3475        """
3476
3477        try:
3478            return self.get_word_from_data(self.get_data(rva)[:2], 0)
3479        except PEFormatError:
3480            return None
3481
3482
3483    def get_word_from_offset(self, offset):
3484        """Return the word value at the given file offset. (little endian)"""
3485
3486        if offset+2 > len(self.__data__):
3487            return None
3488
3489        return self.get_word_from_data(self.__data__[offset:offset+2], 0)
3490
3491
3492    def set_word_at_rva(self, rva, word):
3493        """Set the word value at the file offset corresponding to the given RVA."""
3494        return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
3495
3496
3497    def set_word_at_offset(self, offset, word):
3498        """Set the word value at the given file offset."""
3499        return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
3500
3501
3502    ##
3503    # Quad-Word get/set
3504    ##
3505
3506    def get_data_from_qword(self, word):
3507        """Return a eight byte string representing the quad-word value. (little endian)."""
3508        return struct.pack('<Q', word)
3509
3510
3511    def get_qword_from_data(self, data, offset):
3512        """Convert eight bytes of data to a word (little endian)
3513
3514        'offset' is assumed to index into a word array. So setting it to
3515        N will return a dword out of the data sarting at offset N*8.
3516
3517        Returns None if the data can't be turned into a quad word.
3518        """
3519
3520        if (offset+1)*8 > len(data):
3521            return None
3522
3523        return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]
3524
3525
3526    def get_qword_at_rva(self, rva):
3527        """Return the quad-word value at the given RVA.
3528
3529        Returns None if the value can't be read, i.e. the RVA can't be mapped
3530        to a file offset.
3531        """
3532
3533        try:
3534            return self.get_qword_from_data(self.get_data(rva)[:8], 0)
3535        except PEFormatError:
3536            return None
3537
3538
3539    def get_qword_from_offset(self, offset):
3540        """Return the quad-word value at the given file offset. (little endian)"""
3541
3542        if offset+8 > len(self.__data__):
3543            return None
3544
3545        return self.get_qword_from_data(self.__data__[offset:offset+8], 0)
3546
3547
3548    def set_qword_at_rva(self, rva, qword):
3549        """Set the quad-word value at the file offset corresponding to the given RVA."""
3550        return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
3551
3552
3553    def set_qword_at_offset(self, offset, qword):
3554        """Set the quad-word value at the given file offset."""
3555        return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
3556
3557
3558
3559    ##
3560    # Set bytes
3561    ##
3562
3563
3564    def set_bytes_at_rva(self, rva, data):
3565        """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
3566
3567        Return True if successful, False otherwise. It can fail if the
3568        offset is outside the file's boundaries.
3569        """
3570
3571        offset = self.get_physical_by_rva(rva)
3572        if not offset:
3573            raise False
3574
3575        return self.set_bytes_at_offset(offset, data)
3576
3577
3578    def set_bytes_at_offset(self, offset, data):
3579        """Overwrite the bytes at the given file offset with the given string.
3580
3581        Return True if successful, False otherwise. It can fail if the
3582        offset is outside the file's boundaries.
3583        """
3584
3585        if not isinstance(data, str):
3586            raise TypeError('data should be of type: str')
3587
3588        if offset >= 0 and offset < len(self.__data__):
3589            self.__data__ = ( self.__data__[:offset] +
3590                data +
3591                self.__data__[offset+len(data):] )
3592        else:
3593            return False
3594
3595        # Refresh the section's data with the modified information
3596        #
3597        for section in self.sections:
3598            section_data_start = section.PointerToRawData
3599            section_data_end = section_data_start+section.SizeOfRawData
3600            section.data = self.__data__[section_data_start:section_data_end]
3601
3602        return True
3603
3604
3605
3606    def relocate_image(self, new_ImageBase):
3607        """Apply the relocation information to the image using the provided new image base.
3608
3609        This method will apply the relocation information to the image. Given the new base,
3610        all the relocations will be processed and both the raw data and the section's data
3611        will be fixed accordingly.
3612        The resulting image can be retrieved as well through the method:
3613
3614            get_memory_mapped_image()
3615
3616        In order to get something that would more closely match what could be found in memory
3617        once the Windows loader finished its work.
3618        """
3619
3620        relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
3621
3622
3623        for reloc in self.DIRECTORY_ENTRY_BASERELOC:
3624
3625            virtual_address = reloc.struct.VirtualAddress
3626            size_of_block = reloc.struct.SizeOfBlock
3627
3628            # We iterate with an index because if the relocation is of type
3629            # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
3630            # at once and skip it for the next interation
3631            #
3632            entry_idx = 0
3633            while entry_idx<len(reloc.entries):
3634
3635                entry = reloc.entries[entry_idx]
3636                entry_idx += 1
3637
3638                if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
3639                    # Nothing to do for this type of relocation
3640                    pass
3641
3642                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
3643                    # Fix the high 16bits of a relocation
3644                    #
3645                    # Add high 16bits of relocation_difference to the
3646                    # 16bit value at RVA=entry.rva
3647
3648                    self.set_word_at_rva(
3649                        entry.rva,
3650                        ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
3651
3652                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
3653                    # Fix the low 16bits of a relocation
3654                    #
3655                    # Add low 16 bits of relocation_difference to the 16bit value
3656                    # at RVA=entry.rva
3657
3658                    self.set_word_at_rva(
3659                        entry.rva,
3660                        ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
3661
3662                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
3663                    # Handle all high and low parts of a 32bit relocation
3664                    #
3665                    # Add relocation_difference to the value at RVA=entry.rva
3666
3667                    self.set_dword_at_rva(
3668                        entry.rva,
3669                        self.get_dword_at_rva(entry.rva)+relocation_difference)
3670
3671                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
3672                    # Fix the high 16bits of a relocation and adjust
3673                    #
3674                    # Add high 16bits of relocation_difference to the 32bit value
3675                    # composed from the (16bit value at RVA=entry.rva)<<16 plus
3676                    # the 16bit value at the next relocation entry.
3677                    #
3678
3679                    # If the next entry is beyond the array's limits,
3680                    # abort... the table is corrupt
3681                    #
3682                    if entry_idx == len(reloc.entries):
3683                        break
3684
3685                    next_entry = reloc.entries[entry_idx]
3686                    entry_idx += 1
3687                    self.set_word_at_rva( entry.rva,
3688                        ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
3689                        relocation_difference & 0xffff0000) >> 16 )
3690
3691                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
3692                    # Apply the difference to the 64bit value at the offset
3693                    # RVA=entry.rva
3694
3695                    self.set_qword_at_rva(
3696                        entry.rva,
3697                        self.get_qword_at_rva(entry.rva) + relocation_difference)
3698
3699
3700    def verify_checksum(self):
3701
3702        return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
3703
3704
3705    def generate_checksum(self):
3706
3707        # Get the offset to the CheckSum field in the OptionalHeader
3708        #
3709        checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
3710
3711        checksum = 0
3712
3713        for i in range( len(self.__data__) / 4 ):
3714
3715            # Skip the checksum field
3716            #
3717            if i == checksum_offset / 4:
3718                continue
3719
3720            dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
3721            checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
3722            if checksum > 2**32:
3723                checksum = (checksum & 0xffffffff) + (checksum >> 32)
3724
3725        checksum = (checksum & 0xffff) + (checksum >> 16)
3726        checksum = (checksum) + (checksum >> 16)
3727        checksum = checksum & 0xffff
3728
3729        return checksum + len(self.__data__)
3730