1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- An abstraction that provides a file-reading mechanism.       ---*/
5/*---                                                      image.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2013-2015 Mozilla Foundation
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32/* Contributed by Julian Seward <jseward@acm.org> */
33
34/* See the corresponding auxprogs/valgrind-di-server.c for a list of
35   cleanups for this file and itself. */
36
37#include "pub_core_basics.h"
38#include "pub_core_vki.h"
39#include "pub_core_libcbase.h"
40#include "pub_core_libcassert.h"
41#include "pub_core_libcprint.h"
42#include "pub_core_libcproc.h"     /* VG_(read_millisecond_timer) */
43#include "pub_core_libcfile.h"
44#include "priv_misc.h"             /* dinfo_zalloc/free/strdup */
45#include "priv_image.h"            /* self */
46
47#include "minilzo.h"
48
/* Geometry of the per-image block cache.  These values (1024 entries
   of 8192 bytes each) give a cache size of 8MB. */
#define CACHE_ENTRY_SIZE_BITS (12+1)
#define CACHE_N_ENTRIES       1024

/* Size in bytes of one cache entry.  Always a power of two, since it
   is derived from CACHE_ENTRY_SIZE_BITS by shifting. */
#define CACHE_ENTRY_SIZE      (1 << CACHE_ENTRY_SIZE_BITS)
55
/* An entry in the cache: one slice of the image, at most
   CACHE_ENTRY_SIZE bytes long.  Entries are filled in by set_CEnt. */
typedef
   struct {
      DiOffT off; // file offset for data[0]
      SizeT  used; // 1 .. sizeof(data), or 0 to denote not-in-use
      UChar  data[CACHE_ENTRY_SIZE]; // cached bytes; data[0 .. used-1] hold image data
   }
   CEnt;
64
/* Source for files: describes where the bytes of an image come from,
   which is either a file in the local filesystem or a remote
   debuginfo server. */
typedef
   struct {
      // True: img is of local file.  False: img is from a server.
      Bool  is_local;
      // The fd for the local file, or sd for a remote server.
      Int   fd;
      // The name.  In ML_(dinfo_zalloc)'d space.  Used only for printing
      // error messages; hence it doesn't really matter what this contains.
      HChar* name;
      // The rest of these fields are only valid when using remote files
      // (that is, using a debuginfo server; hence when is_local==False)
      // Session ID allocated to us by the server.  Cannot be zero.
      ULong session_id;
   }
   Source;
81
/* An image: a source of bytes plus a small cache of recently used
   CACHE_ENTRY_SIZE-sized slices of it. */
struct _DiImage {
   // The source -- how to get hold of the file we are reading
   Source source;
   // Total size of the image.
   SizeT size;
   // The number of entries used.  0 .. CACHE_N_ENTRIES
   UInt  ces_used;
   // Pointers to the entries.  ces[0 .. ces_used-1] are non-NULL.
   // ces[ces_used .. CACHE_N_ENTRIES-1] are NULL.
   // The non-NULL entries may be arranged arbitrarily.  We expect to use
   // a pseudo-LRU scheme though.
   CEnt* ces[CACHE_N_ENTRIES];
};
95
/* A frame: the unit of communication with the debuginfo server.  The
   first 4 bytes of |data| give the kind of the frame, and the rest of
   it is kind-specific data.  |n_data| is the total number of bytes in
   |data|, including the 4 kind bytes. */
typedef  struct { UChar* data; SizeT n_data; }  Frame;
99
100static void write_UInt_le ( /*OUT*/UChar* dst, UInt n )
101{
102   Int i;
103   for (i = 0; i <= 3; i++) {
104      dst[i] = (UChar)(n & 0xFF);
105      n >>= 8;
106   }
107}
108
109static UInt read_UInt_le ( const UChar* src )
110{
111   UInt r = 0;
112   Int i;
113   for (i = 3; i >= 0; i--) {
114      r <<= 8;
115      r += (UInt)src[i];
116   }
117   return r;
118}
119
120static void write_ULong_le ( /*OUT*/UChar* dst, ULong n )
121{
122   Int i;
123   for (i = 0; i <= 7; i++) {
124      dst[i] = (UChar)(n & 0xFF);
125      n >>= 8;
126   }
127}
128
129static ULong read_ULong_le ( const UChar* src )
130{
131   ULong r = 0;
132   Int i;
133   for (i = 7; i >= 0; i--) {
134      r <<= 8;
135      r += (ULong)src[i];
136   }
137   return r;
138}
139
140
141/* Set |sd| to be blocking.  Returns True on success. */
142static Bool set_blocking ( int sd )
143{
144   Int res;
145   res = VG_(fcntl)(sd, VKI_F_GETFL, 0/*ignored*/);
146   if (res != -1)
147      res = VG_(fcntl)(sd, VKI_F_SETFL, res & ~VKI_O_NONBLOCK);
148   return (res != -1);
149}
150
151/* Tries to read 'len' bytes from fd, blocking if necessary.  Assumes
152   fd has been set in blocking mode.  If it returns with the number of
153   bytes read < len, it means that either fd was closed, or there was
154   an error on it. */
155static Int my_read ( Int fd, UChar* buf, Int len )
156{
157   Int nRead = 0;
158   while (1) {
159      if (nRead == len) return nRead;
160      vg_assert(nRead < len);
161      Int nNeeded = len - nRead;
162      vg_assert(nNeeded > 0);
163      Int n = VG_(read)(fd, &buf[nRead], nNeeded);
164      if (n <= 0) return nRead; /* error or EOF */
165      nRead += n;
166   }
167}
168
169/* Tries to write 'len' bytes to fd, blocking if necessary.  Assumes
170   fd has been set in blocking mode.  If it returns with the number of
171   bytes written < len, it means that either fd was closed, or there was
172   an error on it. */
173static Int my_write ( Int fd, const UChar* buf, Int len )
174{
175   Int nWritten = 0;
176   while (1) {
177      if (nWritten == len) return nWritten;
178      vg_assert(nWritten < len);
179      Int nStillToDo = len - nWritten;
180      vg_assert(nStillToDo > 0);
181      Int n = VG_(write_socket)(fd, &buf[nWritten], nStillToDo);
182      if (n < 0) return nWritten; /* error or EOF */
183      nWritten += n;
184   }
185}
186
/* If we lost communication with the remote server, just give up.
   Recovering is too difficult.  Prints an explanatory message for the
   user and then terminates via VG_(exit) with exit status 1. */
static void give_up__comms_lost(void)
{
   VG_(umsg)("\n");
   VG_(umsg)(
      "Valgrind: debuginfo reader: Lost communication with the remote\n");
   VG_(umsg)(
      "Valgrind: debuginfo server.  I can't recover.  Giving up.  Sorry.\n");
   VG_(umsg)("\n");
   VG_(exit)(1);
   /*NOTREACHED*/
}
200
/* Called when a read request falls outside the image (see
   ensure_valid_failed).  Prints a message saying the debuginfo file
   is possibly corrupted and then terminates via VG_(exit) with exit
   status 1. */
static void give_up__image_overrun(void)
{
   VG_(umsg)("\n");
   VG_(umsg)(
      "Valgrind: debuginfo reader: Possibly corrupted debuginfo file.\n");
   VG_(umsg)(
      "Valgrind: I can't recover.  Giving up.  Sorry.\n");
   VG_(umsg)("\n");
   VG_(exit)(1);
   /*NOTREACHED*/
}
212
213/* "Do" a transaction: that is, send the given frame to the server and
214   return the frame it sends back.  Caller owns the resulting frame
215   and must free it.  A NULL return means the transaction failed for
216   some reason. */
217static Frame* do_transaction ( Int sd, const Frame* req )
218{
219   if (0) VG_(printf)("CLIENT: send %c%c%c%c\n",
220                      req->data[0], req->data[1], req->data[2], req->data[3]);
221
222   /* What goes on the wire is:
223         adler(le32) n_data(le32) data[0 .. n_data-1]
224      where the checksum covers n_data as well as data[].
225   */
226   /* The initial Adler-32 value */
227   UInt adler = VG_(adler32)(0, NULL, 0);
228
229   /* Fold in the length field, encoded as le32. */
230   UChar wr_first8[8];
231   write_UInt_le(&wr_first8[4], req->n_data);
232   adler = VG_(adler32)(adler, &wr_first8[4], 4);
233   /* Fold in the data values */
234   adler = VG_(adler32)(adler, req->data, req->n_data);
235   write_UInt_le(&wr_first8[0], adler);
236
237   Int r = my_write(sd, &wr_first8[0], 8);
238   if (r != 8) return NULL;
239   vg_assert(req->n_data >= 4); // else ill formed -- no KIND field
240   r = my_write(sd, req->data, req->n_data);
241   if (r != req->n_data) return NULL;
242
243   /* So, the request is sent.  Now get a request of the same format
244      out of the channel. */
245   UChar rd_first8[8];  // adler32; length32
246   r = my_read(sd, &rd_first8[0], 8);
247   if (r != 8) return NULL;
248   UInt rd_adler = read_UInt_le(&rd_first8[0]);
249   UInt rd_len   = read_UInt_le(&rd_first8[4]);
250   /* Allocate a Frame to hold the result data, and read into it. */
251   // Reject obviously-insane length fields.
252   if (rd_len < 4 || rd_len > 4*1024*1024) return NULL;
253   Frame* res = ML_(dinfo_zalloc)("di.do_transaction.1", sizeof(Frame));
254   res->n_data = rd_len;
255   res->data = ML_(dinfo_zalloc)("di.do_transaction.2", rd_len);
256   r = my_read(sd, res->data, res->n_data);
257   if (r != rd_len) return NULL;
258
259   if (0) VG_(printf)("CLIENT: recv %c%c%c%c\n",
260                      res->data[0], res->data[1], res->data[2], res->data[3]);
261
262   /* Compute the checksum for the received data, and check it. */
263   adler = VG_(adler32)(0, NULL, 0); // initial value
264   adler = VG_(adler32)(adler, &rd_first8[4], 4);
265   if (res->n_data > 0)
266      adler = VG_(adler32)(adler, res->data, res->n_data);
267
268   if (adler/*computed*/ != rd_adler/*expected*/) return NULL;
269   return res;
270}
271
272static void free_Frame ( Frame* fr )
273{
274   vg_assert(fr && fr->data);
275   ML_(dinfo_free)(fr->data);
276   ML_(dinfo_free)(fr);
277}
278
279static Frame* mk_Frame_noargs ( const HChar* tag )
280{
281   vg_assert(VG_(strlen)(tag) == 4);
282   Frame* f = ML_(dinfo_zalloc)("di.mFn.1", sizeof(Frame));
283   f->n_data = 4;
284   f->data = ML_(dinfo_zalloc)("di.mFn.2", f->n_data);
285   VG_(memcpy)(&f->data[0], tag, 4);
286   return f;
287}
288
289static Frame* mk_Frame_le64_le64_le64 ( const HChar* tag,
290                                        ULong n1, ULong n2, ULong n3 )
291{
292   vg_assert(VG_(strlen)(tag) == 4);
293   Frame* f = ML_(dinfo_zalloc)("di.mFlll.1", sizeof(Frame));
294   f->n_data = 4 + 3*8;
295   f->data = ML_(dinfo_zalloc)("di.mFlll.2", f->n_data);
296   VG_(memcpy)(&f->data[0], tag, 4);
297   write_ULong_le(&f->data[4 + 0*8], n1);
298   write_ULong_le(&f->data[4 + 1*8], n2);
299   write_ULong_le(&f->data[4 + 2*8], n3);
300   return f;
301}
302
303static Frame* mk_Frame_asciiz ( const HChar* tag, const HChar* str )
304{
305   vg_assert(VG_(strlen)(tag) == 4);
306   Frame* f = ML_(dinfo_zalloc)("di.mFa.1", sizeof(Frame));
307   SizeT n_str = VG_(strlen)(str);
308   f->n_data = 4 + n_str + 1;
309   f->data = ML_(dinfo_zalloc)("di.mFa.2", f->n_data);
310   VG_(memcpy)(&f->data[0], tag, 4);
311   VG_(memcpy)(&f->data[4], str, n_str);
312   vg_assert(f->data[4 + n_str] == 0);
313   return f;
314}
315
316static Bool parse_Frame_le64 ( const Frame* fr, const HChar* tag,
317                               /*OUT*/ULong* n1 )
318{
319   vg_assert(VG_(strlen)(tag) == 4);
320   if (!fr || !fr->data) return False;
321   if (fr->n_data < 4) return False;
322   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
323   if (fr->n_data != 4 + 1*8) return False;
324   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
325   return True;
326}
327
328static Bool parse_Frame_le64_le64 ( const Frame* fr, const HChar* tag,
329                                    /*OUT*/ULong* n1, /*OUT*/ULong* n2 )
330{
331   vg_assert(VG_(strlen)(tag) == 4);
332   if (!fr || !fr->data) return False;
333   if (fr->n_data < 4) return False;
334   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
335   if (fr->n_data != 4 + 2*8) return False;
336   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
337   *n2 = read_ULong_le(&fr->data[4 + 1*8]);
338   return True;
339}
340
341static Bool parse_Frame_asciiz ( const Frame* fr, const HChar* tag,
342                                 /*OUT*/UChar** str )
343{
344   vg_assert(VG_(strlen)(tag) == 4);
345   if (!fr || !fr->data) return False;
346   if (fr->n_data < 4) return False;
347   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
348   if (fr->n_data < 5) return False; // else there isn't even enough
349                                     // space for the terminating zero
350   /* Find the terminating zero and ensure it's right at the end
351      of the data.  If not, the frame is malformed. */
352   SizeT i = 4;
353   while (True) {
354      if (i >= fr->n_data) break;
355      if (fr->data[i] == 0) break;
356      i++;
357   }
358   vg_assert(i <= fr->n_data);
359   if (i == fr->n_data-1 && fr->data[i] == 0) {
360      *str = &fr->data[4];
361      return True;
362   } else {
363      return False;
364   }
365}
366
367static Bool parse_Frame_le64_le64_le64_bytes (
368               const Frame* fr, const HChar* tag,
369               /*OUT*/ULong* n1, /*OUT*/ULong* n2, /*OUT*/ULong* n3,
370               /*OUT*/UChar** data, /*OUT*/ULong* n_data
371            )
372{
373   vg_assert(VG_(strlen)(tag) == 4);
374   if (!fr || !fr->data) return False;
375   if (fr->n_data < 4) return False;
376   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
377   if (fr->n_data < 4 + 3*8) return False;
378   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
379   *n2 = read_ULong_le(&fr->data[4 + 1*8]);
380   *n3 = read_ULong_le(&fr->data[4 + 2*8]);
381   *data   = &fr->data[4 + 3*8];
382   *n_data = fr->n_data - (4 + 3*8);
383   vg_assert(fr->n_data >= 4 + 3*8);
384   return True;
385}
386
387static DiOffT block_round_down ( DiOffT i )
388{
389   return i & ((DiOffT)~(CACHE_ENTRY_SIZE-1));
390}
391
392/* Is this offset inside this CEnt? */
393static inline Bool is_in_CEnt ( const CEnt* cent, DiOffT off )
394{
395   /* This assertion is checked by set_CEnt, so checking it here has
396      no benefit, whereas skipping it does remove it from the hottest
397      path. */
398   /* vg_assert(cent->used > 0 && cent->used <= CACHE_ENTRY_SIZE); */
399   /* What we want to return is:
400        cent->off <= off && off < cent->off + cent->used;
401      This is however a very hot path, so here's alternative that uses
402      only one conditional branch, using the following transformation,
403      where all quantities are unsigned:
404              x >= LO && x < LO+N
405         -->  x-LO >= 0 && x-LO < LO+N-LO
406         -->  x-LO >= 0 && x-LO < N
407         -->  x-LO < N
408      This is however only valid when the original bounds, that is, LO
409      .. LO+N-1, do not wrap around the end of the address space.  That
410      is, we require that LO <= LO+N-1.  But that's OK .. we don't
411      expect wraparounds in CEnts or for that matter any object
412      allocated from C-land.  See Hacker's Delight, Chapter 4.1,
413      "Checking Bounds of Integers", for more details.
414   */
415   return off - cent->off < cent->used;
416}
417
418/* Allocate a new CEnt, connect it to |img|, and return its index. */
419static UInt alloc_CEnt ( DiImage* img )
420{
421   vg_assert(img);
422   vg_assert(img->ces_used < CACHE_N_ENTRIES);
423   UInt entNo = img->ces_used;
424   img->ces_used++;
425   vg_assert(img->ces[entNo] == NULL);
426   img->ces[entNo] = ML_(dinfo_zalloc)("di.alloc_CEnt.1", sizeof(CEnt));
427   return entNo;
428}
429
430/* Move the given entry to the top and slide those above it down by 1,
431   to make space. */
432static void move_CEnt_to_top ( DiImage* img, UInt entNo )
433{
434   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
435   vg_assert(entNo > 0 && entNo < img->ces_used);
436   CEnt* tmp = img->ces[entNo];
437   while (entNo > 0) {
438      img->ces[entNo] = img->ces[entNo-1];
439      entNo--;
440   }
441   img->ces[0] = tmp;
442}
443
/* Set the given entry so that it has a chunk of the file containing
   the given offset.  It is this function that brings data into the
   cache, either by reading the local file or pulling it from the
   remote server.

   |entNo| must be a used slot of |img| and |off| must lie inside the
   image.  The chunk that is loaded starts at |off| rounded down to a
   multiple of CACHE_ENTRY_SIZE and is at most CACHE_ENTRY_SIZE bytes
   long (shorter only at the end of the image).  If the remote server
   misbehaves, this function does not return: it reports the problem
   and calls give_up__comms_lost. */
static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
{
   SizeT len;
   DiOffT off_orig = off;
   vg_assert(img);
   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
   /* NOTE(review): entNo is unsigned, so the 'entNo >= 0' half of
      this assertion is always true. */
   vg_assert(entNo >= 0 && entNo < img->ces_used);
   vg_assert(off < img->size);
   vg_assert(img->ces[entNo] != NULL);
   /* Compute [off, +len) as the slice we are going to read. */
   off = block_round_down(off);
   len = img->size - off;
   if (len > CACHE_ENTRY_SIZE) len = CACHE_ENTRY_SIZE;
   /* It is conceivable that the 'len > 0' bit could fail if we make
      an image with a zero sized file.  But then no 'get' request on
      that image would be valid. */
   vg_assert(len > 0 && len <= CACHE_ENTRY_SIZE);
   vg_assert(off + len <= img->size);
   vg_assert(off <= off_orig && off_orig < off+len);
   /* So, read  off .. off+len-1  into the entry. */
   CEnt* ce = img->ces[entNo];

   /* Disabled debugging aid: prints the running total of bytes
      fetched, plus the size, offset and inter-arrival delay of each
      fetch. */
   if (0) {
      static UInt t_last = 0;
      static ULong nread = 0;
      UInt now = VG_(read_millisecond_timer)();
      UInt delay = now - t_last;
      t_last = now;
      nread += len;
      VG_(printf)("XXXXXXXX (tot %'llu)  read %'lu  offset %'llu  delay %'u\n",
                  nread, len, off, delay);
   }

   if (img->source.is_local) {
      // Simple: just read it
      // NOTE(review): only an error result is checked here; the number
      // of bytes actually read is not compared against len.
      SysRes sr = VG_(pread)(img->source.fd, &ce->data[0], (Int)len, off);
      vg_assert(!sr_isError(sr));
   } else {
      // Not so simple: poke the server
      vg_assert(img->source.session_id > 0);
      Frame* req
         = mk_Frame_le64_le64_le64("READ", img->source.session_id, off, len);
      Frame* res = do_transaction(img->source.fd, req);
      free_Frame(req); req = NULL;
      if (!res) goto server_fail;
      ULong  rx_session_id = 0, rx_off = 0, rx_len = 0, rx_zdata_len = 0;
      UChar* rx_data = NULL;
      /* Pretty confusing.  rx_sessionid, rx_off and rx_len are copies
         of the values that we requested in the READ frame just above,
         so we can be sure that the server is responding to the right
         request.  It just copies them from the request into the
         response.  rx_data is the actual data, and rx_zdata_len is
         its compressed length.  Hence rx_len must equal len, but
         rx_zdata_len can be different -- smaller, hopefully.. */
      if (!parse_Frame_le64_le64_le64_bytes
          (res, "RDOK", &rx_session_id, &rx_off,
                        &rx_len, &rx_data, &rx_zdata_len))
         goto server_fail;
      if (rx_session_id != img->source.session_id
          || rx_off != off || rx_len != len || rx_data == NULL)
         goto server_fail;

      //VG_(memcpy)(&ce->data[0], rx_data, len);
      // Decompress into the destination buffer
      // Tell the lib the max number of output bytes it can write.
      // After the call, this holds the number of bytes actually written,
      // and it's an error if it is different.
      lzo_uint out_len = len;
      Int lzo_rc = lzo1x_decompress_safe(rx_data, rx_zdata_len,
                                         &ce->data[0], &out_len,
                                         NULL);
      Bool ok = lzo_rc == LZO_E_OK && out_len == len;
      if (!ok) goto server_fail;

      /* Success: the response frame is no longer needed.  Skip over
         the failure handling below. */
      free_Frame(res); res = NULL;
      goto end_of_else_clause;
     server_fail:
      /* The server screwed up somehow.  Now what? */
      /* Report whatever is known about the failure: the server's own
         explanation if it sent a FAIL frame, else a generic message.
         Then give up. */
      if (res) {
         UChar* reason = NULL;
         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                      "%s\n", reason);
         } else {
            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                      "unknown reason\n");
         }
         free_Frame(res); res = NULL;
      } else {
         VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                   "server unexpectedly closed the connection\n");
      }
      give_up__comms_lost();
      /* NOTREACHED */
      vg_assert(0);
     end_of_else_clause:
      {}
   }

   /* The data is in place; record which slice of the image this
      entry now describes. */
   ce->off  = off;
   ce->used = len;
   vg_assert(ce->used > 0 && ce->used <= CACHE_ENTRY_SIZE);
}
551
552__attribute__((noinline))
553static UChar get_slowcase ( DiImage* img, DiOffT off )
554{
555   /* Stay sane .. */
556   vg_assert(off < img->size);
557   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
558   UInt i;
559   /* Start the search at entry 1, since the fast-case function
560      checked slot zero already. */
561   for (i = 1; i < img->ces_used; i++) {
562      vg_assert(img->ces[i]);
563      if (is_in_CEnt(img->ces[i], off))
564         break;
565   }
566   vg_assert(i <= img->ces_used);
567   if (i == img->ces_used) {
568      /* It's not in any entry.  Either allocate a new entry or
569         recycle the LRU one. */
570      if (img->ces_used == CACHE_N_ENTRIES) {
571         /* All entries in use.  Recycle the (ostensibly) LRU one. */
572         set_CEnt(img, CACHE_N_ENTRIES-1, off);
573         i = CACHE_N_ENTRIES-1;
574      } else {
575         /* Allocate a new one, and fill it in. */
576         UInt entNo = alloc_CEnt(img);
577         set_CEnt(img, entNo, off);
578         i = entNo;
579      }
580   } else {
581      /* We found it at position 'i'. */
582      vg_assert(i > 0);
583   }
584   if (i > 0) {
585      move_CEnt_to_top(img, i);
586      i = 0;
587   }
588   vg_assert(is_in_CEnt(img->ces[i], off));
589   return img->ces[i]->data[ off - img->ces[i]->off ];
590}
591
592// This is called a lot, so do the usual fast/slow split stuff on it. */
593static inline UChar get ( DiImage* img, DiOffT off )
594{
595   /* Most likely case is, it's in the ces[0] position. */
596   /* ML_(img_from_local_file) requests a read for ces[0] when
597      creating the image.  Hence slot zero is always non-NULL, so we
598      can skip this test. */
599   if (LIKELY(/* img->ces[0] != NULL && */
600              is_in_CEnt(img->ces[0], off))) {
601      return img->ces[0]->data[ off - img->ces[0]->off ];
602   }
603   /* Else we'll have to fish around for it. */
604   return get_slowcase(img, off);
605}
606
607/* Create an image from a file in the local filesystem.  This is
608   relatively straightforward. */
609DiImage* ML_(img_from_local_file)(const HChar* fullpath)
610{
611   SysRes         fd;
612   struct vg_stat stat_buf;
613   DiOffT         size;
614
615   fd = VG_(open)(fullpath, VKI_O_RDONLY, 0);
616   if (sr_isError(fd))
617      return NULL;
618
619   if (VG_(fstat)(sr_Res(fd), &stat_buf) != 0) {
620      VG_(close)(sr_Res(fd));
621      return NULL;
622   }
623
624   size = stat_buf.size;
625   if (size == 0 || size == DiOffT_INVALID
626       || /* size is unrepresentable as a SizeT */
627          size != (DiOffT)(SizeT)(size)) {
628      VG_(close)(sr_Res(fd));
629      return NULL;
630   }
631
632   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
633   img->source.is_local = True;
634   img->source.fd       = sr_Res(fd);
635   img->size            = size;
636   img->ces_used        = 0;
637   img->source.name     = ML_(dinfo_strdup)("di.image.ML_iflf.2", fullpath);
638   /* img->ces is already zeroed out */
639   vg_assert(img->source.fd >= 0);
640
641   /* Force the zeroth entry to be the first chunk of the file.
642      That's likely to be the first part that's requested anyway, and
643      loading it at this point forcing img->cent[0] to always be
644      non-empty, thereby saving us an is-it-empty check on the fast
645      path in get(). */
646   UInt entNo = alloc_CEnt(img);
647   vg_assert(entNo == 0);
648   set_CEnt(img, 0, 0);
649
650   return img;
651}
652
653
654/* Create an image from a file on a remote debuginfo server.  This is
655   more complex.  There are lots of ways in which it can fail. */
656DiImage* ML_(img_from_di_server)(const HChar* filename,
657                                 const HChar* serverAddr)
658{
659   if (filename == NULL || serverAddr == NULL)
660      return NULL;
661
662   /* The filename must be a plain filename -- no slashes at all. */
663   if (VG_(strchr)(filename, '/') != NULL)
664      return NULL;
665
666   /* Try to connect to the server.  A side effect of this is to parse
667      and reject, if syntactically invalid, |serverAddr|.  Reasons why
668      this could fail:
669      - serverAddr is not of the form d.d.d.d:d or d.d.d.d
670      - attempt to connect to that address:port failed
671   */
672   Int sd = VG_(connect_via_socket)(serverAddr);
673   if (sd < 0)
674      return NULL;
675   if (!set_blocking(sd))
676      return NULL;
677   Int one = 1;
678   Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY,
679                            &one, sizeof(one));
680   vg_assert(sr == 0);
681
682   /* Ok, we got a connection.  Ask it for version string, so as to be
683      reasonably sure we're talking to an instance of
684      auxprogs/valgrind-di-server and not to some other random program
685      that happens to be listening on that port. */
686   Frame* req = mk_Frame_noargs("VERS");
687   Frame* res = do_transaction(sd, req);
688   if (res == NULL)
689      goto fail; // do_transaction failed?!
690   UChar* vstr = NULL;
691   if (!parse_Frame_asciiz(res, "VEOK", &vstr))
692      goto fail; // unexpected response kind, or invalid ID string
693   vg_assert(vstr);
694   if (VG_(strcmp)("Valgrind Debuginfo Server, Version 1",
695                   (const HChar*)vstr) != 0)
696      goto fail; // wrong version string
697   free_Frame(req);
698   free_Frame(res);
699   req = NULL;
700   res = NULL;
701
702   /* Server seems plausible.  Present it with the name of the file we
703      want and see if it'll give us back a session ID for it. */
704   req = mk_Frame_asciiz("OPEN", filename);
705   res = do_transaction(sd, req);
706   if (res == NULL)
707      goto fail;
708   ULong session_id = 0, size = 0;
709   if (!parse_Frame_le64_le64(res, "OPOK", &session_id, &size))
710      goto fail;
711   free_Frame(req);
712   free_Frame(res);
713   req = NULL;
714   res = NULL;
715
716   /* We have a session ID.  We're ready to roll. */
717   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_ifds.1", sizeof(DiImage));
718   img->source.is_local   = False;
719   img->source.fd         = sd;
720   img->source.session_id = session_id;
721   img->size              = size;
722   img->ces_used          = 0;
723   img->source.name       = ML_(dinfo_zalloc)("di.image.ML_ifds.2",
724                                              20 + VG_(strlen)(filename)
725                                                 + VG_(strlen)(serverAddr));
726   VG_(sprintf)(img->source.name, "%s at %s", filename, serverAddr);
727
728   /* img->ces is already zeroed out */
729   vg_assert(img->source.fd >= 0);
730
731   /* See comment on equivalent bit in ML_(img_from_local_file) for
732      rationale. */
733   UInt entNo = alloc_CEnt(img);
734   vg_assert(entNo == 0);
735   set_CEnt(img, 0, 0);
736
737   return img;
738
739  fail:
740   free_Frame(req);
741   if (res) {
742      UChar* reason = NULL;
743      if (parse_Frame_asciiz(res, "FAIL", &reason)) {
744         // HACK: if it's just telling us that the file can't
745         // be opened, don't print it, else we'll get flooded with
746         // such complaints, one for each main object for which there
747         // isn't a debuginfo file on the server.
748         if (0 != VG_(strcmp)((const HChar*)reason, "OPEN: cannot open file"))
749            VG_(umsg)("ML_(img_from_di_server): fail: %s\n", reason);
750      } else {
751         VG_(umsg)("ML_(img_from_di_server): fail: unknown reason\n");
752      }
753      free_Frame(res);
754   }
755   VG_(close)(sd);
756   return NULL;
757}
758
759void ML_(img_done)(DiImage* img)
760{
761   vg_assert(img);
762   if (img->source.is_local) {
763      /* Close the file; nothing else to do. */
764      vg_assert(img->source.session_id == 0);
765      VG_(close)(img->source.fd);
766   } else {
767      /* Close the socket.  The server can detect this and will scrub
768         the connection when it happens, so there's no need to tell it
769         explicitly by sending it a "CLOSE" message, or any such. */
770      vg_assert(img->source.session_id != 0);
771      VG_(close)(img->source.fd);
772   }
773
774   /* Free up the cache entries, ultimately |img| itself. */
775   UInt i;
776   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
777   for (i = 0; i < img->ces_used; i++) {
778      ML_(dinfo_free)(img->ces[i]);
779   }
780   /* Take the opportunity to sanity check the rest. */
781   for (i = i; i < img->ces_used; i++) {
782      vg_assert(img->ces[i] == NULL);
783   }
784   ML_(dinfo_free)(img->source.name);
785   ML_(dinfo_free)(img);
786}
787
788DiOffT ML_(img_size)(const DiImage* img)
789{
790   vg_assert(img);
791   return img->size;
792}
793
794inline Bool ML_(img_valid)(const DiImage* img, DiOffT offset, SizeT size)
795{
796   vg_assert(img);
797   vg_assert(offset != DiOffT_INVALID);
798   return img->size > 0 && offset + size <= (DiOffT)img->size;
799}
800
/* Out-of-line failure path for ensure_valid.  Reports the offending
   range [offset, +size), the size and name of |img|, and the name of
   the function (|caller|) that made the request, and then gives up
   via give_up__image_overrun. */
__attribute__((noinline))
static void ensure_valid_failed (const DiImage* img, DiOffT offset, SizeT size,
                                 const HChar* caller)
{
   VG_(umsg)("Valgrind: debuginfo reader: ensure_valid failed:\n");
   VG_(umsg)("Valgrind:   during call to %s\n", caller);
   VG_(umsg)("Valgrind:   request for range [%llu, +%lu) exceeds\n",
             offset, size);
   VG_(umsg)("Valgrind:   valid image size of %lu for image:\n",
             img->size);
   VG_(umsg)("Valgrind:   \"%s\"\n", img->source.name);
   give_up__image_overrun();
}
814
815/* Check the given range is valid, and if not, shut down the system.
816   An invalid range would imply that we're trying to read outside the
817   image, which normally means the image is corrupted somehow, or the
818   caller is buggy.  Recovering is too complex, and we have
819   probably-corrupt debuginfo, so just give up. */
820static void ensure_valid(const DiImage* img, DiOffT offset, SizeT size,
821                         const HChar* caller)
822{
823   if (LIKELY(ML_(img_valid)(img, offset, size)))
824      return;
825   else
826      ensure_valid_failed(img, offset, size, caller);
827}
828
829
830void ML_(img_get)(/*OUT*/void* dst,
831                  DiImage* img, DiOffT offset, SizeT size)
832{
833   vg_assert(img);
834   vg_assert(size > 0);
835   ensure_valid(img, offset, size, "ML_(img_get)");
836   SizeT i;
837   for (i = 0; i < size; i++) {
838      ((UChar*)dst)[i] = get(img, offset + i);
839   }
840}
841
842SizeT ML_(img_get_some)(/*OUT*/void* dst,
843                        DiImage* img, DiOffT offset, SizeT size)
844{
845   vg_assert(img);
846   vg_assert(size > 0);
847   ensure_valid(img, offset, size, "ML_(img_get_some)");
848   UChar* dstU = (UChar*)dst;
849   /* Use |get| in the normal way to get the first byte of the range.
850      This guarantees to put the cache entry containing |offset| in
851      position zero. */
852   dstU[0] = get(img, offset);
853   /* Now just read as many bytes as we can (or need) directly out of
854      entry zero, without bothering to call |get| each time. */
855   const CEnt* ce = img->ces[0];
856   vg_assert(ce && ce->used >= 1);
857   vg_assert(is_in_CEnt(ce, offset));
858   SizeT nToCopy = size - 1;
859   SizeT nAvail  = (SizeT)(ce->used - (offset + 1 - ce->off));
860   vg_assert(nAvail >= 0 && nAvail <= ce->used-1);
861   if (nAvail < nToCopy) nToCopy = nAvail;
862   VG_(memcpy)(&dstU[1], &ce->data[offset + 1 - ce->off], nToCopy);
863   return nToCopy + 1;
864}
865
866
867SizeT ML_(img_strlen)(DiImage* img, DiOffT off)
868{
869   ensure_valid(img, off, 1, "ML_(img_strlen)");
870   SizeT i = 0;
871   while (get(img, off + i) != 0) i++;
872   return i;
873}
874
875HChar* ML_(img_strdup)(DiImage* img, const HChar* cc, DiOffT offset)
876{
877   ensure_valid(img, offset, 1, "ML_(img_strdup)");
878   SizeT  len = ML_(img_strlen)(img, offset);
879   HChar* res = ML_(dinfo_zalloc)(cc, len+1);
880   SizeT  i;
881   for (i = 0; i < len; i++) {
882      res[i] = get(img, offset+i);
883   }
884   vg_assert(res[len] == 0);
885   return res;
886}
887
888Int ML_(img_strcmp)(DiImage* img, DiOffT off1, DiOffT off2)
889{
890   ensure_valid(img, off1, 1, "ML_(img_strcmp)(first arg)");
891   ensure_valid(img, off2, 1, "ML_(img_strcmp)(second arg)");
892   while (True) {
893      UChar c1 = get(img, off1);
894      UChar c2 = get(img, off2);
895      if (c1 < c2) return -1;
896      if (c1 > c2) return 1;
897      if (c1 == 0) return 0;
898      off1++; off2++;
899   }
900}
901
902Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2)
903{
904   ensure_valid(img, off1, 1, "ML_(img_strcmp_c)");
905   while (True) {
906      UChar c1 = get(img, off1);
907      UChar c2 = *(const UChar*)str2;
908      if (c1 < c2) return -1;
909      if (c1 > c2) return 1;
910      if (c1 == 0) return 0;
911      off1++; str2++;
912   }
913}
914
915UChar ML_(img_get_UChar)(DiImage* img, DiOffT offset)
916{
917   ensure_valid(img, offset, 1, "ML_(img_get_UChar)");
918   return get(img, offset);
919}
920
921UShort ML_(img_get_UShort)(DiImage* img, DiOffT offset)
922{
923   UShort r;
924   ML_(img_get)(&r, img, offset, sizeof(r));
925   return r;
926}
927
928UInt ML_(img_get_UInt)(DiImage* img, DiOffT offset)
929{
930   UInt r;
931   ML_(img_get)(&r, img, offset, sizeof(r));
932   return r;
933}
934
935ULong ML_(img_get_ULong)(DiImage* img, DiOffT offset)
936{
937   ULong r;
938   ML_(img_get)(&r, img, offset, sizeof(r));
939   return r;
940}
941
942
943/*
944 * This routine for calculating the CRC for a separate debug file
945 * is GPLed code borrowed from GNU binutils.
946 */
947UInt ML_(img_calc_gnu_debuglink_crc32)(DiImage* img)
948{
949  static const UInt crc32_table[256] =
950    {
951      0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
952      0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
953      0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
954      0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
955      0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
956      0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
957      0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
958      0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
959      0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
960      0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
961      0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
962      0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
963      0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
964      0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
965      0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
966      0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
967      0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
968      0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
969      0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
970      0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
971      0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
972      0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
973      0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
974      0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
975      0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
976      0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
977      0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
978      0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
979      0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
980      0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
981      0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
982      0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
983      0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
984      0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
985      0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
986      0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
987      0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
988      0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
989      0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
990      0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
991      0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
992      0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
993      0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
994      0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
995      0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
996      0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
997      0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
998      0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
999      0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
1000      0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
1001      0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
1002      0x2d02ef8d
1003    };
1004
1005   vg_assert(img);
1006
1007   /* If the image is local, calculate the CRC here directly.  If it's
1008      remote, forward the request to the server. */
1009   if (img->source.is_local) {
1010      /* Work through the image in 1 KB chunks. */
1011      UInt   crc      = 0xFFFFFFFF;
1012      DiOffT img_szB  = ML_(img_size)(img);
1013      DiOffT curr_off = 0;
1014      while (1) {
1015         vg_assert(curr_off >= 0 && curr_off <= img_szB);
1016         if (curr_off == img_szB) break;
1017         DiOffT avail = img_szB - curr_off;
1018         vg_assert(avail > 0 && avail <= img_szB);
1019         if (avail > 1024) avail = 1024;
1020         UChar buf[1024];
1021         SizeT nGot = ML_(img_get_some)(buf, img, curr_off, avail);
1022         vg_assert(nGot >= 1 && nGot <= avail);
1023         UInt i;
1024         for (i = 0; i < (UInt)nGot; i++)
1025            crc = crc32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
1026         curr_off += nGot;
1027      }
1028      return ~crc & 0xFFFFFFFF;
1029   } else {
1030      Frame* req = mk_Frame_noargs("CRC3");
1031      Frame* res = do_transaction(img->source.fd, req);
1032      if (!res) goto remote_crc_fail;
1033      ULong crc32 = 0;
1034      if (!parse_Frame_le64(res, "CROK", &crc32)) goto remote_crc_fail;
1035      if ((crc32 & ~0xFFFFFFFFULL) != 0) goto remote_crc_fail;
1036      free_Frame(req);
1037      free_Frame(res);
1038      return (UInt)crc32;
1039     remote_crc_fail:
1040
1041      // XXXX common this up with the READ diagnostic cases
1042      if (res) {
1043         UChar* reason = NULL;
1044         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
1045            VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1046                      "%s\n", reason);
1047         } else {
1048            VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1049                      "unknown reason\n");
1050         }
1051      } else {
1052         VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1053                   "server unexpectedly closed the connection\n");
1054      }
1055
1056      if (req) free_Frame(req);
1057      if (res) free_Frame(res);
1058      // FIXME: now what?
1059      give_up__comms_lost();
1060      /* NOTREACHED */
1061      vg_assert(0);
1062   }
1063   /*NOTREACHED*/
1064   vg_assert(0);
1065}
1066
1067////////////////////////////////////////////////////
1068#include "minilzo-inl.c"
1069
1070/*--------------------------------------------------------------------*/
1071/*--- end                                                  image.c ---*/
1072/*--------------------------------------------------------------------*/
1073