1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- An abstraction that provides a file-reading mechanism.       ---*/
5/*---                                                      image.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2013-2013 Mozilla Foundation
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32/* Contributed by Julian Seward <jseward@acm.org> */
33
34/* See the corresponding auxprogs/valgrind-di-server.c for a list of
35   cleanups for this file and itself. */
36
37#include "pub_core_basics.h"
38#include "pub_core_vki.h"
39#include "pub_core_libcbase.h"
40#include "pub_core_libcassert.h"
41#include "pub_core_libcprint.h"
42#include "pub_core_libcproc.h"     /* VG_(read_millisecond_timer) */
43#include "pub_core_libcfile.h"
44#include "priv_misc.h"             /* dinfo_zalloc/free/strdup */
45#include "priv_image.h"            /* self */
46
47#include "minilzo.h"
48
/* Cache geometry.  These values (1024 entries of 8192 bytes each)
   give a cache size of 8MB. */
#define CACHE_ENTRY_SIZE_BITS (12+1)
#define CACHE_N_ENTRIES       1024

/* Size in bytes of one cache entry; a power of two by construction. */
#define CACHE_ENTRY_SIZE      (1 << CACHE_ENTRY_SIZE_BITS)
55
/* An entry in the cache.  It holds one chunk of the image, namely the
   bytes at file offsets [off, off+used). */
typedef
   struct {
      DiOffT off; // file offset for data[0]
      SizeT  used; // 1 .. sizeof(data), or 0 to denote not-in-use
      UChar  data[CACHE_ENTRY_SIZE]; // the cached bytes; data[0 .. used-1] are valid
   }
   CEnt;
64
/* Source for files: describes where the bytes of an image come from,
   either a local file or a session on a remote debuginfo server. */
typedef
   struct {
      // True: img is of local file.  False: img is from a server.
      Bool  is_local;
      // The fd for the local file, or sd for a remote server.
      Int   fd;
      // The name.  In ML_(dinfo_zalloc)'d space.  Used only for printing
      // error messages; hence it doesn't really matter what this contains.
      HChar* name;
      // The rest of these fields are only valid when using remote files
      // (that is, using a debuginfo server; hence when is_local==False)
      // Session ID allocated to us by the server.  Cannot be zero.
      // (For local images it stays zero; ML_(img_done) asserts this.)
      ULong session_id;
   }
   Source;
81
/* An image: a readable view of a file, fronted by a cache of
   fixed-size chunks (CEnts) that are loaded on demand. */
struct _DiImage {
   // The source -- how to get hold of the file we are reading
   Source source;
   // Total size of the image, in bytes.
   SizeT size;
   // The number of entries used.  0 .. CACHE_N_ENTRIES
   UInt  ces_used;
   // Pointers to the entries.  ces[0 .. ces_used-1] are non-NULL.
   // ces[ces_used .. CACHE_N_ENTRIES-1] are NULL.
   // The non-NULL entries may be arranged arbitrarily.  We expect to use
   // a pseudo-LRU scheme though: the most recently used entry is kept
   // in ces[0].
   CEnt* ces[CACHE_N_ENTRIES];
};
95
/* A frame: the unit of communication with the debuginfo server.  The
   first 4 bytes of |data| give the kind of the frame, and the rest of
   it is kind-specific data.  |n_data| is the number of bytes in
   |data|. */
typedef  struct { UChar* data; SizeT n_data; }  Frame;
99
100static void write_UInt_le ( /*OUT*/UChar* dst, UInt n )
101{
102   Int i;
103   for (i = 0; i <= 3; i++) {
104      dst[i] = (UChar)(n & 0xFF);
105      n >>= 8;
106   }
107}
108
109static UInt read_UInt_le ( const UChar* src )
110{
111   UInt r = 0;
112   Int i;
113   for (i = 3; i >= 0; i--) {
114      r <<= 8;
115      r += (UInt)src[i];
116   }
117   return r;
118}
119
120static void write_ULong_le ( /*OUT*/UChar* dst, ULong n )
121{
122   Int i;
123   for (i = 0; i <= 7; i++) {
124      dst[i] = (UChar)(n & 0xFF);
125      n >>= 8;
126   }
127}
128
129static ULong read_ULong_le ( const UChar* src )
130{
131   ULong r = 0;
132   Int i;
133   for (i = 7; i >= 0; i--) {
134      r <<= 8;
135      r += (ULong)src[i];
136   }
137   return r;
138}
139
140
141/* Set |sd| to be blocking.  Returns True on success. */
142static Bool set_blocking ( int sd )
143{
144   Int res;
145   res = VG_(fcntl)(sd, VKI_F_GETFL, 0/*ignored*/);
146   if (res != -1)
147      res = VG_(fcntl)(sd, VKI_F_SETFL, res & ~VKI_O_NONBLOCK);
148   return (res != -1);
149}
150
151/* Tries to read 'len' bytes from fd, blocking if necessary.  Assumes
152   fd has been set in blocking mode.  If it returns with the number of
153   bytes read < len, it means that either fd was closed, or there was
154   an error on it. */
155static Int my_read ( Int fd, UChar* buf, Int len )
156{
157   Int nRead = 0;
158   while (1) {
159      if (nRead == len) return nRead;
160      vg_assert(nRead < len);
161      Int nNeeded = len - nRead;
162      vg_assert(nNeeded > 0);
163      Int n = VG_(read)(fd, &buf[nRead], nNeeded);
164      if (n <= 0) return nRead; /* error or EOF */
165      nRead += n;
166   }
167}
168
169/* Tries to write 'len' bytes to fd, blocking if necessary.  Assumes
170   fd has been set in blocking mode.  If it returns with the number of
171   bytes written < len, it means that either fd was closed, or there was
172   an error on it. */
173static Int my_write ( Int fd, const UChar* buf, Int len )
174{
175   Int nWritten = 0;
176   while (1) {
177      if (nWritten == len) return nWritten;
178      vg_assert(nWritten < len);
179      Int nStillToDo = len - nWritten;
180      vg_assert(nStillToDo > 0);
181      Int n = VG_(write_socket)(fd, &buf[nWritten], nStillToDo);
182      if (n < 0) return nWritten; /* error or EOF */
183      nWritten += n;
184   }
185}
186
187/* If we lost communication with the remote server, just give up.
188   Recovering is too difficult. */
189static void give_up__comms_lost(void)
190{
191   VG_(umsg)("\n");
192   VG_(umsg)(
193      "Valgrind: debuginfo reader: Lost communication with the remote\n");
194   VG_(umsg)(
195      "Valgrind: debuginfo server.  I can't recover.  Giving up.  Sorry.\n");
196   VG_(umsg)("\n");
197   VG_(exit)(1);
198   /*NOTREACHED*/
199}
200
201static void give_up__image_overrun(void)
202{
203   VG_(umsg)("\n");
204   VG_(umsg)(
205      "Valgrind: debuginfo reader: Possibly corrupted debuginfo file.\n");
206   VG_(umsg)(
207      "Valgrind: I can't recover.  Giving up.  Sorry.\n");
208   VG_(umsg)("\n");
209   VG_(exit)(1);
210   /*NOTREACHED*/
211}
212
213/* "Do" a transaction: that is, send the given frame to the server and
214   return the frame it sends back.  Caller owns the resulting frame
215   and must free it.  A NULL return means the transaction failed for
216   some reason. */
217static Frame* do_transaction ( Int sd, const Frame* req )
218{
219   if (0) VG_(printf)("CLIENT: send %c%c%c%c\n",
220                      req->data[0], req->data[1], req->data[2], req->data[3]);
221
222   /* What goes on the wire is:
223         adler(le32) n_data(le32) data[0 .. n_data-1]
224      where the checksum covers n_data as well as data[].
225   */
226   /* The initial Adler-32 value */
227   UInt adler = VG_(adler32)(0, NULL, 0);
228
229   /* Fold in the length field, encoded as le32. */
230   UChar wr_first8[8];
231   write_UInt_le(&wr_first8[4], req->n_data);
232   adler = VG_(adler32)(adler, &wr_first8[4], 4);
233   /* Fold in the data values */
234   adler = VG_(adler32)(adler, req->data, req->n_data);
235   write_UInt_le(&wr_first8[0], adler);
236
237   Int r = my_write(sd, &wr_first8[0], 8);
238   if (r != 8) return NULL;
239   vg_assert(req->n_data >= 4); // else ill formed -- no KIND field
240   r = my_write(sd, req->data, req->n_data);
241   if (r != req->n_data) return NULL;
242
243   /* So, the request is sent.  Now get a request of the same format
244      out of the channel. */
245   UChar rd_first8[8];  // adler32; length32
246   r = my_read(sd, &rd_first8[0], 8);
247   if (r != 8) return NULL;
248   UInt rd_adler = read_UInt_le(&rd_first8[0]);
249   UInt rd_len   = read_UInt_le(&rd_first8[4]);
250   /* Allocate a Frame to hold the result data, and read into it. */
251   // Reject obviously-insane length fields.
252   if (rd_len < 4 || rd_len > 4*1024*1024) return NULL;
253   Frame* res = ML_(dinfo_zalloc)("di.do_transaction.1", sizeof(Frame));
254   res->n_data = rd_len;
255   res->data = ML_(dinfo_zalloc)("di.do_transaction.2", rd_len);
256   r = my_read(sd, res->data, res->n_data);
257   if (r != rd_len) return NULL;
258
259   if (0) VG_(printf)("CLIENT: recv %c%c%c%c\n",
260                      res->data[0], res->data[1], res->data[2], res->data[3]);
261
262   /* Compute the checksum for the received data, and check it. */
263   adler = VG_(adler32)(0, NULL, 0); // initial value
264   adler = VG_(adler32)(adler, &rd_first8[4], 4);
265   if (res->n_data > 0)
266      adler = VG_(adler32)(adler, res->data, res->n_data);
267
268   if (adler/*computed*/ != rd_adler/*expected*/) return NULL;
269   return res;
270}
271
272static void free_Frame ( Frame* fr )
273{
274   vg_assert(fr && fr->data);
275   ML_(dinfo_free)(fr->data);
276   ML_(dinfo_free)(fr);
277}
278
279static Frame* mk_Frame_noargs ( const HChar* tag )
280{
281   vg_assert(VG_(strlen)(tag) == 4);
282   Frame* f = ML_(dinfo_zalloc)("di.mFn.1", sizeof(Frame));
283   f->n_data = 4;
284   f->data = ML_(dinfo_zalloc)("di.mFn.2", f->n_data);
285   VG_(memcpy)(&f->data[0], tag, 4);
286   return f;
287}
288
289static Frame* mk_Frame_le64_le64_le64 ( const HChar* tag,
290                                        ULong n1, ULong n2, ULong n3 )
291{
292   vg_assert(VG_(strlen)(tag) == 4);
293   Frame* f = ML_(dinfo_zalloc)("di.mFlll.1", sizeof(Frame));
294   f->n_data = 4 + 3*8;
295   f->data = ML_(dinfo_zalloc)("di.mFlll.2", f->n_data);
296   VG_(memcpy)(&f->data[0], tag, 4);
297   write_ULong_le(&f->data[4 + 0*8], n1);
298   write_ULong_le(&f->data[4 + 1*8], n2);
299   write_ULong_le(&f->data[4 + 2*8], n3);
300   return f;
301}
302
303static Frame* mk_Frame_asciiz ( const HChar* tag, const HChar* str )
304{
305   vg_assert(VG_(strlen)(tag) == 4);
306   Frame* f = ML_(dinfo_zalloc)("di.mFa.1", sizeof(Frame));
307   SizeT n_str = VG_(strlen)(str);
308   f->n_data = 4 + n_str + 1;
309   f->data = ML_(dinfo_zalloc)("di.mFa.2", f->n_data);
310   VG_(memcpy)(&f->data[0], tag, 4);
311   VG_(memcpy)(&f->data[4], str, n_str);
312   vg_assert(f->data[4 + n_str] == 0);
313   return f;
314}
315
316static Bool parse_Frame_le64 ( const Frame* fr, const HChar* tag,
317                               /*OUT*/ULong* n1 )
318{
319   vg_assert(VG_(strlen)(tag) == 4);
320   if (!fr || !fr->data) return False;
321   if (fr->n_data < 4) return False;
322   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
323   if (fr->n_data != 4 + 1*8) return False;
324   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
325   return True;
326}
327
328static Bool parse_Frame_le64_le64 ( const Frame* fr, const HChar* tag,
329                                    /*OUT*/ULong* n1, /*OUT*/ULong* n2 )
330{
331   vg_assert(VG_(strlen)(tag) == 4);
332   if (!fr || !fr->data) return False;
333   if (fr->n_data < 4) return False;
334   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
335   if (fr->n_data != 4 + 2*8) return False;
336   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
337   *n2 = read_ULong_le(&fr->data[4 + 1*8]);
338   return True;
339}
340
341static Bool parse_Frame_asciiz ( const Frame* fr, const HChar* tag,
342                                 /*OUT*/UChar** str )
343{
344   vg_assert(VG_(strlen)(tag) == 4);
345   if (!fr || !fr->data) return False;
346   if (fr->n_data < 4) return False;
347   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
348   if (fr->n_data < 5) return False; // else there isn't even enough
349                                     // space for the terminating zero
350   /* Find the terminating zero and ensure it's right at the end
351      of the data.  If not, the frame is malformed. */
352   SizeT i = 4;
353   while (True) {
354      if (i >= fr->n_data) break;
355      if (fr->data[i] == 0) break;
356      i++;
357   }
358   vg_assert(i <= fr->n_data);
359   if (i == fr->n_data-1 && fr->data[i] == 0) {
360      *str = &fr->data[4];
361      return True;
362   } else {
363      return False;
364   }
365}
366
367static Bool parse_Frame_le64_le64_le64_bytes (
368               const Frame* fr, const HChar* tag,
369               /*OUT*/ULong* n1, /*OUT*/ULong* n2, /*OUT*/ULong* n3,
370               /*OUT*/UChar** data, /*OUT*/ULong* n_data
371            )
372{
373   vg_assert(VG_(strlen)(tag) == 4);
374   if (!fr || !fr->data) return False;
375   if (fr->n_data < 4) return False;
376   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
377   if (fr->n_data < 4 + 3*8) return False;
378   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
379   *n2 = read_ULong_le(&fr->data[4 + 1*8]);
380   *n3 = read_ULong_le(&fr->data[4 + 2*8]);
381   *data   = &fr->data[4 + 3*8];
382   *n_data = fr->n_data - (4 + 3*8);
383   vg_assert(fr->n_data >= 4 + 3*8);
384   return True;
385}
386
387static DiOffT block_round_down ( DiOffT i )
388{
389   return i & ((DiOffT)~(CACHE_ENTRY_SIZE-1));
390}
391
392/* Is this offset inside this CEnt? */
393static inline Bool is_in_CEnt ( const CEnt* cent, DiOffT off )
394{
395   /* This assertion is checked by set_CEnt, so checking it here has
396      no benefit, whereas skipping it does remove it from the hottest
397      path. */
398   /* vg_assert(cent->used > 0 && cent->used <= CACHE_ENTRY_SIZE); */
399   /* What we want to return is:
400        cent->off <= off && off < cent->off + cent->used;
401      This is however a very hot path, so here's alternative that uses
402      only one conditional branch, using the following transformation,
403      where all quantities are unsigned:
404              x >= LO && x < LO+N
405         -->  x-LO >= 0 && x-LO < LO+N-LO
406         -->  x-LO >= 0 && x-LO < N
407         -->  x-LO < N
408      This is however only valid when the original bounds, that is, LO
409      .. LO+N-1, do not wrap around the end of the address space.  That
410      is, we require that LO <= LO+N-1.  But that's OK .. we don't
411      expect wraparounds in CEnts or for that matter any object
412      allocated from C-land.  See Hacker's Delight, Chapter 4.1,
413      "Checking Bounds of Integers", for more details.
414   */
415   return off - cent->off < cent->used;
416}
417
418/* Allocate a new CEnt, connect it to |img|, and return its index. */
419static UInt alloc_CEnt ( DiImage* img )
420{
421   vg_assert(img);
422   vg_assert(img->ces_used < CACHE_N_ENTRIES);
423   UInt entNo = img->ces_used;
424   img->ces_used++;
425   vg_assert(img->ces[entNo] == NULL);
426   img->ces[entNo] = ML_(dinfo_zalloc)("di.alloc_CEnt.1", sizeof(CEnt));
427   return entNo;
428}
429
430/* Move the given entry to the top and slide those above it down by 1,
431   to make space. */
432static void move_CEnt_to_top ( DiImage* img, UInt entNo )
433{
434   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
435   vg_assert(entNo > 0 && entNo < img->ces_used);
436   CEnt* tmp = img->ces[entNo];
437   while (entNo > 0) {
438      img->ces[entNo] = img->ces[entNo-1];
439      entNo--;
440   }
441   img->ces[0] = tmp;
442}
443
/* Set the given entry so that it has a chunk of the file containing
   the given offset.  It is this function that brings data into the
   cache, either by reading the local file or pulling it from the
   remote server.

   |entNo| must name an already-allocated slot and |off| must lie
   inside the image.  The chunk loaded starts at |off| rounded down to
   a multiple of CACHE_ENTRY_SIZE and is CACHE_ENTRY_SIZE bytes long,
   or shorter if the image ends first.

   Failure handling: a local read that reports an error trips an
   assertion.  Only the error status is examined; the number of bytes
   actually read is not.  Any problem with the server exchange prints
   a diagnostic and then calls give_up__comms_lost. */
static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
{
   SizeT len;
   DiOffT off_orig = off;
   vg_assert(img);
   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
   /* entNo is unsigned, so only the upper-bound half of this check
      can ever fail. */
   vg_assert(entNo >= 0 && entNo < img->ces_used);
   vg_assert(off < img->size);
   vg_assert(img->ces[entNo] != NULL);
   /* Compute [off, +len) as the slice we are going to read. */
   off = block_round_down(off);
   len = img->size - off;
   if (len > CACHE_ENTRY_SIZE) len = CACHE_ENTRY_SIZE;
   /* It is conceivable that the 'len > 0' bit could fail if we make
      an image with a zero sized file.  But then no 'get' request on
      that image would be valid. */
   vg_assert(len > 0 && len <= CACHE_ENTRY_SIZE);
   vg_assert(off + len <= img->size);
   vg_assert(off <= off_orig && off_orig < off+len);
   /* So, read  off .. off+len-1  into the entry. */
   CEnt* ce = img->ces[entNo];

   /* Disabled debugging aid: prints a running total of bytes read and
      the time elapsed since the previous call. */
   if (0) {
      static UInt t_last = 0;
      static ULong nread = 0;
      UInt now = VG_(read_millisecond_timer)();
      UInt delay = now - t_last;
      t_last = now;
      nread += len;
      VG_(printf)("XXXXXXXX (tot %'lld)  read %'ld  offset %'lld  delay %'u\n",
                  nread, len, off, delay);
   }

   if (img->source.is_local) {
      // Simple: just read it
      SysRes sr = VG_(pread)(img->source.fd, &ce->data[0], (Int)len, off);
      vg_assert(!sr_isError(sr));
   } else {
      // Not so simple: poke the server
      vg_assert(img->source.session_id > 0);
      Frame* req
         = mk_Frame_le64_le64_le64("READ", img->source.session_id, off, len);
      Frame* res = do_transaction(img->source.fd, req);
      free_Frame(req); req = NULL;
      if (!res) goto server_fail;
      ULong  rx_session_id = 0, rx_off = 0, rx_len = 0, rx_zdata_len = 0;
      UChar* rx_data = NULL;
      /* Pretty confusing.  rx_session_id, rx_off and rx_len are copies
         of the values that we requested in the READ frame just above,
         so we can be sure that the server is responding to the right
         request.  It just copies them from the request into the
         response.  rx_data is the actual data, and rx_zdata_len is
         its compressed length.  Hence rx_len must equal len, but
         rx_zdata_len can be different -- smaller, hopefully.. */
      if (!parse_Frame_le64_le64_le64_bytes
          (res, "RDOK", &rx_session_id, &rx_off,
                        &rx_len, &rx_data, &rx_zdata_len))
         goto server_fail;
      if (rx_session_id != img->source.session_id
          || rx_off != off || rx_len != len || rx_data == NULL)
         goto server_fail;

      //VG_(memcpy)(&ce->data[0], rx_data, len);
      // Decompress into the destination buffer
      // Tell the lib the max number of output bytes it can write.
      // After the call, this holds the number of bytes actually written,
      // and it's an error if it is different.
      lzo_uint out_len = len;
      Int lzo_rc = lzo1x_decompress_safe(rx_data, rx_zdata_len,
                                         &ce->data[0], &out_len,
                                         NULL);
      Bool ok = lzo_rc == LZO_E_OK && out_len == len;
      if (!ok) goto server_fail;

      free_Frame(res); res = NULL;
      /* Success: jump over the failure handler below. */
      goto end_of_else_clause;
     server_fail:
      /* The server screwed up somehow.  Now what?  Report what we can
         (the server's own FAIL reason, if it sent one) and give up.
         |res| is NULL here iff do_transaction itself failed. */
      if (res) {
         UChar* reason = NULL;
         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                      "%s\n", reason);
         } else {
            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                      "unknown reason\n");
         }
         free_Frame(res); res = NULL;
      } else {
         VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                   "server unexpectedly closed the connection\n");
      }
      give_up__comms_lost();
      /* NOTREACHED */
      vg_assert(0);
     end_of_else_clause:
      {}
   }

   /* The data is in place; record which part of the file it is. */
   ce->off  = off;
   ce->used = len;
   vg_assert(ce->used > 0 && ce->used <= CACHE_ENTRY_SIZE);
}
551
552__attribute__((noinline))
553static UChar get_slowcase ( DiImage* img, DiOffT off )
554{
555   /* Stay sane .. */
556   vg_assert(off < img->size);
557   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
558   UInt i;
559   /* Start the search at entry 1, since the fast-case function
560      checked slot zero already. */
561   for (i = 1; i < img->ces_used; i++) {
562      vg_assert(img->ces[i]);
563      if (is_in_CEnt(img->ces[i], off))
564         break;
565   }
566   vg_assert(i <= img->ces_used);
567   if (i == img->ces_used) {
568      /* It's not in any entry.  Either allocate a new entry or
569         recycle the LRU one. */
570      if (img->ces_used == CACHE_N_ENTRIES) {
571         /* All entries in use.  Recycle the (ostensibly) LRU one. */
572         set_CEnt(img, CACHE_N_ENTRIES-1, off);
573         i = CACHE_N_ENTRIES-1;
574      } else {
575         /* Allocate a new one, and fill it in. */
576         UInt entNo = alloc_CEnt(img);
577         set_CEnt(img, entNo, off);
578         i = entNo;
579      }
580   } else {
581      /* We found it at position 'i'. */
582      vg_assert(i > 0);
583   }
584   if (i > 0) {
585      move_CEnt_to_top(img, i);
586      i = 0;
587   }
588   vg_assert(is_in_CEnt(img->ces[i], off));
589   return img->ces[i]->data[ off - img->ces[i]->off ];
590}
591
592// This is called a lot, so do the usual fast/slow split stuff on it. */
593static inline UChar get ( DiImage* img, DiOffT off )
594{
595   /* Most likely case is, it's in the ces[0] position. */
596   /* ML_(img_from_local_file) requests a read for ces[0] when
597      creating the image.  Hence slot zero is always non-NULL, so we
598      can skip this test. */
599   if (LIKELY(/* img->ces[0] != NULL && */
600              is_in_CEnt(img->ces[0], off))) {
601      return img->ces[0]->data[ off - img->ces[0]->off ];
602   }
603   /* Else we'll have to fish around for it. */
604   return get_slowcase(img, off);
605}
606
607/* Create an image from a file in the local filesystem.  This is
608   relatively straightforward. */
609DiImage* ML_(img_from_local_file)(const HChar* fullpath)
610{
611   SysRes         fd;
612   struct vg_stat stat_buf;
613   DiOffT         size;
614
615   fd = VG_(open)(fullpath, VKI_O_RDONLY, 0);
616   if (sr_isError(fd))
617      return NULL;
618
619   if (VG_(fstat)(sr_Res(fd), &stat_buf) != 0) {
620      VG_(close)(sr_Res(fd));
621      return NULL;
622   }
623
624   size = stat_buf.size;
625   if (size == 0 || size == DiOffT_INVALID
626       || /* size is unrepresentable as a SizeT */
627          size != (DiOffT)(SizeT)(size)) {
628      VG_(close)(sr_Res(fd));
629      return NULL;
630   }
631
632   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
633   img->source.is_local = True;
634   img->source.fd       = sr_Res(fd);
635   img->size            = size;
636   img->ces_used        = 0;
637   img->source.name     = ML_(dinfo_strdup)("di.image.ML_iflf.2", fullpath);
638   /* img->ces is already zeroed out */
639   vg_assert(img->source.fd >= 0);
640
641   /* Force the zeroth entry to be the first chunk of the file.
642      That's likely to be the first part that's requested anyway, and
643      loading it at this point forcing img->cent[0] to always be
644      non-empty, thereby saving us an is-it-empty check on the fast
645      path in get(). */
646   UInt entNo = alloc_CEnt(img);
647   vg_assert(entNo == 0);
648   set_CEnt(img, 0, 0);
649
650   return img;
651}
652
653
654/* Create an image from a file on a remote debuginfo server.  This is
655   more complex.  There are lots of ways in which it can fail. */
656DiImage* ML_(img_from_di_server)(const HChar* filename,
657                                 const HChar* serverAddr)
658{
659   if (filename == NULL || serverAddr == NULL)
660      return NULL;
661
662   /* The filename must be a plain filename -- no slashes at all. */
663   if (VG_(strchr)(filename, '/') != NULL)
664      return NULL;
665
666   /* Try to connect to the server.  A side effect of this is to parse
667      and reject, if syntactically invalid, |serverAddr|.  Reasons why
668      this could fail:
669      - serverAddr is not of the form d.d.d.d:d or d.d.d.d
670      - attempt to connect to that address:port failed
671   */
672   Int sd = VG_(connect_via_socket)(serverAddr);
673   if (sd < 0)
674      return NULL;
675   if (!set_blocking(sd))
676      return NULL;
677   Int one = 1;
678   Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY,
679                            &one, sizeof(one));
680   vg_assert(sr == 0);
681
682   /* Ok, we got a connection.  Ask it for version string, so as to be
683      reasonably sure we're talking to an instance of
684      auxprogs/valgrind-di-server and not to some other random program
685      that happens to be listening on that port. */
686   Frame* req = mk_Frame_noargs("VERS");
687   Frame* res = do_transaction(sd, req);
688   if (res == NULL)
689      goto fail; // do_transaction failed?!
690   UChar* vstr = NULL;
691   if (!parse_Frame_asciiz(res, "VEOK", &vstr))
692      goto fail; // unexpected response kind, or invalid ID string
693   vg_assert(vstr);
694   if (VG_(strcmp)("Valgrind Debuginfo Server, Version 1",
695                   (const HChar*)vstr) != 0)
696      goto fail; // wrong version string
697   free_Frame(req);
698   free_Frame(res);
699   req = NULL;
700   res = NULL;
701
702   /* Server seems plausible.  Present it with the name of the file we
703      want and see if it'll give us back a session ID for it. */
704   req = mk_Frame_asciiz("OPEN", filename);
705   res = do_transaction(sd, req);
706   if (res == NULL)
707      goto fail;
708   ULong session_id = 0, size = 0;
709   if (!parse_Frame_le64_le64(res, "OPOK", &session_id, &size))
710      goto fail;
711   free_Frame(req);
712   free_Frame(res);
713   req = NULL;
714   res = NULL;
715
716   /* We have a session ID.  We're ready to roll. */
717   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_ifds.1", sizeof(DiImage));
718   img->source.is_local   = False;
719   img->source.fd         = sd;
720   img->source.session_id = session_id;
721   img->size              = size;
722   img->ces_used          = 0;
723   img->source.name       = ML_(dinfo_zalloc)("di.image.ML_ifds.2",
724                                              20 + VG_(strlen)(filename)
725                                                 + VG_(strlen)(serverAddr));
726   VG_(sprintf)(img->source.name, "%s at %s", filename, serverAddr);
727
728   /* img->ces is already zeroed out */
729   vg_assert(img->source.fd >= 0);
730
731   /* See comment on equivalent bit in ML_(img_from_local_file) for
732      rationale. */
733   UInt entNo = alloc_CEnt(img);
734   vg_assert(entNo == 0);
735   set_CEnt(img, 0, 0);
736
737   return img;
738
739  fail:
740   free_Frame(req);
741   if (res) {
742      UChar* reason = NULL;
743      if (parse_Frame_asciiz(res, "FAIL", &reason)) {
744         // HACK: if it's just telling us that the file can't
745         // be opened, don't print it, else we'll get flooded with
746         // such complaints, one for each main object for which there
747         // isn't a debuginfo file on the server.
748         if (0 != VG_(strcmp)((const HChar*)reason, "OPEN: cannot open file"))
749            VG_(umsg)("ML_(img_from_di_server): fail: %s\n", reason);
750      } else {
751         VG_(umsg)("ML_(img_from_di_server): fail: unknown reason\n");
752      }
753      free_Frame(res);
754   }
755   VG_(close)(sd);
756   return NULL;
757}
758
759void ML_(img_done)(DiImage* img)
760{
761   vg_assert(img);
762   if (img->source.is_local) {
763      /* Close the file; nothing else to do. */
764      vg_assert(img->source.session_id == 0);
765      VG_(close)(img->source.fd);
766   } else {
767      /* Close the socket.  The server can detect this and will scrub
768         the connection when it happens, so there's no need to tell it
769         explicitly by sending it a "CLOSE" message, or any such. */
770      vg_assert(img->source.session_id != 0);
771      VG_(close)(img->source.fd);
772   }
773
774   /* Free up the cache entries, ultimately |img| itself. */
775   UInt i;
776   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
777   for (i = 0; i < img->ces_used; i++) {
778      ML_(dinfo_free)(img->ces[i]);
779   }
780   /* Take the opportunity to sanity check the rest. */
781   for (i = i; i < img->ces_used; i++) {
782      vg_assert(img->ces[i] == NULL);
783   }
784   ML_(dinfo_free)(img->source.name);
785   ML_(dinfo_free)(img);
786}
787
788DiOffT ML_(img_size)(const DiImage* img)
789{
790   vg_assert(img);
791   return img->size;
792}
793
794inline Bool ML_(img_valid)(const DiImage* img, DiOffT offset, SizeT size)
795{
796   vg_assert(img);
797   vg_assert(offset != DiOffT_INVALID);
798   return img->size > 0 && offset + size <= (DiOffT)img->size;
799}
800
801/* Check the given range is valid, and if not, shut down the system.
802   An invalid range would imply that we're trying to read outside the
803   image, which normally means the image is corrupted somehow, or the
804   caller is buggy.  Recovering is too complex, and we have
805   probably-corrupt debuginfo, so just give up. */
806static void ensure_valid(const DiImage* img, DiOffT offset, SizeT size,
807                         const HChar* caller)
808{
809   if (LIKELY(ML_(img_valid)(img, offset, size)))
810      return;
811   VG_(umsg)("Valgrind: debuginfo reader: ensure_valid failed:\n");
812   VG_(umsg)("Valgrind:   during call to %s\n", caller);
813   VG_(umsg)("Valgrind:   request for range [%llu, +%llu) exceeds\n",
814             (ULong)offset, (ULong)size);
815   VG_(umsg)("Valgrind:   valid image size of %llu for image:\n",
816             (ULong)img->size);
817   VG_(umsg)("Valgrind:   \"%s\"\n", img->source.name);
818   give_up__image_overrun();
819}
820
821
822void ML_(img_get)(/*OUT*/void* dst,
823                  DiImage* img, DiOffT offset, SizeT size)
824{
825   vg_assert(img);
826   vg_assert(size > 0);
827   ensure_valid(img, offset, size, "ML_(img_get)");
828   SizeT i;
829   for (i = 0; i < size; i++) {
830      ((UChar*)dst)[i] = get(img, offset + i);
831   }
832}
833
834SizeT ML_(img_get_some)(/*OUT*/void* dst,
835                        DiImage* img, DiOffT offset, SizeT size)
836{
837   vg_assert(img);
838   vg_assert(size > 0);
839   ensure_valid(img, offset, size, "ML_(img_get_some)");
840   UChar* dstU = (UChar*)dst;
841   /* Use |get| in the normal way to get the first byte of the range.
842      This guarantees to put the cache entry containing |offset| in
843      position zero. */
844   dstU[0] = get(img, offset);
845   /* Now just read as many bytes as we can (or need) directly out of
846      entry zero, without bothering to call |get| each time. */
847   const CEnt* ce = img->ces[0];
848   vg_assert(ce && ce->used >= 1);
849   vg_assert(is_in_CEnt(ce, offset));
850   SizeT nToCopy = size - 1;
851   SizeT nAvail  = (SizeT)(ce->used - (offset + 1 - ce->off));
852   vg_assert(nAvail >= 0 && nAvail <= ce->used-1);
853   if (nAvail < nToCopy) nToCopy = nAvail;
854   VG_(memcpy)(&dstU[1], &ce->data[offset + 1 - ce->off], nToCopy);
855   return nToCopy + 1;
856}
857
858
859SizeT ML_(img_strlen)(DiImage* img, DiOffT off)
860{
861   ensure_valid(img, off, 1, "ML_(img_strlen)");
862   SizeT i = 0;
863   while (get(img, off + i) != 0) i++;
864   return i;
865}
866
867HChar* ML_(img_strdup)(DiImage* img, const HChar* cc, DiOffT offset)
868{
869   ensure_valid(img, offset, 1, "ML_(img_strdup)");
870   SizeT  len = ML_(img_strlen)(img, offset);
871   HChar* res = ML_(dinfo_zalloc)(cc, len+1);
872   SizeT  i;
873   for (i = 0; i < len; i++) {
874      res[i] = get(img, offset+i);
875   }
876   vg_assert(res[len] == 0);
877   return res;
878}
879
880Int ML_(img_strcmp)(DiImage* img, DiOffT off1, DiOffT off2)
881{
882   ensure_valid(img, off1, 1, "ML_(img_strcmp)(first arg)");
883   ensure_valid(img, off2, 1, "ML_(img_strcmp)(second arg)");
884   while (True) {
885      UChar c1 = get(img, off1);
886      UChar c2 = get(img, off2);
887      if (c1 < c2) return -1;
888      if (c1 > c2) return 1;
889      if (c1 == 0) return 0;
890      off1++; off2++;
891   }
892}
893
894Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2)
895{
896   ensure_valid(img, off1, 1, "ML_(img_strcmp_c)");
897   while (True) {
898      UChar c1 = get(img, off1);
899      UChar c2 = *(const UChar*)str2;
900      if (c1 < c2) return -1;
901      if (c1 > c2) return 1;
902      if (c1 == 0) return 0;
903      off1++; str2++;
904   }
905}
906
907UChar ML_(img_get_UChar)(DiImage* img, DiOffT offset)
908{
909   ensure_valid(img, offset, 1, "ML_(img_get_UChar)");
910   return get(img, offset);
911}
912
913UShort ML_(img_get_UShort)(DiImage* img, DiOffT offset)
914{
915   UShort r;
916   ML_(img_get)(&r, img, offset, sizeof(r));
917   return r;
918}
919
920UInt ML_(img_get_UInt)(DiImage* img, DiOffT offset)
921{
922   UInt r;
923   ML_(img_get)(&r, img, offset, sizeof(r));
924   return r;
925}
926
927ULong ML_(img_get_ULong)(DiImage* img, DiOffT offset)
928{
929   ULong r;
930   ML_(img_get)(&r, img, offset, sizeof(r));
931   return r;
932}
933
934
935/*
936 * This routine for calculating the CRC for a separate debug file
937 * is GPLed code borrowed from GNU binutils.
938 */
939UInt ML_(img_calc_gnu_debuglink_crc32)(DiImage* img)
940{
941  static const UInt crc32_table[256] =
942    {
943      0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
944      0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
945      0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
946      0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
947      0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
948      0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
949      0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
950      0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
951      0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
952      0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
953      0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
954      0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
955      0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
956      0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
957      0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
958      0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
959      0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
960      0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
961      0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
962      0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
963      0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
964      0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
965      0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
966      0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
967      0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
968      0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
969      0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
970      0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
971      0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
972      0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
973      0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
974      0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
975      0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
976      0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
977      0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
978      0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
979      0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
980      0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
981      0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
982      0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
983      0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
984      0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
985      0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
986      0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
987      0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
988      0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
989      0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
990      0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
991      0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
992      0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
993      0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
994      0x2d02ef8d
995    };
996
997   vg_assert(img);
998
999   /* If the image is local, calculate the CRC here directly.  If it's
1000      remote, forward the request to the server. */
1001   if (img->source.is_local) {
1002      /* Work through the image in 1 KB chunks. */
1003      UInt   crc      = 0xFFFFFFFF;
1004      DiOffT img_szB  = ML_(img_size)(img);
1005      DiOffT curr_off = 0;
1006      while (1) {
1007         vg_assert(curr_off >= 0 && curr_off <= img_szB);
1008         if (curr_off == img_szB) break;
1009         DiOffT avail = img_szB - curr_off;
1010         vg_assert(avail > 0 && avail <= img_szB);
1011         if (avail > 1024) avail = 1024;
1012         UChar buf[1024];
1013         SizeT nGot = ML_(img_get_some)(buf, img, curr_off, avail);
1014         vg_assert(nGot >= 1 && nGot <= avail);
1015         UInt i;
1016         for (i = 0; i < (UInt)nGot; i++)
1017            crc = crc32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
1018         curr_off += nGot;
1019      }
1020      return ~crc & 0xFFFFFFFF;
1021   } else {
1022      Frame* req = mk_Frame_noargs("CRC3");
1023      Frame* res = do_transaction(img->source.fd, req);
1024      if (!res) goto remote_crc_fail;
1025      ULong crc32 = 0;
1026      if (!parse_Frame_le64(res, "CROK", &crc32)) goto remote_crc_fail;
1027      if ((crc32 & ~0xFFFFFFFFULL) != 0) goto remote_crc_fail;
1028      free_Frame(req);
1029      free_Frame(res);
1030      return (UInt)crc32;
1031     remote_crc_fail:
1032
1033      // XXXX common this up with the READ diagnostic cases
1034      if (res) {
1035         UChar* reason = NULL;
1036         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
1037            VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1038                      "%s\n", reason);
1039         } else {
1040            VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1041                      "unknown reason\n");
1042         }
1043      } else {
1044         VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1045                   "server unexpectedly closed the connection\n");
1046      }
1047
1048      if (req) free_Frame(req);
1049      if (res) free_Frame(res);
1050      // FIXME: now what?
1051      give_up__comms_lost();
1052      /* NOTREACHED */
1053      vg_assert(0);
1054   }
1055   /*NOTREACHED*/
1056   vg_assert(0);
1057}
1058
1059////////////////////////////////////////////////////
1060#include "minilzo-inl.c"
1061
1062/*--------------------------------------------------------------------*/
1063/*--- end                                                  image.c ---*/
1064/*--------------------------------------------------------------------*/
1065