1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- An abstraction that provides a file-reading mechanism.       ---*/
5/*---                                                      image.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2013-2013 Mozilla Foundation
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32/* Contributed by Julian Seward <jseward@acm.org> */
33
34/* See the corresponding auxprogs/valgrind-di-server.c for a list of
35   cleanups for this file and itself. */
36
37#include "pub_core_basics.h"
38#include "pub_core_vki.h"
39#include "pub_core_libcbase.h"
40#include "pub_core_libcassert.h"
41#include "pub_core_libcprint.h"
42#include "pub_core_libcproc.h"     /* VG_(read_millisecond_timer) */
43#include "pub_core_libcfile.h"
44#include "priv_misc.h"             /* dinfo_zalloc/free/strdup */
45#include "priv_image.h"            /* self */
46
47#include "minilzo.h"
48
/* Geometry of the per-image block cache.  Each entry holds up to
   2^CACHE_ENTRY_SIZE_BITS bytes of the file, and an image owns at
   most CACHE_N_ENTRIES such entries. */
#define CACHE_ENTRY_SIZE_BITS (12+1)
#define CACHE_ENTRY_SIZE      (1 << CACHE_ENTRY_SIZE_BITS)

#define CACHE_N_ENTRIES       32
53
54/* An entry in the cache. */
55typedef
56   struct {
57      DiOffT off; // file offset for data[0]
58      SizeT  used; // 1 .. sizeof(data), or 0 to denote not-in-use
59      UChar  data[CACHE_ENTRY_SIZE];
60   }
61   CEnt;
62
63/* Source for files */
64typedef
65   struct {
66      // True: img is of local file.  False: img is from a server.
67      Bool  is_local;
68      // The fd for the local file, or sd for a remote server.
69      Int   fd;
70      // The name.  In ML_(dinfo_zalloc)'d space.  Used only for printing
71      // error messages; hence it doesn't really matter what this contains.
72      HChar* name;
73      // The rest of these fields are only valid when using remote files
74      // (that is, using a debuginfo server; hence when is_local==False)
75      // Session ID allocated to us by the server.  Cannot be zero.
76      ULong session_id;
77   }
78   Source;
79
80struct _DiImage {
81   // The source -- how to get hold of the file we are reading
82   Source source;
83   // Total size of the image.
84   SizeT size;
85   // The number of entries used.  0 .. CACHE_N_ENTRIES
86   UInt  ces_used;
87   // Pointers to the entries.  ces[0 .. ces_used-1] are non-NULL.
88   // ces[ces_used .. CACHE_N_ENTRIES-1] are NULL.
89   // The non-NULL entries may be arranged arbitrarily.  We expect to use
90   // a pseudo-LRU scheme though.
91   CEnt* ces[CACHE_N_ENTRIES];
92};
93
94/* A frame.  The first 4 bytes of |data| give the kind of the frame,
95   and the rest of it is kind-specific data. */
96typedef  struct { UChar* data; SizeT n_data; }  Frame;
97
98static void write_UInt_le ( /*OUT*/UChar* dst, UInt n )
99{
100   Int i;
101   for (i = 0; i <= 3; i++) {
102      dst[i] = (UChar)(n & 0xFF);
103      n >>= 8;
104   }
105}
106
107static UInt read_UInt_le ( UChar* src )
108{
109   UInt r = 0;
110   Int i;
111   for (i = 3; i >= 0; i--) {
112      r <<= 8;
113      r += (UInt)src[i];
114   }
115   return r;
116}
117
118static void write_ULong_le ( /*OUT*/UChar* dst, ULong n )
119{
120   Int i;
121   for (i = 0; i <= 7; i++) {
122      dst[i] = (UChar)(n & 0xFF);
123      n >>= 8;
124   }
125}
126
127static ULong read_ULong_le ( UChar* src )
128{
129   ULong r = 0;
130   Int i;
131   for (i = 7; i >= 0; i--) {
132      r <<= 8;
133      r += (ULong)src[i];
134   }
135   return r;
136}
137
138
139/* Set |sd| to be blocking.  Returns True on success. */
140static Bool set_blocking ( int sd )
141{
142   Int res;
143   res = VG_(fcntl)(sd, VKI_F_GETFL, 0/*ignored*/);
144   if (res != -1)
145      res = VG_(fcntl)(sd, VKI_F_SETFL, res & ~VKI_O_NONBLOCK);
146   return (res != -1);
147}
148
149/* Tries to read 'len' bytes from fd, blocking if necessary.  Assumes
150   fd has been set in blocking mode.  If it returns with the number of
151   bytes read < len, it means that either fd was closed, or there was
152   an error on it. */
153static Int my_read ( Int fd, UChar* buf, Int len )
154{
155   Int nRead = 0;
156   while (1) {
157      if (nRead == len) return nRead;
158      vg_assert(nRead < len);
159      Int nNeeded = len - nRead;
160      vg_assert(nNeeded > 0);
161      Int n = VG_(read)(fd, &buf[nRead], nNeeded);
162      if (n <= 0) return nRead; /* error or EOF */
163      nRead += n;
164   }
165}
166
167/* Tries to write 'len' bytes to fd, blocking if necessary.  Assumes
168   fd has been set in blocking mode.  If it returns with the number of
169   bytes written < len, it means that either fd was closed, or there was
170   an error on it. */
171static Int my_write ( Int fd, UChar* buf, Int len )
172{
173   Int nWritten = 0;
174   while (1) {
175      if (nWritten == len) return nWritten;
176      vg_assert(nWritten < len);
177      Int nStillToDo = len - nWritten;
178      vg_assert(nStillToDo > 0);
179      Int n = VG_(write_socket)(fd, &buf[nWritten], nStillToDo);
180      if (n < 0) return nWritten; /* error or EOF */
181      nWritten += n;
182   }
183}
184
185/* If we lost communication with the remote server, just give up.
186   Recovering is too difficult. */
187static void give_up__comms_lost(void)
188{
189   VG_(umsg)("\n");
190   VG_(umsg)(
191      "Valgrind: debuginfo reader: Lost communication with the remote\n");
192   VG_(umsg)(
193      "Valgrind: debuginfo server.  I can't recover.  Giving up.  Sorry.\n");
194   VG_(umsg)("\n");
195   VG_(exit)(1);
196   /*NOTREACHED*/
197}
198
199static void give_up__image_overrun(void)
200{
201   VG_(umsg)("\n");
202   VG_(umsg)(
203      "Valgrind: debuginfo reader: Possibly corrupted debuginfo file.\n");
204   VG_(umsg)(
205      "Valgrind: I can't recover.  Giving up.  Sorry.\n");
206   VG_(umsg)("\n");
207   VG_(exit)(1);
208   /*NOTREACHED*/
209}
210
211/* "Do" a transaction: that is, send the given frame to the server and
212   return the frame it sends back.  Caller owns the resulting frame
213   and must free it.  A NULL return means the transaction failed for
214   some reason. */
215static Frame* do_transaction ( Int sd, Frame* req )
216{
217   if (0) VG_(printf)("CLIENT: send %c%c%c%c\n",
218                      req->data[0], req->data[1], req->data[2], req->data[3]);
219
220   /* What goes on the wire is:
221         adler(le32) n_data(le32) data[0 .. n_data-1]
222      where the checksum covers n_data as well as data[].
223   */
224   /* The initial Adler-32 value */
225   UInt adler = VG_(adler32)(0, NULL, 0);
226
227   /* Fold in the length field, encoded as le32. */
228   UChar wr_first8[8];
229   write_UInt_le(&wr_first8[4], req->n_data);
230   adler = VG_(adler32)(adler, &wr_first8[4], 4);
231   /* Fold in the data values */
232   adler = VG_(adler32)(adler, req->data, req->n_data);
233   write_UInt_le(&wr_first8[0], adler);
234
235   Int r = my_write(sd, &wr_first8[0], 8);
236   if (r != 8) return NULL;
237   vg_assert(req->n_data >= 4); // else ill formed -- no KIND field
238   r = my_write(sd, req->data, req->n_data);
239   if (r != req->n_data) return NULL;
240
241   /* So, the request is sent.  Now get a request of the same format
242      out of the channel. */
243   UChar rd_first8[8];  // adler32; length32
244   r = my_read(sd, &rd_first8[0], 8);
245   if (r != 8) return NULL;
246   UInt rd_adler = read_UInt_le(&rd_first8[0]);
247   UInt rd_len   = read_UInt_le(&rd_first8[4]);
248   /* Allocate a Frame to hold the result data, and read into it. */
249   // Reject obviously-insane length fields.
250   if (rd_len < 4 || rd_len > 4*1024*1024) return NULL;
251   Frame* res = ML_(dinfo_zalloc)("di.do_transaction.1", sizeof(Frame));
252   res->n_data = rd_len;
253   res->data = ML_(dinfo_zalloc)("di.do_transaction.2", rd_len);
254   r = my_read(sd, res->data, res->n_data);
255   if (r != rd_len) return NULL;
256
257   if (0) VG_(printf)("CLIENT: recv %c%c%c%c\n",
258                      res->data[0], res->data[1], res->data[2], res->data[3]);
259
260   /* Compute the checksum for the received data, and check it. */
261   adler = VG_(adler32)(0, NULL, 0); // initial value
262   adler = VG_(adler32)(adler, &rd_first8[4], 4);
263   if (res->n_data > 0)
264      adler = VG_(adler32)(adler, res->data, res->n_data);
265
266   if (adler/*computed*/ != rd_adler/*expected*/) return NULL;
267   return res;
268}
269
270static void free_Frame ( Frame* fr )
271{
272   vg_assert(fr && fr->data);
273   ML_(dinfo_free)(fr->data);
274   ML_(dinfo_free)(fr);
275}
276
277static Frame* mk_Frame_noargs ( const HChar* tag )
278{
279   vg_assert(VG_(strlen)(tag) == 4);
280   Frame* f = ML_(dinfo_zalloc)("di.mFn.1", sizeof(Frame));
281   f->n_data = 4;
282   f->data = ML_(dinfo_zalloc)("di.mFn.2", f->n_data);
283   VG_(memcpy)(&f->data[0], tag, 4);
284   return f;
285}
286
287static Frame* mk_Frame_le64_le64_le64 ( const HChar* tag,
288                                        ULong n1, ULong n2, ULong n3 )
289{
290   vg_assert(VG_(strlen)(tag) == 4);
291   Frame* f = ML_(dinfo_zalloc)("di.mFlll.1", sizeof(Frame));
292   f->n_data = 4 + 3*8;
293   f->data = ML_(dinfo_zalloc)("di.mFlll.2", f->n_data);
294   VG_(memcpy)(&f->data[0], tag, 4);
295   write_ULong_le(&f->data[4 + 0*8], n1);
296   write_ULong_le(&f->data[4 + 1*8], n2);
297   write_ULong_le(&f->data[4 + 2*8], n3);
298   return f;
299}
300
301static Frame* mk_Frame_asciiz ( const HChar* tag, const HChar* str )
302{
303   vg_assert(VG_(strlen)(tag) == 4);
304   Frame* f = ML_(dinfo_zalloc)("di.mFa.1", sizeof(Frame));
305   SizeT n_str = VG_(strlen)(str);
306   f->n_data = 4 + n_str + 1;
307   f->data = ML_(dinfo_zalloc)("di.mFa.2", f->n_data);
308   VG_(memcpy)(&f->data[0], tag, 4);
309   VG_(memcpy)(&f->data[4], str, n_str);
310   vg_assert(f->data[4 + n_str] == 0);
311   return f;
312}
313
314static Bool parse_Frame_le64 ( Frame* fr, const HChar* tag, /*OUT*/ULong* n1 )
315{
316   vg_assert(VG_(strlen)(tag) == 4);
317   if (!fr || !fr->data) return False;
318   if (fr->n_data < 4) return False;
319   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
320   if (fr->n_data != 4 + 1*8) return False;
321   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
322   return True;
323}
324
325static Bool parse_Frame_le64_le64 ( Frame* fr, const HChar* tag,
326                                    /*OUT*/ULong* n1, /*OUT*/ULong* n2 )
327{
328   vg_assert(VG_(strlen)(tag) == 4);
329   if (!fr || !fr->data) return False;
330   if (fr->n_data < 4) return False;
331   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
332   if (fr->n_data != 4 + 2*8) return False;
333   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
334   *n2 = read_ULong_le(&fr->data[4 + 1*8]);
335   return True;
336}
337
338static Bool parse_Frame_asciiz ( Frame* fr, const HChar* tag,
339                                 /*OUT*/UChar** str )
340{
341   vg_assert(VG_(strlen)(tag) == 4);
342   if (!fr || !fr->data) return False;
343   if (fr->n_data < 4) return False;
344   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
345   if (fr->n_data < 5) return False; // else there isn't even enough
346                                     // space for the terminating zero
347   /* Find the terminating zero and ensure it's right at the end
348      of the data.  If not, the frame is malformed. */
349   SizeT i = 4;
350   while (True) {
351      if (i >= fr->n_data) break;
352      if (fr->data[i] == 0) break;
353      i++;
354   }
355   vg_assert(i <= fr->n_data);
356   if (i == fr->n_data-1 && fr->data[i] == 0) {
357      *str = &fr->data[4];
358      return True;
359   } else {
360      return False;
361   }
362}
363
364static Bool parse_Frame_le64_le64_le64_bytes (
365               Frame* fr, const HChar* tag,
366               /*OUT*/ULong* n1, /*OUT*/ULong* n2, /*OUT*/ULong* n3,
367               /*OUT*/UChar** data, /*OUT*/ULong* n_data
368            )
369{
370   vg_assert(VG_(strlen)(tag) == 4);
371   if (!fr || !fr->data) return False;
372   if (fr->n_data < 4) return False;
373   if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
374   if (fr->n_data < 4 + 3*8) return False;
375   *n1 = read_ULong_le(&fr->data[4 + 0*8]);
376   *n2 = read_ULong_le(&fr->data[4 + 1*8]);
377   *n3 = read_ULong_le(&fr->data[4 + 2*8]);
378   *data   = &fr->data[4 + 3*8];
379   *n_data = fr->n_data - (4 + 3*8);
380   vg_assert(fr->n_data >= 4 + 3*8);
381   return True;
382}
383
384static DiOffT block_round_down ( DiOffT i )
385{
386   return i & ((DiOffT)~(CACHE_ENTRY_SIZE-1));
387}
388
389/* Is this offset inside this CEnt? */
390static inline Bool is_in_CEnt ( CEnt* cent, DiOffT off )
391{
392   /* This assertion is checked by set_CEnt, so checking it here has
393      no benefit, whereas skipping it does remove it from the hottest
394      path. */
395   /* vg_assert(cent->used > 0 && cent->used <= CACHE_ENTRY_SIZE); */
396   return cent->off <= off && off < cent->off + cent->used;
397}
398
399/* Allocate a new CEnt, connect it to |img|, and return its index. */
400static UInt alloc_CEnt ( DiImage* img )
401{
402   vg_assert(img);
403   vg_assert(img->ces_used < CACHE_N_ENTRIES);
404   UInt entNo = img->ces_used;
405   img->ces_used++;
406   vg_assert(img->ces[entNo] == NULL);
407   img->ces[entNo] = ML_(dinfo_zalloc)("di.alloc_CEnt.1", sizeof(CEnt));
408   return entNo;
409}
410
411/* Move the given entry to the top and slide those above it down by 1,
412   to make space. */
413static void move_CEnt_to_top ( DiImage* img, UInt entNo )
414{
415   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
416   vg_assert(entNo > 0 && entNo < img->ces_used);
417   CEnt* tmp = img->ces[entNo];
418   while (entNo > 0) {
419      img->ces[entNo] = img->ces[entNo-1];
420      entNo--;
421   }
422   img->ces[0] = tmp;
423}
424
425/* Set the given entry so that it has a chunk of the file containing
426   the given offset.  It is this function that brings data into the
427   cache, either by reading the local file or pulling it from the
428   remote server. */
429static void set_CEnt ( DiImage* img, UInt entNo, DiOffT off )
430{
431   SizeT len;
432   DiOffT off_orig = off;
433   vg_assert(img);
434   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
435   vg_assert(entNo >= 0 && entNo < img->ces_used);
436   vg_assert(off < img->size);
437   vg_assert(img->ces[entNo] != NULL);
438   /* Compute [off, +len) as the slice we are going to read. */
439   off = block_round_down(off);
440   len = img->size - off;
441   if (len > CACHE_ENTRY_SIZE) len = CACHE_ENTRY_SIZE;
442   /* It is conceivable that the 'len > 0' bit could fail if we make
443      an image with a zero sized file.  But then no 'get' request on
444      that image would be valid. */
445   vg_assert(len > 0 && len <= CACHE_ENTRY_SIZE);
446   vg_assert(off + len <= img->size);
447   vg_assert(off <= off_orig && off_orig < off+len);
448   /* So, read  off .. off+len-1  into the entry. */
449   CEnt* ce = img->ces[entNo];
450
451   if (0) {
452      static UInt t_last = 0;
453      static ULong nread = 0;
454      UInt now = VG_(read_millisecond_timer)();
455      UInt delay = now - t_last;
456      t_last = now;
457      nread += len;
458      VG_(printf)("XXXXXXXX (tot %lld) read %ld offset %lld  %u\n",
459                  nread, len, off, delay);
460   }
461
462   if (img->source.is_local) {
463      // Simple: just read it
464      SysRes sr = VG_(pread)(img->source.fd, &ce->data[0], (Int)len, off);
465      vg_assert(!sr_isError(sr));
466   } else {
467      // Not so simple: poke the server
468      vg_assert(img->source.session_id > 0);
469      Frame* req
470         = mk_Frame_le64_le64_le64("READ", img->source.session_id, off, len);
471      Frame* res = do_transaction(img->source.fd, req);
472      free_Frame(req); req = NULL;
473      if (!res) goto server_fail;
474      ULong  rx_session_id = 0, rx_off = 0, rx_len = 0, rx_zdata_len = 0;
475      UChar* rx_data = NULL;
476      /* Pretty confusing.  rx_sessionid, rx_off and rx_len are copies
477         of the values that we requested in the READ frame just above,
478         so we can be sure that the server is responding to the right
479         request.  It just copies them from the request into the
480         response.  rx_data is the actual data, and rx_zdata_len is
481         its compressed length.  Hence rx_len must equal len, but
482         rx_zdata_len can be different -- smaller, hopefully.. */
483      if (!parse_Frame_le64_le64_le64_bytes
484          (res, "RDOK", &rx_session_id, &rx_off,
485                        &rx_len, &rx_data, &rx_zdata_len))
486         goto server_fail;
487      if (rx_session_id != img->source.session_id
488          || rx_off != off || rx_len != len || rx_data == NULL)
489         goto server_fail;
490
491      //VG_(memcpy)(&ce->data[0], rx_data, len);
492      // Decompress into the destination buffer
493      // Tell the lib the max number of output bytes it can write.
494      // After the call, this holds the number of bytes actually written,
495      // and it's an error if it is different.
496      UInt out_len = len;
497      Int lzo_rc = lzo1x_decompress_safe(rx_data, rx_zdata_len,
498                                         &ce->data[0], (lzo_uint*)&out_len,
499                                         NULL);
500      Bool ok = lzo_rc == LZO_E_OK && out_len == len;
501      if (!ok) goto server_fail;
502
503      free_Frame(res); res = NULL;
504      goto end_of_else_clause;
505     server_fail:
506      /* The server screwed up somehow.  Now what? */
507      if (res) {
508         UChar* reason = NULL;
509         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
510            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
511                      "%s\n", reason);
512         } else {
513            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
514                      "unknown reason\n");
515         }
516         free_Frame(res); res = NULL;
517      } else {
518         VG_(umsg)("set_CEnt (reading data from DI server): fail: "
519                   "server unexpectedly closed the connection\n");
520      }
521      give_up__comms_lost();
522      /* NOTREACHED */
523      vg_assert(0);
524     end_of_else_clause:
525      {}
526   }
527
528   ce->off  = off;
529   ce->used = len;
530   vg_assert(ce->used > 0 && ce->used <= CACHE_ENTRY_SIZE);
531}
532
533__attribute__((noinline))
534static UChar get_slowcase ( DiImage* img, DiOffT off )
535{
536   /* Stay sane .. */
537   vg_assert(off < img->size);
538   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
539   UInt i;
540   /* Start the search at entry 1, since the fast-case function
541      checked slot zero already. */
542   for (i = 1; i < img->ces_used; i++) {
543      vg_assert(img->ces[i]);
544      if (is_in_CEnt(img->ces[i], off))
545         break;
546   }
547   vg_assert(i <= img->ces_used);
548   if (i == img->ces_used) {
549      /* It's not in any entry.  Either allocate a new entry or
550         recycle the LRU one. */
551      if (img->ces_used == CACHE_N_ENTRIES) {
552         /* All entries in use.  Recycle the (ostensibly) LRU one. */
553         set_CEnt(img, CACHE_N_ENTRIES-1, off);
554         i = CACHE_N_ENTRIES-1;
555      } else {
556         /* Allocate a new one, and fill it in. */
557         UInt entNo = alloc_CEnt(img);
558         set_CEnt(img, entNo, off);
559         i = entNo;
560      }
561   } else {
562      /* We found it at position 'i'. */
563      vg_assert(i > 0);
564   }
565   if (i > 0) {
566      move_CEnt_to_top(img, i);
567      i = 0;
568   }
569   vg_assert(is_in_CEnt(img->ces[i], off));
570   return img->ces[i]->data[ off - img->ces[i]->off ];
571}
572
573// This is called a lot, so do the usual fast/slow split stuff on it. */
574static UChar get ( DiImage* img, DiOffT off )
575{
576   /* Most likely case is, it's in the ces[0] position. */
577   /* ML_(img_from_local_file) requests a read for ces[0] when
578      creating the image.  Hence slot zero is always non-NULL, so we
579      can skip this test. */
580   if (LIKELY(/* img->ces[0] != NULL && */
581              is_in_CEnt(img->ces[0], off))) {
582      return img->ces[0]->data[ off - img->ces[0]->off ];
583   }
584   /* Else we'll have to fish around for it. */
585   return get_slowcase(img, off);
586}
587
588/* Create an image from a file in the local filesystem.  This is
589   relatively straightforward. */
590DiImage* ML_(img_from_local_file)(const HChar* fullpath)
591{
592   SysRes         fd;
593   struct vg_stat stat_buf;
594   DiOffT         size;
595
596   fd = VG_(open)(fullpath, VKI_O_RDONLY, 0);
597   if (sr_isError(fd))
598      return NULL;
599
600   if (VG_(fstat)(sr_Res(fd), &stat_buf) != 0) {
601      VG_(close)(sr_Res(fd));
602      return NULL;
603   }
604
605   size = stat_buf.size;
606   if (size == 0 || size == DiOffT_INVALID
607       || /* size is unrepresentable as a SizeT */
608          size != (DiOffT)(SizeT)(size)) {
609      VG_(close)(sr_Res(fd));
610      return NULL;
611   }
612
613   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
614   img->source.is_local = True;
615   img->source.fd       = sr_Res(fd);
616   img->size            = size;
617   img->ces_used        = 0;
618   img->source.name     = ML_(dinfo_strdup)("di.image.ML_iflf.2", fullpath);
619   /* img->ces is already zeroed out */
620   vg_assert(img->source.fd >= 0);
621
622   /* Force the zeroth entry to be the first chunk of the file.
623      That's likely to be the first part that's requested anyway, and
624      loading it at this point forcing img->cent[0] to always be
625      non-empty, thereby saving us an is-it-empty check on the fast
626      path in get(). */
627   UInt entNo = alloc_CEnt(img);
628   vg_assert(entNo == 0);
629   set_CEnt(img, 0, 0);
630
631   return img;
632}
633
634
635/* Create an image from a file on a remote debuginfo server.  This is
636   more complex.  There are lots of ways in which it can fail. */
637DiImage* ML_(img_from_di_server)(const HChar* filename,
638                                 const HChar* serverAddr)
639{
640   if (filename == NULL || serverAddr == NULL)
641      return NULL;
642
643   /* The filename must be a plain filename -- no slashes at all. */
644   if (VG_(strchr)(filename, '/') != NULL)
645      return NULL;
646
647   /* Try to connect to the server.  A side effect of this is to parse
648      and reject, if syntactically invalid, |serverAddr|.  Reasons why
649      this could fail:
650      - serverAddr is not of the form d.d.d.d:d or d.d.d.d
651      - attempt to connect to that address:port failed
652   */
653   Int sd = VG_(connect_via_socket)(serverAddr);
654   if (sd < 0)
655      return NULL;
656   if (!set_blocking(sd))
657      return NULL;
658   Int one = 1;
659   Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY,
660                            &one, sizeof(one));
661   vg_assert(sr == 0);
662
663   /* Ok, we got a connection.  Ask it for version string, so as to be
664      reasonably sure we're talking to an instance of
665      auxprogs/valgrind-di-server and not to some other random program
666      that happens to be listening on that port. */
667   Frame* req = mk_Frame_noargs("VERS");
668   Frame* res = do_transaction(sd, req);
669   if (res == NULL)
670      goto fail; // do_transaction failed?!
671   UChar* vstr = NULL;
672   if (!parse_Frame_asciiz(res, "VEOK", &vstr))
673      goto fail; // unexpected response kind, or invalid ID string
674   vg_assert(vstr);
675   if (VG_(strcmp)("Valgrind Debuginfo Server, Version 1",
676                   (const HChar*)vstr) != 0)
677      goto fail; // wrong version string
678   free_Frame(req);
679   free_Frame(res);
680   req = NULL;
681   res = NULL;
682
683   /* Server seems plausible.  Present it with the name of the file we
684      want and see if it'll give us back a session ID for it. */
685   req = mk_Frame_asciiz("OPEN", filename);
686   res = do_transaction(sd, req);
687   if (res == NULL)
688      goto fail;
689   ULong session_id = 0, size = 0;
690   if (!parse_Frame_le64_le64(res, "OPOK", &session_id, &size))
691      goto fail;
692   free_Frame(req);
693   free_Frame(res);
694   req = NULL;
695   res = NULL;
696
697   /* We have a session ID.  We're ready to roll. */
698   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_ifds.1", sizeof(DiImage));
699   img->source.is_local   = False;
700   img->source.fd         = sd;
701   img->source.session_id = session_id;
702   img->size              = size;
703   img->ces_used          = 0;
704   img->source.name       = ML_(dinfo_zalloc)("di.image.ML_ifds.2",
705                                              20 + VG_(strlen)(filename)
706                                                 + VG_(strlen)(serverAddr));
707   VG_(sprintf)(img->source.name, "%s at %s", filename, serverAddr);
708
709   /* img->ces is already zeroed out */
710   vg_assert(img->source.fd >= 0);
711
712   /* See comment on equivalent bit in ML_(img_from_local_file) for
713      rationale. */
714   UInt entNo = alloc_CEnt(img);
715   vg_assert(entNo == 0);
716   set_CEnt(img, 0, 0);
717
718   return img;
719
720  fail:
721   if (req) free_Frame(req);
722   if (res) {
723      UChar* reason = NULL;
724      if (parse_Frame_asciiz(res, "FAIL", &reason)) {
725         // HACK: if it's just telling us that the file can't
726         // be opened, don't print it, else we'll get flooded with
727         // such complaints, one for each main object for which there
728         // isn't a debuginfo file on the server.
729         if (0 != VG_(strcmp)((const HChar*)reason, "OPEN: cannot open file"))
730            VG_(umsg)("ML_(img_from_di_server): fail: %s\n", reason);
731      } else {
732         VG_(umsg)("ML_(img_from_di_server): fail: unknown reason\n");
733      }
734      free_Frame(res);
735   }
736   VG_(close)(sd);
737   return NULL;
738}
739
740void ML_(img_done)(DiImage* img)
741{
742   vg_assert(img);
743   if (img->source.is_local) {
744      /* Close the file; nothing else to do. */
745      vg_assert(img->source.session_id == 0);
746      VG_(close)(img->source.fd);
747   } else {
748      /* Close the socket.  The server can detect this and will scrub
749         the connection when it happens, so there's no need to tell it
750         explicitly by sending it a "CLOSE" message, or any such. */
751      vg_assert(img->source.session_id != 0);
752      VG_(close)(img->source.fd);
753   }
754
755   /* Free up the cache entries, ultimately |img| itself. */
756   UInt i;
757   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
758   for (i = 0; i < img->ces_used; i++) {
759      ML_(dinfo_free)(img->ces[i]);
760   }
761   /* Take the opportunity to sanity check the rest. */
762   for (i = i; i < img->ces_used; i++) {
763      vg_assert(img->ces[i] == NULL);
764   }
765   ML_(dinfo_free)(img->source.name);
766   ML_(dinfo_free)(img);
767}
768
769DiOffT ML_(img_size)(DiImage* img)
770{
771   vg_assert(img);
772   return img->size;
773}
774
775inline Bool ML_(img_valid)(DiImage* img, DiOffT offset, SizeT size)
776{
777   vg_assert(img);
778   vg_assert(offset != DiOffT_INVALID);
779   return img->size > 0 && offset + size <= (DiOffT)img->size;
780}
781
782/* Check the given range is valid, and if not, shut down the system.
783   An invalid range would imply that we're trying to read outside the
784   image, which normally means the image is corrupted somehow, or the
785   caller is buggy.  Recovering is too complex, and we have
786   probably-corrupt debuginfo, so just give up. */
787static void ensure_valid(DiImage* img, DiOffT offset, SizeT size,
788                         const HChar* caller)
789{
790   if (LIKELY(ML_(img_valid)(img, offset, size)))
791      return;
792   VG_(umsg)("Valgrind: debuginfo reader: ensure_valid failed:\n");
793   VG_(umsg)("Valgrind:   during call to %s\n", caller);
794   VG_(umsg)("Valgrind:   request for range [%llu, +%llu) exceeds\n",
795             (ULong)offset, (ULong)size);
796   VG_(umsg)("Valgrind:   valid image size of %llu for image:\n",
797             (ULong)img->size);
798   VG_(umsg)("Valgrind:   \"%s\"\n", img->source.name);
799   give_up__image_overrun();
800}
801
802
803void ML_(img_get)(/*OUT*/void* dst,
804                  DiImage* img, DiOffT offset, SizeT size)
805{
806   vg_assert(img);
807   vg_assert(size > 0);
808   ensure_valid(img, offset, size, "ML_(img_get)");
809   SizeT i;
810   for (i = 0; i < size; i++) {
811      ((UChar*)dst)[i] = get(img, offset + i);
812   }
813}
814
815SizeT ML_(img_get_some)(/*OUT*/void* dst,
816                        DiImage* img, DiOffT offset, SizeT size)
817{
818   vg_assert(img);
819   vg_assert(size > 0);
820   ensure_valid(img, offset, size, "ML_(img_get_some)");
821   UChar* dstU = (UChar*)dst;
822   /* Use |get| in the normal way to get the first byte of the range.
823      This guarantees to put the cache entry containing |offset| in
824      position zero. */
825   dstU[0] = get(img, offset);
826   /* Now just read as many bytes as we can (or need) directly out of
827      entry zero, without bothering to call |get| each time. */
828   CEnt* ce = img->ces[0];
829   vg_assert(ce && ce->used >= 1);
830   vg_assert(is_in_CEnt(ce, offset));
831   SizeT nToCopy = size - 1;
832   SizeT nAvail  = (SizeT)(ce->used - (offset + 1 - ce->off));
833   vg_assert(nAvail >= 0 && nAvail <= ce->used-1);
834   if (nAvail < nToCopy) nToCopy = nAvail;
835   VG_(memcpy)(&dstU[1], &ce->data[offset + 1 - ce->off], nToCopy);
836   return nToCopy + 1;
837}
838
839
840SizeT ML_(img_strlen)(DiImage* img, DiOffT off)
841{
842   ensure_valid(img, off, 1, "ML_(img_strlen)");
843   SizeT i = 0;
844   while (get(img, off + i) != 0) i++;
845   return i;
846}
847
848HChar* ML_(img_strdup)(DiImage* img, const HChar* cc, DiOffT offset)
849{
850   ensure_valid(img, offset, 1, "ML_(img_strdup)");
851   SizeT  len = ML_(img_strlen)(img, offset);
852   HChar* res = ML_(dinfo_zalloc)(cc, len+1);
853   SizeT  i;
854   for (i = 0; i < len; i++) {
855      res[i] = get(img, offset+i);
856   }
857   vg_assert(res[len] == 0);
858   return res;
859}
860
861Int ML_(img_strcmp)(DiImage* img, DiOffT off1, DiOffT off2)
862{
863   ensure_valid(img, off1, 1, "ML_(img_strcmp)(first arg)");
864   ensure_valid(img, off2, 1, "ML_(img_strcmp)(second arg)");
865   while (True) {
866      UChar c1 = get(img, off1);
867      UChar c2 = get(img, off2);
868      if (c1 < c2) return -1;
869      if (c1 > c2) return 1;
870      if (c1 == 0) return 0;
871      off1++; off2++;
872   }
873}
874
875Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2)
876{
877   ensure_valid(img, off1, 1, "ML_(img_strcmp_c)");
878   while (True) {
879      UChar c1 = get(img, off1);
880      UChar c2 = *(UChar*)str2;
881      if (c1 < c2) return -1;
882      if (c1 > c2) return 1;
883      if (c1 == 0) return 0;
884      off1++; str2++;
885   }
886}
887
888UChar ML_(img_get_UChar)(DiImage* img, DiOffT offset)
889{
890   ensure_valid(img, offset, 1, "ML_(img_get_UChar)");
891   return get(img, offset);
892}
893
894UShort ML_(img_get_UShort)(DiImage* img, DiOffT offset)
895{
896   UShort r;
897   ML_(img_get)(&r, img, offset, sizeof(r));
898   return r;
899}
900
901UInt ML_(img_get_UInt)(DiImage* img, DiOffT offset)
902{
903   UInt r;
904   ML_(img_get)(&r, img, offset, sizeof(r));
905   return r;
906}
907
908ULong ML_(img_get_ULong)(DiImage* img, DiOffT offset)
909{
910   ULong r;
911   ML_(img_get)(&r, img, offset, sizeof(r));
912   return r;
913}
914
915
916/*
917 * This routine for calculating the CRC for a separate debug file
918 * is GPLed code borrowed from GNU binutils.
919 */
920UInt ML_(img_calc_gnu_debuglink_crc32)(DiImage* img)
921{
922  static const UInt crc32_table[256] =
923    {
924      0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
925      0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
926      0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
927      0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
928      0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
929      0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
930      0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
931      0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
932      0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
933      0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
934      0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
935      0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
936      0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
937      0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
938      0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
939      0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
940      0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
941      0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
942      0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
943      0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
944      0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
945      0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
946      0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
947      0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
948      0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
949      0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
950      0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
951      0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
952      0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
953      0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
954      0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
955      0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
956      0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
957      0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
958      0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
959      0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
960      0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
961      0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
962      0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
963      0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
964      0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
965      0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
966      0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
967      0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
968      0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
969      0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
970      0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
971      0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
972      0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
973      0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
974      0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
975      0x2d02ef8d
976    };
977
978   vg_assert(img);
979
980   /* If the image is local, calculate the CRC here directly.  If it's
981      remote, forward the request to the server. */
982   if (img->source.is_local) {
983      /* Work through the image in 1 KB chunks. */
984      UInt   crc      = 0xFFFFFFFF;
985      DiOffT img_szB  = ML_(img_size)(img);
986      DiOffT curr_off = 0;
987      while (1) {
988         vg_assert(curr_off >= 0 && curr_off <= img_szB);
989         if (curr_off == img_szB) break;
990         DiOffT avail = img_szB - curr_off;
991         vg_assert(avail > 0 && avail <= img_szB);
992         if (avail > 1024) avail = 1024;
993         UChar buf[1024];
994         SizeT nGot = ML_(img_get_some)(buf, img, curr_off, avail);
995         vg_assert(nGot >= 1 && nGot <= avail);
996         UInt i;
997         for (i = 0; i < (UInt)nGot; i++)
998            crc = crc32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
999         curr_off += nGot;
1000      }
1001      return ~crc & 0xFFFFFFFF;
1002   } else {
1003      Frame* req = mk_Frame_noargs("CRC3");
1004      Frame* res = do_transaction(img->source.fd, req);
1005      if (!res) goto remote_crc_fail;
1006      ULong crc32 = 0;
1007      if (!parse_Frame_le64(res, "CROK", &crc32)) goto remote_crc_fail;
1008      if ((crc32 & ~0xFFFFFFFFULL) != 0) goto remote_crc_fail;
1009      if (req) free_Frame(req);
1010      if (res) free_Frame(res);
1011      return (UInt)crc32;
1012     remote_crc_fail:
1013
1014      // XXXX common this up with the READ diagnostic cases
1015      if (res) {
1016         UChar* reason = NULL;
1017         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
1018            VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1019                      "%s\n", reason);
1020         } else {
1021            VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1022                      "unknown reason\n");
1023         }
1024      } else {
1025         VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1026                   "server unexpectedly closed the connection\n");
1027      }
1028
1029      if (req) free_Frame(req);
1030      if (res) free_Frame(res);
1031      // FIXME: now what?
1032      give_up__comms_lost();
1033      /* NOTREACHED */
1034      vg_assert(0);
1035   }
1036   /*NOTREACHED*/
1037   vg_assert(0);
1038}
1039
1040////////////////////////////////////////////////////
1041#include "minilzo-inl.c"
1042
1043/*--------------------------------------------------------------------*/
1044/*--- end                                                  image.c ---*/
1045/*--------------------------------------------------------------------*/
1046