1/* 2 * gzlog.c 3 * Copyright (C) 2004 Mark Adler 4 * For conditions of distribution and use, see copyright notice in gzlog.h 5 * version 1.0, 26 Nov 2004 6 * 7 */ 8 9#include <string.h> /* memcmp() */ 10#include <stdlib.h> /* malloc(), free(), NULL */ 11#include <sys/types.h> /* size_t, off_t */ 12#include <unistd.h> /* read(), close(), sleep(), ftruncate(), */ 13 /* lseek() */ 14#include <fcntl.h> /* open() */ 15#include <sys/file.h> /* flock() */ 16#include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */ 17 18#include "gzlog.h" /* interface */ 19#define local static 20 21/* log object structure */ 22typedef struct { 23 int id; /* object identifier */ 24 int fd; /* log file descriptor */ 25 off_t extra; /* offset of extra "ap" subfield */ 26 off_t mark_off; /* offset of marked data */ 27 off_t last_off; /* offset of last block */ 28 unsigned long crc; /* uncompressed crc */ 29 unsigned long len; /* uncompressed length (modulo 2^32) */ 30 unsigned stored; /* length of current stored block */ 31} gz_log; 32 33#define GZLOGID 19334 /* gz_log object identifier */ 34 35#define LOCK_RETRY 1 /* retry lock once a second */ 36#define LOCK_PATIENCE 1200 /* try about twenty minutes before forcing */ 37 38/* acquire a lock on a file */ 39local int lock(int fd) 40{ 41 int patience; 42 43 /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */ 44 patience = LOCK_PATIENCE; 45 do { 46 if (flock(fd, LOCK_EX + LOCK_NB) == 0) 47 return 0; 48 (void)sleep(LOCK_RETRY); 49 patience -= LOCK_RETRY; 50 } while (patience > 0); 51 52 /* we've run out of patience -- give up */ 53 return -1; 54} 55 56/* release lock */ 57local void unlock(int fd) 58{ 59 (void)flock(fd, LOCK_UN); 60} 61 62/* release a log object */ 63local void log_clean(gz_log *log) 64{ 65 unlock(log->fd); 66 (void)close(log->fd); 67 free(log); 68} 69 70/* read an unsigned long from a byte buffer little-endian */ 71local unsigned long make_ulg(unsigned char *buf) 72{ 73 int n; 74 unsigned long val; 75 76 val = (unsigned long)(*buf++); 77 for (n = 8; n < 32; n += 8) 78 val += (unsigned long)(*buf++) << n; 79 return val; 80} 81 82/* read an off_t from a byte buffer little-endian */ 83local off_t make_off(unsigned char *buf) 84{ 85 int n; 86 off_t val; 87 88 val = (off_t)(*buf++); 89 for (n = 8; n < 64; n += 8) 90 val += (off_t)(*buf++) << n; 91 return val; 92} 93 94/* write an unsigned long little-endian to byte buffer */ 95local void dice_ulg(unsigned long val, unsigned char *buf) 96{ 97 int n; 98 99 for (n = 0; n < 4; n++) { 100 *buf++ = val & 0xff; 101 val >>= 8; 102 } 103} 104 105/* write an off_t little-endian to byte buffer */ 106local void dice_off(off_t val, unsigned char *buf) 107{ 108 int n; 109 110 for (n = 0; n < 8; n++) { 111 *buf++ = val & 0xff; 112 val >>= 8; 113 } 114} 115 116/* initial, empty gzip file for appending */ 117local char empty_gz[] = { 118 0x1f, 0x8b, /* magic gzip id */ 119 8, /* compression method is deflate */ 120 4, /* there is an extra field */ 121 0, 0, 0, 0, /* no modification time provided */ 122 0, 0xff, /* no extra flags, no OS */ 123 20, 0, 'a', 'p', 16, 0, /* extra field with "ap" subfield */ 124 32, 0, 0, 0, 0, 0, 0, 0, /* offset of uncompressed data */ 125 32, 0, 0, 0, 0, 0, 0, 0, /* offset of last block */ 126 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ 127 0, 0, 0, 0, /* crc */ 128 0, 0, 0, 0 /* uncompressed length */ 129}; 130 131/* initialize a log object with locking */ 132void *gzlog_open(char *path) 133{ 134 unsigned xlen; 135 unsigned char temp[20]; 136 unsigned sub_len; 137 int good; 138 gz_log *log; 139 140 /* allocate log structure */ 141 log = malloc(sizeof(gz_log)); 142 if (log == NULL) 143 return NULL; 144 log->id = GZLOGID; 145 146 /* open file, creating it if necessary, and locking it */ 147 log->fd = open(path, O_RDWR | O_CREAT, 0600); 148 if (log->fd < 0) { 149 free(log); 150 return NULL; 151 } 152 if (lock(log->fd)) { 153 close(log->fd); 154 free(log); 155 return NULL; 156 } 157 158 /* if file is empty, write new gzip stream */ 159 if (lseek(log->fd, 0, SEEK_END) == 0) { 160 if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { 161 log_clean(log); 162 return NULL; 163 } 164 } 165 166 /* check gzip header */ 167 (void)lseek(log->fd, 0, SEEK_SET); 168 if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || 169 temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { 170 log_clean(log); 171 return NULL; 172 } 173 174 /* process extra field to find "ap" sub-field */ 175 xlen = temp[10] + (temp[11] << 8); 176 good = 0; 177 while (xlen) { 178 if (xlen < 4 || read(log->fd, temp, 4) != 4) 179 break; 180 sub_len = temp[2]; 181 sub_len += temp[3] << 8; 182 xlen -= 4; 183 if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { 184 good = 1; 185 break; 186 } 187 if (xlen < sub_len) 188 break; 189 (void)lseek(log->fd, sub_len, SEEK_CUR); 190 xlen -= sub_len; 191 } 192 if (!good) { 193 log_clean(log); 194 return NULL; 195 } 196 197 /* read in "ap" sub-field */ 198 log->extra = lseek(log->fd, 0, SEEK_CUR); 199 if (read(log->fd, temp, 16) != 16) { 200 log_clean(log); 201 return NULL; 202 } 203 log->mark_off = make_off(temp); 204 log->last_off = make_off(temp + 8); 205 206 /* get crc, length of gzip file */ 207 (void)lseek(log->fd, log->last_off, SEEK_SET); 208 if (read(log->fd, temp, 13) != 13 || 209 memcmp(temp, "\001\000\000\377\377", 5) != 0) { 210 log_clean(log); 211 return NULL; 212 } 213 log->crc = make_ulg(temp + 5); 214 log->len = make_ulg(temp + 9); 215 216 /* set up to write over empty last block */ 217 (void)lseek(log->fd, log->last_off + 5, SEEK_SET); 218 log->stored = 0; 219 return (void *)log; 220} 221 222/* maximum amount to put in a stored block before starting a new one */ 223#define MAX_BLOCK 16384 224 225/* write a block to a log object */ 226int gzlog_write(void *obj, char *data, size_t len) 227{ 228 size_t some; 229 unsigned char temp[5]; 230 gz_log *log; 231 232 /* check object */ 233 log = (gz_log *)obj; 234 if (log == NULL || log->id != GZLOGID) 235 return 1; 236 237 /* write stored blocks until all of the input is written */ 238 do { 239 some = MAX_BLOCK - log->stored; 240 if (some > len) 241 some = len; 242 if (write(log->fd, data, some) != some) 243 return 1; 244 log->crc = crc32(log->crc, data, some); 245 log->len += some; 246 len -= some; 247 data += some; 248 log->stored += some; 249 250 /* if the stored block is full, end it and start another */ 251 if (log->stored == MAX_BLOCK) { 252 (void)lseek(log->fd, log->last_off, SEEK_SET); 253 temp[0] = 0; 254 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), 255 temp + 1); 256 if (write(log->fd, temp, 5) != 5) 257 return 1; 258 log->last_off = lseek(log->fd, log->stored, SEEK_CUR); 259 (void)lseek(log->fd, 5, SEEK_CUR); 260 log->stored = 0; 261 } 262 } while (len); 263 return 0; 264} 265 266/* recompress the remaining stored deflate data in place */ 267local int recomp(gz_log *log) 268{ 269 z_stream strm; 270 size_t len, max; 271 unsigned char *in; 272 unsigned char *out; 273 unsigned char temp[16]; 274 275 /* allocate space and read it all in (it's around 1 MB) */ 276 len = log->last_off - log->mark_off; 277 max = len + (len >> 12) + (len >> 14) + 11; 278 out = malloc(max); 279 if (out == NULL) 280 return 1; 281 in = malloc(len); 282 if (in == NULL) { 283 free(out); 284 return 1; 285 } 286 (void)lseek(log->fd, log->mark_off, SEEK_SET); 287 if (read(log->fd, in, len) != len) { 288 free(in); 289 free(out); 290 return 1; 291 } 292 293 /* recompress in memory, decoding stored data as we go */ 294 /* note: this assumes that unsigned is four bytes or more */ 295 /* consider not making that assumption */ 296 strm.zalloc = Z_NULL; 297 strm.zfree = Z_NULL; 298 strm.opaque = Z_NULL; 299 if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, 300 Z_DEFAULT_STRATEGY) != Z_OK) { 301 free(in); 302 free(out); 303 return 1; 304 } 305 strm.next_in = in; 306 strm.avail_out = max; 307 strm.next_out = out; 308 while (len >= 5) { 309 if (strm.next_in[0] != 0) 310 break; 311 strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); 312 strm.next_in += 5; 313 len -= 5; 314 if (strm.avail_in != 0) { 315 if (len < strm.avail_in) 316 break; 317 len -= strm.avail_in; 318 (void)deflate(&strm, Z_NO_FLUSH); 319 if (strm.avail_in != 0 || strm.avail_out == 0) 320 break; 321 } 322 } 323 (void)deflate(&strm, Z_SYNC_FLUSH); 324 (void)deflateEnd(&strm); 325 free(in); 326 if (len != 0 || strm.avail_out == 0) { 327 free(out); 328 return 1; 329 } 330 331 /* overwrite stored data with compressed data */ 332 (void)lseek(log->fd, log->mark_off, SEEK_SET); 333 len = max - strm.avail_out; 334 if (write(log->fd, out, len) != len) { 335 free(out); 336 return 1; 337 } 338 free(out); 339 340 /* write last empty block, crc, and length */ 341 log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); 342 temp[0] = 1; 343 dice_ulg(0xffffL << 16, temp + 1); 344 dice_ulg(log->crc, temp + 5); 345 dice_ulg(log->len, temp + 9); 346 if (write(log->fd, temp, 13) != 13) 347 return 1; 348 349 /* truncate file to discard remaining stored data and old trailer */ 350 ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); 351 352 /* update extra field to point to new last empty block */ 353 (void)lseek(log->fd, log->extra, SEEK_SET); 354 dice_off(log->mark_off, temp); 355 dice_off(log->last_off, temp + 8); 356 if (write(log->fd, temp, 16) != 16) 357 return 1; 358 return 0; 359} 360 361/* maximum accumulation of stored blocks before compressing */ 362#define MAX_STORED 1048576 363 364/* close log object */ 365int gzlog_close(void *obj) 366{ 367 unsigned char temp[8]; 368 gz_log *log; 369 370 /* check object */ 371 log = (gz_log *)obj; 372 if (log == NULL || log->id != GZLOGID) 373 return 1; 374 375 /* go to start of most recent block being written */ 376 (void)lseek(log->fd, log->last_off, SEEK_SET); 377 378 /* if some stuff was put there, update block */ 379 if (log->stored) { 380 temp[0] = 0; 381 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), 382 temp + 1); 383 if (write(log->fd, temp, 5) != 5) 384 return 1; 385 log->last_off = lseek(log->fd, log->stored, SEEK_CUR); 386 } 387 388 /* write last block (empty) */ 389 if (write(log->fd, "\001\000\000\377\377", 5) != 5) 390 return 1; 391 392 /* write updated crc and uncompressed length */ 393 dice_ulg(log->crc, temp); 394 dice_ulg(log->len, temp + 4); 395 if (write(log->fd, temp, 8) != 8) 396 return 1; 397 398 /* put offset of that last block in gzip extra block */ 399 (void)lseek(log->fd, log->extra + 8, SEEK_SET); 400 dice_off(log->last_off, temp); 401 if (write(log->fd, temp, 8) != 8) 402 return 1; 403 404 /* if more than 1 MB stored, then time to compress it */ 405 if (log->last_off - log->mark_off > MAX_STORED) { 406 if (recomp(log)) 407 return 1; 408 } 409 410 /* unlock and close file */ 411 log_clean(log); 412 return 0; 413} 414