Ticket #22706: 002-gzipped_cache.patch

File 002-gzipped_cache.patch, 32.7 KB (added by nickm, 2 years ago)
  • src/common/gzutil.c

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/common/gzutil.c tor-0.2.3.16-alpha-gzipped_cache//src/common/gzutil.c
    old new  
     1/* Copyright (c) 2003, Roger Dingledine
     2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
     3 * Copyright (c) 2007-2011, The Tor Project, Inc. */
     4/* See LICENSE for licensing information */
     5
     6/**
     7 * \file gzutil.c
     8 * \brief Common functions for gzipped files.
     9 **/
     10
     11#include "orconfig.h"
     12#include "util.h"
     13#include "gzutil.h"
     14#include "torlog.h"
     15#undef log
     16#include "torint.h"
     17#include "container.h"
     18
     19#ifdef MS_WINDOWS
     20#include <io.h>
     21#endif
     22
     23#include <stdlib.h>
     24#include <stdio.h>
     25
     26#ifdef HAVE_ERRNO_H
     27#include <errno.h>
     28#endif
     29#ifdef HAVE_UNISTD_H
     30#include <unistd.h>
     31#endif
     32#ifdef HAVE_SYS_STAT_H
     33#include <sys/stat.h>
     34#endif
     35#ifdef HAVE_SYS_FCNTL_H
     36#include <sys/fcntl.h>
     37#endif
     38#ifdef HAVE_FCNTL_H
     39#include <fcntl.h>
     40#endif
     41#ifdef HAVE_SYS_MMAN_H
     42#include <sys/mman.h>
     43#endif
     44
     45/** Represents a file that we're writing to, with support for atomic commit:
     46 *  we can write into a temporary file, and either remove the file on
     47 *  failure, or replace the original file on success. */
     48struct gzopen_file_t {
     49  char *filename; /**< Name of the original file. */
     50  unsigned binary:1; /**< Did we open in binary mode? */
     51  int fd; /**< fd for the open file. */
     52  gzFile gzf; /**< gzip wrapper for <b>fb</b>. */
     53};
     54
     55/** Write <b>count</b> bytes from <b>buf</b> to <b>fd</b>.
     56 * Return the number of bytes written, or -1
     57 * on error.  Only use if fd is a blocking fd.  */
     58ssize_t
     59gzwrite_all(gzFile gzf, const char *buf, size_t count)
     60{
     61  size_t written = 0;
     62  ssize_t result;
     63  tor_assert(count < SSIZE_T_MAX);
     64
     65  while (written != count) {
     66    result = gzwrite(gzf, buf+written, count-written);
     67    if (result<0)
     68      return -1;
     69    written += result;
     70  }
     71  return (ssize_t)count;
     72}
     73
     74/** Read from <b>fd</b> to <b>buf</b>, until we get <b>count</b> bytes
     75 * or reach the end of the file.
     76 * Return the number of bytes read, or -1 on error. Only use
     77 * if fd is a blocking fd. */
     78ssize_t
     79gzread_all(gzFile gzf, char *buf, size_t count)
     80{
     81  size_t numread = 0;
     82  ssize_t result;
     83
     84  if (count > SIZE_T_CEILING || count > SSIZE_T_MAX)
     85    return -1;
     86
     87  while (numread != count) {
     88    result = gzread(gzf, buf+numread, count-numread);
     89    if (result<0)
     90      return -1;
     91    else if (result == 0)
     92      break;
     93    numread += result;
     94  }
     95  return (ssize_t)numread;
     96}
     97
     98/** Try to start writing to the file in <b>fname</b>, passing the flags
     99 * <b>open_flags</b> to the open() syscall, creating the file (if needed) with
     100 * access value <b>mode</b>.  We open a new temporary file in the same
     101 * directory, and either replace the original or remove the temporary file
     102 * when we're done.
     103 *
     104 * Return the gzip descriptor for the newly opened file, and store working data in
     105 * *<b>data_out</b>.  The caller should not close the descriptor manually:
     106 * instead, call finish_writing_to_file() or abort_writing_to_file().
     107 * Returns NULL on failure.
     108 */
     109gzFile
     110gzstart_writing_to_file(const char *fname, int open_flags, int mode,
     111                      gzopen_file_t **data_out)
     112{
     113  gzopen_file_t *new_file = tor_malloc_zero(sizeof(gzopen_file_t));
     114  const char *open_name;
     115
     116  tor_assert(fname);
     117  tor_assert(data_out);
     118#if (O_BINARY != 0 && O_TEXT != 0)
     119  tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
     120#endif
     121  new_file->fd = -1;
     122  open_name = new_file->filename = tor_strdup(fname);
     123  open_flags |= O_CREAT;
     124  open_flags &= ~O_EXCL;
     125  if (open_flags & O_BINARY)
     126    new_file->binary = 1;
     127
     128  new_file->fd = open(open_name, open_flags, mode);
     129  if (new_file->fd < 0) {
     130    log_warn(LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
     131        open_name, fname, strerror(errno));
     132    goto err;
     133  }
     134
     135  /* Open the gzip descriptor */
     136  new_file->gzf = gzdopen (new_file->fd, (open_flags & O_APPEND)?"a":"w");
     137  if (!new_file->gzf) {
     138    log_warn(LD_FS,"Error opening gzipped file \"%s\": %s", open_name,
     139             strerror(errno));
     140    goto err;
     141  }
     142
     143  *data_out = new_file;
     144
     145  return new_file->gzf;
     146
     147 err:
     148  if (new_file->fd >= 0)
     149    close(new_file->fd);
     150  *data_out = NULL;
     151  tor_free(new_file->filename);
     152  tor_free(new_file);
     153  return NULL;
     154}
     155
     156/** Helper function: close and free the underlying file and memory in
     157 * <b>file_data</b>.  If we were writing into a temporary file, then delete
     158 * that file (if abort_write is true) or replaces the target file with
     159 * the temporary file (if abort_write is false). */
     160static int
     161gzfinish_writing_to_file_impl(gzopen_file_t *file_data, int abort_write)
     162{
     163  int r = 0;
     164  tor_assert(file_data && file_data->filename);
     165  if (gzclose(file_data->gzf)!=Z_OK) {
     166    log_warn(LD_FS, "Error closing \"%s\": %s", file_data->filename,
     167               strerror(errno));
     168    abort_write = r = -1;
     169    close(file_data->fd);
     170  }
     171
     172  tor_free(file_data->filename);
     173  tor_free(file_data);
     174
     175  return r;
     176}
     177
     178/** Finish writing to <b>file_data</b>: close the file handle, free memory as
     179 * needed, and if using a temporary file, replace the original file with
     180 * the temporary file. */
     181int
     182gzfinish_writing_to_file(gzopen_file_t *file_data)
     183{
     184  return gzfinish_writing_to_file_impl(file_data, 0);
     185}
     186
     187/** Finish writing to <b>file_data</b>: close the file handle, free memory as
     188 * needed, and if using a temporary file, delete it. */
     189int
     190gzabort_writing_to_file(gzopen_file_t *file_data)
     191{
     192  return gzfinish_writing_to_file_impl(file_data, 1);
     193}
     194
     195/** Helper: given a set of flags as passed to open(2), open the file
     196 * <b>fname</b> and write all the sized_chunk_t structs in <b>chunks</b> to
     197 * the file.  Do so as atomically as possible e.g. by opening temp files and
     198 * renaming. */
     199static int
     200gzwrite_chunks_to_file_impl(const char *fname, const smartlist_t *chunks,
     201                          int open_flags)
     202{
     203  gzopen_file_t *file = NULL;
     204  ssize_t result;
     205  gzFile gzf = gzstart_writing_to_file(fname, open_flags, 0600, &file);
     206  if (gzf==NULL)
     207    return -1;
     208  SMARTLIST_FOREACH(chunks, sized_chunk_t *, chunk,
     209  {
     210    result = gzwrite_all(gzf, chunk->bytes, chunk->len);
     211    if (result < 0) {
     212      log_warn(LD_FS, "Error writing to \"%s\": %s", fname,
     213          strerror(errno));
     214      goto err;
     215    }
     216    tor_assert((size_t)result == chunk->len);
     217  });
     218
     219  return gzfinish_writing_to_file(file);
     220 err:
     221  gzabort_writing_to_file(file);
     222  return -1;
     223}
     224
     225/** Given a smartlist of sized_chunk_t, write them atomically to a file
     226 * <b>fname</b>, overwriting or creating the file as necessary. */
     227int
     228gzwrite_chunks_to_file(const char *fname, const smartlist_t *chunks, int bin)
     229{
     230  int flags = OPEN_FLAGS_REPLACE|(bin?O_BINARY:O_TEXT);
     231  return gzwrite_chunks_to_file_impl(fname, chunks, flags);
     232}
     233
     234/** As write_bytes_to_file, but if the file already exists, append the bytes
     235 *  * to the end of the file instead of overwriting it. */
     236int
     237gzappend_bytes_to_file(const char *fname, const char *str, size_t len,
     238                     int bin)
     239{
     240  int r;
     241  sized_chunk_t c = { str, len };
     242  smartlist_t *chunks = smartlist_new();
     243  smartlist_add(chunks, &c);
     244  r = gzwrite_chunks_to_file_impl(fname, chunks,
     245                                  OPEN_FLAGS_APPEND|(bin?O_BINARY:O_TEXT));
     246  smartlist_free(chunks);
     247  return r;
     248}
     249
     250/** Uncompress contents of <b>filename</b> into a an
     251 *  anonymous map; return 0 on success. New data will be appended
     252 *  to existing data in the map. The map size will be increased if
     253 *  it's too small */
     254int
     255gzload_file_into_mmap(tor_mmap_t *handle, const char *filename)
     256{
     257  gzFile gzf; /* router file */
     258  ssize_t r;
     259
     260  tor_assert(filename);
     261
     262  if (!(gzf = gzopen(filename, "rb"))) {
     263    int save_errno = errno;
     264    log_info(LD_FS,"Could not open \"%s\": %s",filename,
     265           strerror(errno));
     266    errno = save_errno;
     267    return -1;
     268  }
     269
     270  /* While there's enough data to fill up the map, fill it and increase the map size */
     271  do {
     272    /* When the current map is full, increase its size */
     273    if (handle->size==handle->mapping_size && tor_mremap_inc(handle)) {
     274      int save_errno = errno;
     275      log_err(LD_FS,"Could not increase the anonymous memory mmaping while reading from file \"%s\"", filename);
     276      gzclose(gzf);
     277      errno = save_errno;
     278      return -1;
     279    }
     280
     281    /* Fill the map with data */
     282    r = gzread(gzf,(void*)handle->data+handle->size,handle->mapping_size-handle->size);
     283    if (r<0) {
     284      int save_errno = errno;
     285      log_warn(LD_FS,"Error reading from file \"%s\": %s", filename,
     286             strerror(errno));
     287      gzclose(gzf);
     288      errno = save_errno;
     289      return -1;
     290    }
     291    handle->size+=r;
     292
     293    /* Repeat if we haven't reached the end of the file */
     294  } while (r);
     295
     296  gzclose(gzf);
     297  return 0;
     298}
  • src/common/gzutil.h

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/common/gzutil.h tor-0.2.3.16-alpha-gzipped_cache//src/common/gzutil.h
    old new  
     1/* Copyright (c) 2003-2004, Roger Dingledine
     2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
     3 * Copyright (c) 2007-2011, The Tor Project, Inc. */
     4/* See LICENSE for licensing information */
     5
     6/**
     7 * \file gzutil.h
     8 * \brief Headers for gzutil.c
     9 **/
     10
     11#ifndef _TOR_GZUTIL_H
     12#define _TOR_GZUTIL_H
     13
     14#include "orconfig.h"
     15#include "torint.h"
     16#include "compat.h"
     17#include "di_ops.h"
     18#include "util.h"
     19#include <stdio.h>
     20#include <stdlib.h>
     21
     22#include <zlib.h>
     23
     24#ifndef O_BINARY
     25#define O_BINARY 0
     26#endif
     27#ifndef O_TEXT
     28#define O_TEXT 0
     29#endif
     30
     31typedef struct gzopen_file_t gzopen_file_t;
     32
     33int gzwrite_chunks_to_file(const char *fname, const struct smartlist_t *chunks,
     34                     int bin);
     35
     36int gzload_file_into_mmap(tor_mmap_t *handle, const char *filename);
     37
     38ssize_t gzwrite_all(gzFile gzf, const char *buf, size_t count);
     39
     40gzFile gzstart_writing_to_file(const char *fname, int open_flags, int mode,
     41                      gzopen_file_t **data_out);
     42int gzfinish_writing_to_file(gzopen_file_t *file_data);
     43int gzabort_writing_to_file(gzopen_file_t *file_data);
     44int gzappend_bytes_to_file(const char *fname, const char *str, size_t len,
     45                     int bin);
     46
     47#endif
  • src/common/Makefile.am

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/common/Makefile.am tor-0.2.3.16-alpha-gzipped_cache//src/common/Makefile.am
    old new  
    2121  mempool.c                                     \
    2222  procmon.c                                     \
    2323  util.c                                        \
     24  gzutil.c                                      \
    2425  util_codedigest.c                             \
    2526  $(libor_extra_source)
    2627
     
    5152  torint.h                                      \
    5253  torlog.h                                      \
    5354  tortls.h                                      \
    54   util.h
     55  util.h                                        \
     56  gzutil.h
    5557
    5658common_sha1.i: $(libor_SOURCES) $(libor_crypto_a_SOURCES) $(noinst_HEADERS)
    5759        if test "@SHA1SUM@" != none; then \
  • src/common/Makefile.in

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/common/Makefile.in tor-0.2.3.16-alpha-gzipped_cache//src/common/Makefile.in
    old new  
    6161libor_a_AR = $(AR) $(ARFLAGS)
    6262libor_a_LIBADD =
    6363am__libor_a_SOURCES_DIST = address.c compat.c container.c di_ops.c \
    64         log.c memarea.c mempool.c procmon.c util.c util_codedigest.c \
     64        log.c memarea.c mempool.c procmon.c util.c gzutil.c util_codedigest.c \
    6565        OpenBSD_malloc_Linux.c
    6666@USE_OPENBSD_MALLOC_TRUE@am__objects_1 =  \
    6767@USE_OPENBSD_MALLOC_TRUE@       OpenBSD_malloc_Linux.$(OBJEXT)
    6868am_libor_a_OBJECTS = address.$(OBJEXT) compat.$(OBJEXT) \
    6969        container.$(OBJEXT) di_ops.$(OBJEXT) log.$(OBJEXT) \
    7070        memarea.$(OBJEXT) mempool.$(OBJEXT) procmon.$(OBJEXT) \
    71         util.$(OBJEXT) util_codedigest.$(OBJEXT) $(am__objects_1)
     71        util.$(OBJEXT) gzutil.$(OBJEXT) util_codedigest.$(OBJEXT) $(am__objects_1)
    7272libor_a_OBJECTS = $(am_libor_a_OBJECTS)
    7373DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
    7474depcomp = $(SHELL) $(top_srcdir)/depcomp
     
    227227  mempool.c                                     \
    228228  procmon.c                                     \
    229229  util.c                                        \
     230  gzutil.c                                      \
    230231  util_codedigest.c                             \
    231232  $(libor_extra_source)
    232233
     
    256257  torint.h                                      \
    257258  torlog.h                                      \
    258259  tortls.h                                      \
    259   util.h
     260  util.h                                        \
     261  gzutil.h
    260262
    261263all: all-am
    262264
     
    329331@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/torgzip.Po@am__quote@
    330332@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tortls.Po@am__quote@
    331333@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Po@am__quote@
     334@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gzutil.Po@am__quote@
    332335@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util_codedigest.Po@am__quote@
    333336
    334337.c.o:
  • src/or/microdesc.c

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/or/microdesc.c tor-0.2.3.16-alpha-gzipped_cache//src/or/microdesc.c
    old new  
    1313#include "router.h"
    1414#include "routerlist.h"
    1515#include "routerparse.h"
     16#include "gzutil.h"
    1617
    17 /** A data structure to hold a bunch of cached microdescriptors.  There are
    18  * two active files in the cache: a "cache file" that we mmap, and a "journal
    19  * file" that we append to.  Periodically, we rebuild the cache file to hold
     18#include <zlib.h>
     19
     20/** A data structure to hold a bunch of cached microdescriptors.  There is
     21 * one active file in the cache: it's a main "cache file", and we also append
     22 * the journal to it. Periodically, we rebuild the cache file to hold
    2023 * only the microdescriptors that we want to keep */
    2124struct microdesc_cache_t {
    2225  /** Map from sha256-digest to microdesc_t for every microdesc_t in the
     
    2528
    2629  /** Name of the cache file. */
    2730  char *cache_fname;
    28   /** Name of the journal file. */
    29   char *journal_fname;
     31  /** Name of the old file. */
     32  char *cache_faltname;
    3033  /** Mmap'd contents of the cache file, or NULL if there is none. */
    3134  tor_mmap_t *cache_content;
    3235  /** Number of bytes used in the journal file. */
     
    7578                             : md->body;
    7679}
    7780
    78 /** Write the body of <b>md</b> into <b>f</b>, with appropriate annotations.
     81/** Write the body of <b>md</b> into <b>gzf</b>, with appropriate annotations.
    7982 * On success, return the total number of bytes written, and set
    8083 * *<b>annotation_len_out</b> to the number of bytes written as
    8184 * annotations. */
    8285static ssize_t
    83 dump_microdescriptor(FILE *f, microdesc_t *md, size_t *annotation_len_out)
     86gzdump_microdescriptor(gzFile gzf, microdesc_t *md, size_t *annotation_len_out)
    8487{
    8588  ssize_t r = 0;
    8689  size_t written;
     
    9093    char annotation[ISO_TIME_LEN+32];
    9194    format_iso_time(buf, md->last_listed);
    9295    tor_snprintf(annotation, sizeof(annotation), "@last-listed %s\n", buf);
    93     if (fputs(annotation, f) < 0) {
     96    r += strlen(annotation);
     97    if (gzwrite_all(gzf, annotation, r) < r) {
    9498      log_warn(LD_DIR,
    9599               "Couldn't write microdescriptor annotation: %s",
    96                strerror(ferror(f)));
     100               gzerror(gzf, NULL));
    97101      return -1;
    98102    }
    99     r += strlen(annotation);
    100103    *annotation_len_out = r;
    101104  } else {
    102105    *annotation_len_out = 0;
    103106  }
    104107
    105   written = fwrite(microdesc_get_body(md), 1, md->bodylen, f);
     108  written = gzwrite_all(gzf, microdesc_get_body(md), md->bodylen);
    106109  if (written != md->bodylen) {
    107110    log_warn(LD_DIR,
    108111             "Couldn't dump microdescriptor (wrote %lu out of %lu): %s",
    109112             (unsigned long)written, (unsigned long)md->bodylen,
    110              strerror(ferror(f)));
     113             gzerror(gzf, NULL));
    111114    return -1;
    112115  }
    113116  r += md->bodylen;
     
    128131    cache->cache_content->data = NULL;
    129132    cache->cache_content->size = cache->cache_content->mapping_size = 0;
    130133    HT_INIT(microdesc_map, &cache->map);
    131     cache->cache_fname = get_datadir_fname("cached-microdescs");
    132     cache->journal_fname = get_datadir_fname("cached-microdescs.new");
    133     microdesc_cache_reload(cache);
     134    cache->cache_fname = get_datadir_fname("cached-microdescs.gz");
     135    cache->cache_faltname = get_datadir_fname("cached-microdescs");
     136    /* update the global variable here as we may refer to it
     137     * during reload/rebuild (while writing bodies to file) */
    134138    the_microdesc_cache = cache;
     139    microdesc_cache_reload(cache);
    135140  }
    136141  return the_microdesc_cache;
    137142}
     
    208213                             int no_save)
    209214{
    210215  smartlist_t *added;
    211   open_file_t *open_file = NULL;
    212   FILE *f = NULL;
     216  gzopen_file_t *open_file = NULL;
     217  gzFile gzf = NULL;
    213218  //  int n_added = 0;
    214219  ssize_t size = 0;
    215220
    216221  if (where == SAVED_NOWHERE && !no_save) {
    217     f = start_writing_to_stdio_file(cache->journal_fname,
     222    gzf = gzstart_writing_to_file(cache->cache_fname,
    218223                                    OPEN_FLAGS_APPEND|O_BINARY,
    219224                                    0600, &open_file);
    220     if (!f) {
     225    if (!gzf) {
    221226      log_warn(LD_DIR, "Couldn't append to journal in %s: %s",
    222                cache->journal_fname, strerror(errno));
     227               cache->cache_fname, strerror(errno));
    223228      return NULL;
    224229    }
    225230  }
     
    240245    }
    241246
    242247    /* Okay, it's a new one. */
    243     if (f) {
     248    if (gzf) {
    244249      size_t annotation_len;
    245250      /* Write it to the file */
    246       size = dump_microdescriptor(f, md, &annotation_len);
     251      size = gzdump_microdescriptor(gzf, md, &annotation_len);
    247252      if (size < 0) {
    248253        /* we already warned in dump_microdescriptor; */
    249         abort_writing_to_file(open_file);
     254        gzabort_writing_to_file(open_file);
    250255        smartlist_clear(added);
    251256        return added;
    252257      }
     
    275280    cache->total_len_seen += md->bodylen;
    276281  } SMARTLIST_FOREACH_END(md);
    277282
    278   if (f)
    279     finish_writing_to_file(open_file); /*XXX Check me.*/
     283  if (gzf)
     284    gzfinish_writing_to_file(open_file); /*XXX Check me.*/
    280285
    281286  {
    282287    networkstatus_t *ns = networkstatus_get_latest_consensus();
     
    320325
    321326  microdesc_cache_clear(cache);
    322327
    323   load_file_into_mmap(cache->cache_content, cache->cache_fname);
    324   size_t cache_size = cache->cache_content->size;
    325 
    326   load_file_into_mmap(cache->cache_content, cache->journal_fname);
    327   cache->journal_len = cache->cache_content->size - cache_size;
     328  /* Load the cache file into an anonymous map first */
     329  if (gzload_file_into_mmap(cache->cache_content, cache->cache_fname) &&
     330      cache->cache_faltname) {
     331    log_notice(LD_DIR, "Couldn't read %s; trying to load routers from old "
     332                       "uncompressed location %s.",
     333               cache->cache_fname, cache->cache_faltname);
     334    /* Read the old cache */
     335    if (file_status(cache->cache_faltname) == FN_FILE)
     336      load_file_into_mmap(cache->cache_content, cache->cache_faltname);
     337    /* Load the old journal */
     338    char *altname;
     339    if (tor_asprintf(&altname, "%s.new", cache->cache_faltname)!=-1 &&
     340        file_status(altname) == FN_FILE)
     341      load_file_into_mmap(cache->cache_content, altname);
     342    tor_free(altname);
     343  }
    328344
    329345  if (cache->cache_content->size) {
    330346    added = microdescs_add_to_cache(cache, cache->cache_content->data,
     
    334350      total += smartlist_len(added);
    335351      smartlist_free(added);
    336352    }
     353    /* Force rebuild because we don't know how much of the
     354     * cache file had been journaled */
     355    microdesc_cache_rebuild(cache, 1);
    337356  }
    338357  log_notice(LD_DIR, "Reloaded microdescriptor cache.  Found %d descriptors.",
    339358             total);
    340359
    341   microdesc_cache_rebuild(cache, 0 /* don't force */);
    342 
    343360  return 0;
    344361}
    345362
     
    409426    return 0;
    410427}
    411428
    412 /** Regenerate the main cache file for <b>cache</b>, clear the journal file,
     429/** Regenerate the main cache file for <b>cache</b>
    413430 * and update every microdesc_t in the cache with pointers to its new
    414431 * location.  If <b>force</b> is true, do this unconditionally.  If
    415432 * <b>force</b> is false, do it only if we expect to save space on disk. */
    416433int
    417434microdesc_cache_rebuild(microdesc_cache_t *cache, int force)
    418435{
    419   open_file_t *open_file;
    420   FILE *f;
     436  gzopen_file_t *open_file;
     437  gzFile gzf;
    421438  microdesc_t **mdp;
    422439  smartlist_t *wrote;
    423440  ssize_t size;
     
    440457
    441458  orig_size = (int)(cache->cache_content ? cache->cache_content->size : 0);
    442459
    443   f = start_writing_to_stdio_file(cache->cache_fname,
     460  gzf = gzstart_writing_to_file(cache->cache_fname,
    444461                                  OPEN_FLAGS_REPLACE|O_BINARY,
    445462                                  0600, &open_file);
    446   if (!f)
     463  if (!gzf)
    447464    return -1;
    448465
    449466  wrote = smartlist_new();
     
    454471    if (md->no_save)
    455472      continue;
    456473
    457     size = dump_microdescriptor(f, md, &annotation_len);
     474    size = gzdump_microdescriptor(gzf, md, &annotation_len);
    458475    if (size < 0) {
    459476      /* XXX handle errors from dump_microdescriptor() */
    460477      /* log?  return -1?  die?  coredump the universe? */
     
    473490  if (cache->cache_content->data)
    474491    tor_munmap(cache->cache_content);
    475492
    476   finish_writing_to_file(open_file); /*XXX Check me.*/
     493  gzfinish_writing_to_file(open_file); /*XXX Check me.*/
    477494
    478   if (load_file_into_mmap(cache->cache_content, cache->cache_fname) && smartlist_len(wrote)) {
     495  if (gzload_file_into_mmap(cache->cache_content, cache->cache_fname) && smartlist_len(wrote)) {
    479496    log_err(LD_DIR, "Couldn't map file that we just wrote to %s!",
    480497            cache->cache_fname);
    481498    smartlist_free(wrote);
     
    503520
    504521  smartlist_free(wrote);
    505522
    506   write_str_to_file(cache->journal_fname, "", 1);
    507523  cache->journal_len = 0;
    508524  cache->bytes_dropped = 0;
    509525
    510   new_size = (int)cache->cache_content->size;
     526  new_size = cache->cache_content ? (int)cache->cache_content->size : 0;
    511527  log_info(LD_DIR, "Done rebuilding microdesc cache. "
    512528           "Saved %d bytes; %d still used.",
    513529           orig_size-new_size, new_size);
     
    602618  if (the_microdesc_cache) {
    603619    microdesc_cache_clear(the_microdesc_cache);
    604620    tor_free(the_microdesc_cache->cache_fname);
    605     tor_free(the_microdesc_cache->journal_fname);
     621    tor_free(the_microdesc_cache->cache_faltname);
    606622    tor_free(the_microdesc_cache->cache_content);
    607623    tor_free(the_microdesc_cache);
    608624  }
  • src/or/or.h

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/or/or.h tor-0.2.3.16-alpha-gzipped_cache//src/or/or.h
    old new  
    16331633  /** The descriptor isn't stored on disk at all: the copy in memory is
    16341634   * canonical; the saved_offset field is meaningless. */
    16351635  SAVED_NOWHERE=0,
    1636   /** The descriptor is stored in the cached_routers file or in the journal.
    1637    * In both cases the body is kept in the anonymous mmap: the
     1636  /** The descriptor is stored in the cached_routers file: the
    16381637   * signed_descriptor_body is meaningless; the signed_descriptor_len and
    1639    * saved_offset are used to index into the memory map. */
     1638   * saved_offset are used to index into the mmaped cache file. */
    16401639  SAVED_IN_CACHE,
    1641   /** The descriptor is stored in the cached_routers.new file but we've
    1642    * failed to get space in the anonymous mmap (memory error). The
     1640  /** The descriptor is stored in the cached_routers.new file: the
    16431641   * signed_descriptor_body and saved_offset fields are both set. */
     1642  /* FFFF (We could also mmap the file and grow the mmap as needed, or
     1643   * lazy-load the descriptor text by using seek and read.  We don't, for
     1644   * now.)
     1645   */
    16441646  SAVED_IN_JOURNAL
    16451647} saved_location_t;
    16461648
     
    16941696  download_status_t ei_dl_status;
    16951697  /** Where is the descriptor saved? */
    16961698  saved_location_t saved_location;
    1697   /** The position of the body in anonymous memory map dedicated to holding
    1698    * descriptors that are saved in files. */
     1699  /** If saved_location is SAVED_IN_CACHE or SAVED_IN_JOURNAL, the offset of
     1700   * this descriptor in the corresponding file. */
    16991701  off_t saved_offset;
    17001702  /** What position is this descriptor within routerlist->routers or
    17011703   * routerlist->old_routers? -1 for none. */
     
    19471949  unsigned int held_by_nodes;
    19481950
    19491951  /** If saved_location == SAVED_IN_CACHE, this field holds the offset of the
    1950    * microdescriptor in the file that holds it cache. */
     1952   * microdescriptor in the cache. */
    19511953  off_t off;
    19521954
    19531955  /** If saved_location == SAVED_IN_CACHE, this field holds the offset to the
    1954    * microdescriptor body in the anonymous memory map */
     1956   * microdescriptor body in memory relative to the cache. */
    19551957  size_t cache_offset;
    19561958
    19571959  /* The string containing the microdesc. */
  • src/or/routerlist.c

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/or/routerlist.c tor-0.2.3.16-alpha-gzipped_cache//src/or/routerlist.c
    old new  
    3333#include "router.h"
    3434#include "routerlist.h"
    3535#include "routerparse.h"
     36#include "gzutil.h"
    3637
    3738// #define DEBUG_ROUTERLIST
    3839
     
    583584
    584585/* Router descriptor storage.
    585586 *
    586  * Routerdescs are stored in a big file, named "cached-descriptors".  As new
    587  * routerdescs arrive, we append them to a journal file named
    588  * "cached-descriptors.new".
     587 * Routerdescs are stored in a big file, named "cached-descriptors.gz".  As new
     588 * routerdescs arrive, we append them there instead of a separate journal file.
    589589 *
    590  * From time to time, we replace "cached-descriptors" with a new file
    591  * containing only the live, non-superseded descriptors, and clear
    592  * cached-routers.new.
     590 * From time to time, we replace "cached-descriptors.gz" with a new file
     591 * containing only the live, non-superseded descriptors.
    593592 *
    594  * On startup, we read both files.
     593 * On startup, if we haven't read anything from "cached-descriptors.gz", we try
     594 * to read uncompressed data files from a previous version of Tor.
     595 *
     596 * "Journal" refers to the data that has been appended to the cache file
     597 * since the reload/rebuild.
    595598 */
    596599
    597600/** Helper: return 1 iff the router log is so big we want to rebuild the
     
    618621}
    619622
    620623/** Add the signed_descriptor_t in <b>desc</b> to the router
    621  * journal; change its saved_location to SAVED_IN_CACHE and set its
    622  * offset appropriately. The location is set to SAVED_IN_JOURNAL
    623  * only if we can't move the body to the cache memory map */
     624 * journal; change its saved_location to SAVED_IN_JOURNAL and set its
     625 * offset appropriately. */
    624626static int
    625627signed_desc_append_to_journal(signed_descriptor_t *desc,
    626628                              desc_store_t *store)
    627629{
    628   char *fname = get_datadir_fname_suffix(store->fname_base, ".new");
     630  char *fname = get_datadir_fname(store->fname_base);
    629631  const char *body = signed_descriptor_get_body_impl(desc,1);
    630632  size_t len = desc->signed_descriptor_len + desc->annotations_len;
    631633
    632   if (append_bytes_to_file(fname, body, len, 1)) {
     634  if (gzappend_bytes_to_file(fname, body, len, 1)) {
    633635    log_warn(LD_FS, "Unable to store router descriptor");
    634636    tor_free(fname);
    635637    return -1;
     
    749751      smartlist_add(chunk_list, c);
    750752    });
    751753
    752   if (write_chunks_to_file(fname, chunk_list, 1)<0) {
     754  if (gzwrite_chunks_to_file(fname, chunk_list, 1)<0) {
    753755    log_warn(LD_FS, "Error writing router store to disk.");
    754756    goto done;
    755757  }
     
    760762  }
    761763
    762764  errno = 0;
    763   load_file_into_mmap(store->mmap, fname);
     765  gzload_file_into_mmap(store->mmap, fname);
    764766  if (!store->mmap) {
    765767    if (errno == ERANGE) {
    766768      /* empty store.*/
     
    793795      signed_descriptor_get_body(sd); /* reconstruct and assert */
    794796    });
    795797
    796   tor_free(fname);
    797   fname = get_datadir_fname_suffix(store->fname_base, ".new");
    798   write_str_to_file(fname, "", 1);
    799 
    800798  r = 0;
    801799  store->store_len = (size_t) offset;
    802800  store->journal_len = 0;
     
    819817router_reload_router_list_impl(desc_store_t *store)
    820818{
    821819  char *fname = NULL, *altname = NULL;
    822   int read_from_old_location = 0;
    823820  int extrainfo = (store->type == EXTRAINFO_STORE);
    824   int r;
    825   size_t cache_size;
    826821  time_t now = time(NULL);
    827822  store->journal_len = store->store_len = 0;
    828823
     
    834829    tor_munmap(store->mmap);
    835830
    836831  /* Load the cache file into an anonymous map first */
    837   if (load_file_into_mmap(store->mmap, fname) &&
    838       altname && file_status(altname) == FN_FILE) {
     832  if (gzload_file_into_mmap(store->mmap, fname) && altname) {
    839833    log_notice(LD_DIR, "Couldn't read %s; trying to load routers from old "
    840                "location %s.", fname, altname);
    841     if (!(load_file_into_mmap(store->mmap, altname)))
    842       read_from_old_location = 1;
    843   }
    844   if (altname && !read_from_old_location) {
     834               "uncompressed location %s.", fname, altname);
     835    /* Read the old cache */
     836    if (file_status(altname) == FN_FILE)
     837      load_file_into_mmap(store->mmap, altname);
    845838    remove_file_if_very_old(altname, now);
    846   }
    847 
    848   /* Remeber the size of the current data, so we can determine
    849    * whether we've read anything from the journal */
    850   cache_size = store->mmap->size;
    851   tor_free(fname);
    852   fname = get_datadir_fname_suffix(store->fname_base, ".new");
    853   r = 0;
    854   if ((file_status(fname) == FN_FILE) && (r = load_file_into_mmap(store->mmap, fname)))
    855     log_warn(LD_DIR, "Couldn't read journal %s", fname);
    856   if (read_from_old_location) {
    857839    tor_free(altname);
     840    /* Load the old journal */
    858841    altname = get_datadir_fname_suffix(store->fname_alt_base, ".new");
    859     if (r)
     842    if (file_status(altname) == FN_FILE)
    860843      load_file_into_mmap(store->mmap, altname);
    861     else
    862       remove_file_if_very_old(altname, now);
     844    remove_file_if_very_old(altname, now);
    863845  }
    864846
    865847  /* Load router data from the map */
     
    878860  tor_free(fname);
    879861  tor_free(altname);
    880862
    881   /* rebuild if journal was not empty or we used old locations */
    882   if ((store->mmap->size-cache_size>0) || read_from_old_location) {
     863  /* Rebuild if we've read any data, since we can't tell
     864   * how much of it is journal data. */
     865  if (store->mmap->size) {
    883866    /* Always clear the journal on startup.*/
    884867    router_rebuild_store(RRS_FORCE, store);
    885868  } else if (!extrainfo) {
     
    26602643    routerlist->desc_by_eid_map = sdmap_new();
    26612644    routerlist->extra_info_map = eimap_new();
    26622645
    2663     routerlist->desc_store.fname_base = "cached-descriptors";
    2664     routerlist->desc_store.fname_alt_base = "cached-routers";
    2665     routerlist->extrainfo_store.fname_base = "cached-extrainfo";
     2646    routerlist->desc_store.fname_base = "cached-descriptors.gz";
     2647    routerlist->desc_store.fname_alt_base = "cached-descriptors";
     2648    routerlist->extrainfo_store.fname_base = "cached-extrainfo.gz";
    26662649
    26672650    routerlist->desc_store.type = ROUTER_STORE;
    26682651    routerlist->extrainfo_store.type = EXTRAINFO_STORE;
  • src/test/test_microdesc.c

    diff -uNr tor-0.2.3.16-alpha-anon_mmap//src/test/test_microdesc.c tor-0.2.3.16-alpha-gzipped_cache//src/test/test_microdesc.c
    old new  
    33
    44#include "orconfig.h"
    55#include "or.h"
     6#include "gzutil.h"
    67
    78#include "config.h"
    89#include "microdesc.h"
     
    147148  test_mem_op(microdesc_get_body(md3), ==, test_md3_noannotation,
    148149              strlen(test_md3_noannotation));
    149150
    150   tor_asprintf(&fn, "%s"PATH_SEPARATOR"cached-microdescs.new",
     151  tor_asprintf(&fn, "%s"PATH_SEPARATOR"cached-microdescs.gz",
    151152               options->DataDirectory);
    152   s = read_file_to_str(fn, RFTS_BIN, NULL);
     153  map = tor_malloc(sizeof(tor_mmap_t));
     154  map->data = NULL;
     155  map->size = map->mapping_size = 0;
     156  gzload_file_into_mmap(map, fn);
     157  s = (char*)map->data;
    153158  tt_assert(s);
    154159  test_mem_op(microdesc_get_body(md1), ==, s + md1->off, md1->bodylen);
    155160  test_mem_op(microdesc_get_body(md2), ==, s + md2->off, md2->bodylen);
     
    160165  tt_int_op(smartlist_len(md3->family), ==, 3);
    161166  tt_str_op(smartlist_get(md3->family, 0), ==, "nodeX");
    162167
     168  tor_munmap(map);
     169  free(map);
     170
    163171  /* Now rebuild the cache! */
    164172  tt_int_op(microdesc_cache_rebuild(mc, 1), ==, 0);
    165173
     
    167175  tt_int_op(md2->saved_location, ==, SAVED_IN_CACHE);
    168176  tt_int_op(md3->saved_location, ==, SAVED_IN_CACHE);
    169177
    170   /* The journal should be empty now */
    171   tor_free(s);
    172   s = read_file_to_str(fn, RFTS_BIN, NULL);
    173   tt_str_op(s, ==, "");
    174   tor_free(s);
    175   tor_free(fn);
    176 
    177178  /* read the cache. */
    178   tor_asprintf(&fn, "%s"PATH_SEPARATOR"cached-microdescs",
    179               options->DataDirectory);
    180179  map = tor_malloc(sizeof(tor_mmap_t));
    181180  map->data = NULL;
    182181  map->size = map->mapping_size = 0;
    183   load_file_into_mmap(map, fn);
     182  gzload_file_into_mmap(map, fn);
    184183  s = (char*)map->data;
    185184  test_mem_op(microdesc_get_body(md1), ==, s + md1->off, strlen(test_md1));
    186185  test_mem_op(microdesc_get_body(md2), ==, s + md2->off, strlen(test_md2));