Ticket #16540: 0001-Use-metrics-lib-for-downloading-descriptors.patch

File 0001-Use-metrics-lib-for-downloading-descriptors.patch, 11.5 KB (added by leeroy, 4 years ago)

Revised Onionoo fix: use metrics-lib for downloading descriptors.

  • src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java

    From 4e2809b6e6b1378c1deaa54d39556b15309889a4 Mon Sep 17 00:00:00 2001
    From: leeroy <ter.one.leeboi@hush.com>
    Date: Wed, 15 Jul 2015 16:49:19 -0400
    Subject: [PATCH] Use metrics-lib for downloading descriptors.
    
    ---
     .../onionoo/updater/DescriptorDownloader.java      | 195 +++++----------------
     .../onionoo/updater/DescriptorSource.java          |  39 ++---
     2 files changed, 61 insertions(+), 173 deletions(-)
    
    diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java b/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java
    index d5b6c49..adc1e8f 100644
    a b  
    11package org.torproject.onionoo.updater;
    22
    3 import java.io.BufferedInputStream;
    4 import java.io.BufferedOutputStream;
    5 import java.io.BufferedReader;
    63import java.io.File;
    7 import java.io.FileOutputStream;
    84import java.io.IOException;
    9 import java.io.InputStream;
    10 import java.io.InputStreamReader;
    11 import java.net.HttpURLConnection;
    12 import java.net.URL;
    135import java.util.SortedSet;
     6import java.util.SortedMap;
    147import java.util.TreeSet;
    15 import java.util.zip.GZIPInputStream;
     8import java.util.TreeMap;
     9import java.util.Map;
    1610
    1711import org.slf4j.Logger;
    1812import org.slf4j.LoggerFactory;
     13import org.torproject.descriptor.impl.DescriptorCollectorImpl;
    1914
    20 class DescriptorDownloader {
     15class DescriptorDownloader extends DescriptorCollectorImpl {
    2116
    2217  private static Logger log = LoggerFactory.getLogger(
    2318      DescriptorDownloader.class);
    2419
    25   private final String protocolHostNameResourcePrefix =
     20  private final String host =
    2621      "https://collector.torproject.org/recent/";
    27 
    2822  private String directory;
    2923
    3024  private final File inDir = new File("in/recent");
    3125
    32   public DescriptorDownloader(DescriptorType descriptorType) {
     26  private void setDirectory(DescriptorType descriptorType) {
    3327    switch (descriptorType) {
    34     case RELAY_CONSENSUSES:
    35       this.directory = "relay-descriptors/consensuses/";
    36       break;
    37     case RELAY_SERVER_DESCRIPTORS:
    38       this.directory = "relay-descriptors/server-descriptors/";
    39       break;
    40     case RELAY_EXTRA_INFOS:
    41       this.directory = "relay-descriptors/extra-infos/";
    42       break;
    43     case EXIT_LISTS:
    44       this.directory = "exit-lists/";
    45       break;
    46     case BRIDGE_STATUSES:
    47       this.directory = "bridge-descriptors/statuses/";
    48       break;
    49     case BRIDGE_SERVER_DESCRIPTORS:
    50       this.directory = "bridge-descriptors/server-descriptors/";
    51       break;
    52     case BRIDGE_EXTRA_INFOS:
    53       this.directory = "bridge-descriptors/extra-infos/";
    54       break;
    55     default:
    56       log.error("Unknown descriptor type.");
    57       return;
    58     }
    59   }
    60 
    61   private SortedSet<String> localFiles = new TreeSet<String>();
    62 
    63   public int statLocalFiles() {
    64     File localDirectory = new File(this.inDir, this.directory);
    65     if (localDirectory.exists()) {
    66       for (File file : localDirectory.listFiles()) {
    67         this.localFiles.add(file.getName());
    68       }
    69     }
    70     return this.localFiles.size();
    71   }
    72 
    73   private SortedSet<String> remoteFiles = new TreeSet<String>();
    74 
    75   public int fetchRemoteDirectory() {
    76     String directoryUrl = this.protocolHostNameResourcePrefix
    77         + this.directory;
    78     try {
    79       URL u = new URL(directoryUrl);
    80       HttpURLConnection huc = (HttpURLConnection) u.openConnection();
    81       huc.setRequestMethod("GET");
    82       huc.connect();
    83       if (huc.getResponseCode() != 200) {
    84         log.error("Could not fetch " + directoryUrl
    85             + ": " + huc.getResponseCode() + " "
    86             + huc.getResponseMessage() + ".  Skipping.");
    87         return 0;
    88       }
    89       BufferedReader br = new BufferedReader(new InputStreamReader(
    90           huc.getInputStream()));
    91       String line;
    92       while ((line = br.readLine()) != null) {
    93         if (!line.trim().startsWith("<tr>") ||
    94             !line.contains("<a href=\"")) {
    95           continue;
    96         }
    97         String linePart = line.substring(
    98             line.indexOf("<a href=\"") + "<a href=\"".length());
    99         if (!linePart.contains("\"")) {
    100           continue;
    101         }
    102         linePart = linePart.substring(0, linePart.indexOf("\""));
    103         if (linePart.endsWith("/")) {
    104           continue;
    105         }
    106         this.remoteFiles.add(linePart);
    107       }
    108       br.close();
    109     } catch (IOException e) {
    110       log.error("Could not fetch or parse " + directoryUrl
    111         + ".  Skipping. Reason: " + e.getMessage());
     28      case RELAY_CONSENSUSES:
     29        this.directory = "relay-descriptors/consensuses/";
     30        break;
     31      case RELAY_SERVER_DESCRIPTORS:
     32        this.directory = "relay-descriptors/server-descriptors/";
     33        break;
     34      case RELAY_EXTRA_INFOS:
     35        this.directory = "relay-descriptors/extra-infos/";
     36        break;
     37      case EXIT_LISTS:
     38        this.directory = "exit-lists/";
     39        break;
     40      case BRIDGE_STATUSES:
     41        this.directory = "bridge-descriptors/statuses/";
     42        break;
     43      case BRIDGE_SERVER_DESCRIPTORS:
     44        this.directory = "bridge-descriptors/server-descriptors/";
     45        break;
     46      case BRIDGE_EXTRA_INFOS:
     47        this.directory = "bridge-descriptors/extra-infos/";
     48        break;
     49      default:
     50        log.error("Unknown descriptor type.");
     51        this.directory = null;
     52        return;
    11253    }
    113     return this.remoteFiles.size();
    11454  }
    11555
    116   public int fetchRemoteFiles() {
    117     int fetchedFiles = 0;
    118     for (String remoteFile : this.remoteFiles) {
    119       if (this.localFiles.contains(remoteFile)) {
    120         continue;
    121       }
    122       String fileUrl = this.protocolHostNameResourcePrefix
    123           + this.directory + remoteFile;
    124       File localTempFile = new File(this.inDir, this.directory
    125           + remoteFile + ".tmp");
    126       File localFile = new File(this.inDir, this.directory + remoteFile);
    127       try {
    128         localFile.getParentFile().mkdirs();
    129         URL u = new URL(fileUrl);
    130         HttpURLConnection huc = (HttpURLConnection) u.openConnection();
    131         huc.setRequestMethod("GET");
    132         huc.addRequestProperty("Accept-Encoding", "gzip");
    133         huc.connect();
    134         if (huc.getResponseCode() != 200) {
    135           log.error("Could not fetch \n\t" + fileUrl
    136               + ": " + huc.getResponseCode() + " "
    137               + huc.getResponseMessage() + ".  Skipping.");
    138           continue;
    139         }
    140         long lastModified = huc.getHeaderFieldDate("Last-Modified", -1L);
    141         InputStream is;
    142         if (huc.getContentEncoding() != null &&
    143             huc.getContentEncoding().equalsIgnoreCase("gzip")) {
    144           is = new GZIPInputStream(huc.getInputStream());
    145         } else {
    146           is = huc.getInputStream();
    147         }
    148         BufferedInputStream bis = new BufferedInputStream(is);
    149         BufferedOutputStream bos = new BufferedOutputStream(
    150             new FileOutputStream(localTempFile));
    151         int len;
    152         byte[] data = new byte[1024];
    153         while ((len = bis.read(data, 0, 1024)) >= 0) {
    154           bos.write(data, 0, len);
    155         }
    156         bis.close();
    157         bos.close();
    158         localTempFile.renameTo(localFile);
    159         if (lastModified >= 0) {
    160           localFile.setLastModified(lastModified);
    161         }
    162         fetchedFiles++;
    163       } catch (IOException e) {
    164         log.error("Could not fetch or store \n\t" + fileUrl
    165             + ".  Skipping.\n\tReason: " + e.getMessage());
    166       }
     56  public void collectDescriptors(DescriptorType[] types) {
     57    String[] directories = new String[types.length];
     58    for (int i = 0; i < types.length; i++) {
     59      setDirectory(types[i]);
     60      directories[i] = this.directory;
    16761    }
    168     return fetchedFiles;
     62    collectDescriptors(host, directories, 0L, inDir, true);
    16963  }
    17064
    171   public int deleteOldLocalFiles() {
    172     int deletedFiles = 0;
    173     for (String localFile : this.localFiles) {
    174       if (!this.remoteFiles.contains(localFile)) {
    175         new File(this.inDir, this.directory + localFile).delete();
    176         deletedFiles++;
    177       }
     65  public void collectDescriptors(DescriptorType type) {
     66    setDirectory(type);
     67    if (directory != null) {
     68      String[] d = { directory };
     69      collectDescriptors(host, d, 0L, inDir, true);
    17870    }
    179     return deletedFiles;
    18071  }
    18172}
  • src/main/java/org/torproject/onionoo/updater/DescriptorSource.java

    diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
    index a1e489e..b938912 100644
    a b public class DescriptorSource { 
    2828  private List<DescriptorQueue> descriptorQueues;
    2929
    3030  private DescriptorQueue archiveDescriptorQueue;
     31  private DescriptorDownloader descriptorDownloader;
    3132
    3233  public DescriptorSource() {
     34    descriptorDownloader = new DescriptorDownloader();
    3335    this.descriptorQueues = new ArrayList<DescriptorQueue>();
    3436    this.descriptorListeners =
    3537        new HashMap<DescriptorType, Set<DescriptorListener>>();
    public class DescriptorSource { 
    6062  }
    6163
    6264  public void downloadDescriptors() {
    63     for (DescriptorType descriptorType : DescriptorType.values()) {
    64       log.info("Loading: " + descriptorType);
    65       this.downloadDescriptors(descriptorType);
     65    DescriptorType[] types = DescriptorType.values();
     66    String loading = "";   
     67    for (DescriptorType descriptorType : types) {
     68      loading += descriptorType + " ";
    6669    }
     70    log.info("Loading descriptors: " + loading);
     71    descriptorDownloader.collectDescriptors(types);
    6772  }
    6873
    69   private int localFilesBefore = 0, foundRemoteFiles = 0,
    70       downloadedFiles = 0, deletedLocalFiles = 0;
    71 
    7274  private void downloadDescriptors(DescriptorType descriptorType) {
    73     DescriptorDownloader descriptorDownloader =
    74         new DescriptorDownloader(descriptorType);
    75     this.localFilesBefore += descriptorDownloader.statLocalFiles();
    76     this.foundRemoteFiles +=
    77         descriptorDownloader.fetchRemoteDirectory();
    78     this.downloadedFiles += descriptorDownloader.fetchRemoteFiles();
    79     this.deletedLocalFiles += descriptorDownloader.deleteOldLocalFiles();
     75    log.info("Loading: " + descriptorType);
     76    descriptorDownloader.collectDescriptors(descriptorType);
    8077  }
    8178
    8279  public void readDescriptors() {
    public class DescriptorSource { 
    209206
    210207  public String getStatsString() {
    211208    StringBuilder sb = new StringBuilder();
    212     sb.append("    " + this.localFilesBefore + " recent descriptor files "
    213         + "found locally\n");
    214     sb.append("    " + this.foundRemoteFiles + " recent descriptor files "
    215         + "found remotely\n");
    216     sb.append("    " + this.downloadedFiles + " recent descriptor files "
    217         + "downloaded from remote\n");
    218     sb.append("    " + this.deletedLocalFiles + " recent descriptor "
    219         + "files deleted locally\n");
     209    sb.append("    " + descriptorDownloader.getLocalFilesBefore()
     210        + " recent descriptor files found locally\n");
     211    sb.append("    " + descriptorDownloader.getFoundRemoteFiles()
     212        + " recent descriptor files found remotely\n");
     213    sb.append("    " + descriptorDownloader.getDownloadedFiles()
     214        + " recent descriptor files downloaded from remote\n");
     215    sb.append("    " + descriptorDownloader.getDeletedLocalFiles()
     216        + " recent descriptor files deleted locally\n");
    220217    sb.append("    " + this.descriptorQueues.size() + " descriptor "
    221218        + "queues created for recent descriptors\n");
    222219    int historySizeBefore = 0, historySizeAfter = 0;