Ticket #16540: 0001-Modifications-of-DescriptorDownloader-and-Source.patch

File 0001-Modifications-of-DescriptorDownloader-and-Source.patch, 13.3 KB (added by leeroy, 4 years ago)
  • src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java

    From e393735b9aec11a4351ff8928d47852699e0d8bf Mon Sep 17 00:00:00 2001
    From: leeroy <ter.one.leeboi@hush.com>
    Date: Sat, 11 Jul 2015 23:55:12 -0400
    Subject: [PATCH] Modifications of DescriptorDownloader and Source.
    
    ---
     .../onionoo/updater/DescriptorDownloader.java      | 239 ++++++++-------------
     .../onionoo/updater/DescriptorSource.java          |  29 +--
     2 files changed, 104 insertions(+), 164 deletions(-)
    
    diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java b/src/main/java/org/torproject/onionoo/updater/DescriptorDownloader.java
    index d5b6c49..9ac6941 100644
    a b  
    11package org.torproject.onionoo.updater;
    22
    3 import java.io.BufferedInputStream;
    4 import java.io.BufferedOutputStream;
    5 import java.io.BufferedReader;
    63import java.io.File;
    7 import java.io.FileOutputStream;
    84import java.io.IOException;
    9 import java.io.InputStream;
    10 import java.io.InputStreamReader;
    11 import java.net.HttpURLConnection;
    12 import java.net.URL;
    135import java.util.SortedSet;
     6import java.util.SortedMap;
    147import java.util.TreeSet;
    15 import java.util.zip.GZIPInputStream;
     8import java.util.TreeMap;
     9import java.util.Map;
    1610
    1711import org.slf4j.Logger;
    1812import org.slf4j.LoggerFactory;
     13import org.torproject.descriptor.impl.DescriptorCollectorImpl;
    1914
    20 class DescriptorDownloader {
     15class DescriptorDownloader extends DescriptorCollectorImpl {
    2116
    2217  private static Logger log = LoggerFactory.getLogger(
    2318      DescriptorDownloader.class);
    2419
    25   private final String protocolHostNameResourcePrefix =
     20  private final String host =
    2621      "https://collector.torproject.org/recent/";
    27 
    2822  private String directory;
    2923
    3024  private final File inDir = new File("in/recent");
    3125
    32   public DescriptorDownloader(DescriptorType descriptorType) {
     26  private void setDirectoryForType(DescriptorType descriptorType) {
    3327    switch (descriptorType) {
    34     case RELAY_CONSENSUSES:
    35       this.directory = "relay-descriptors/consensuses/";
    36       break;
    37     case RELAY_SERVER_DESCRIPTORS:
    38       this.directory = "relay-descriptors/server-descriptors/";
    39       break;
    40     case RELAY_EXTRA_INFOS:
    41       this.directory = "relay-descriptors/extra-infos/";
    42       break;
    43     case EXIT_LISTS:
    44       this.directory = "exit-lists/";
    45       break;
    46     case BRIDGE_STATUSES:
    47       this.directory = "bridge-descriptors/statuses/";
    48       break;
    49     case BRIDGE_SERVER_DESCRIPTORS:
    50       this.directory = "bridge-descriptors/server-descriptors/";
    51       break;
    52     case BRIDGE_EXTRA_INFOS:
    53       this.directory = "bridge-descriptors/extra-infos/";
    54       break;
    55     default:
    56       log.error("Unknown descriptor type.");
    57       return;
     28      case RELAY_CONSENSUSES:
     29        this.directory = "relay-descriptors/consensuses/";
     30        break;
     31      case RELAY_SERVER_DESCRIPTORS:
     32        this.directory = "relay-descriptors/server-descriptors/";
     33        break;
     34      case RELAY_EXTRA_INFOS:
     35        this.directory = "relay-descriptors/extra-infos/";
     36        break;
     37      case EXIT_LISTS:
     38        this.directory = "exit-lists/";
     39        break;
     40      case BRIDGE_STATUSES:
     41        this.directory = "bridge-descriptors/statuses/";
     42        break;
     43      case BRIDGE_SERVER_DESCRIPTORS:
     44        this.directory = "bridge-descriptors/server-descriptors/";
     45        break;
     46      case BRIDGE_EXTRA_INFOS:
     47        this.directory = "bridge-descriptors/extra-infos/";
     48        break;
     49      default:
     50        log.error("Unknown descriptor type.");
     51        return;
    5852    }
    5953  }
    6054
    61   private SortedSet<String> localFiles = new TreeSet<String>();
    62 
    63   public int statLocalFiles() {
    64     File localDirectory = new File(this.inDir, this.directory);
    65     if (localDirectory.exists()) {
    66       for (File file : localDirectory.listFiles()) {
    67         this.localFiles.add(file.getName());
    68       }
     /**
      * Collects recent descriptors of the given type.
      *
      * Selects the directory for the type and, if one is set, passes it as the
      * only remote directory to the five-argument overload together with the
      * host URL, a minimum last-modified time of 0, the local "in/recent"
      * directory, and deletion of extraneous local files enabled.
      *
      * NOTE(review): the five-argument overload strips the trailing slash from
      * the host URL and the directory names carry no leading slash -- confirm
      * how the superclass joins the two when it builds request URLs.
      *
      * @param type type of descriptors to collect
      */
     55  public void collectDescriptors(DescriptorType type) {
     56    setDirectoryForType(type);
     57    if (directory != null) {
     58      String[] d = { directory };
     59      collectDescriptors(host, d, 0L, inDir, true);
    6960    }
    70     return this.localFiles.size();
    7161  }
    7262
    73   private SortedSet<String> remoteFiles = new TreeSet<String>();
     /* Statistics counters written by the five-argument collectDescriptors
      * method and read through the package-private getters of this class. */
     63  private int localFilesBefore = 0, foundRemoteFiles = 0,
     64              downloadedFiles = 0, deletedLocalFiles = 0;
    7465
    75   public int fetchRemoteDirectory() {
    76     String directoryUrl = this.protocolHostNameResourcePrefix
    77         + this.directory;
    78     try {
    79       URL u = new URL(directoryUrl);
    80       HttpURLConnection huc = (HttpURLConnection) u.openConnection();
    81       huc.setRequestMethod("GET");
    82       huc.connect();
    83       if (huc.getResponseCode() != 200) {
    84         log.error("Could not fetch " + directoryUrl
    85             + ": " + huc.getResponseCode() + " "
    86             + huc.getResponseMessage() + ".  Skipping.");
    87         return 0;
    88       }
    89       BufferedReader br = new BufferedReader(new InputStreamReader(
    90           huc.getInputStream()));
    91       String line;
    92       while ((line = br.readLine()) != null) {
    93         if (!line.trim().startsWith("<tr>") ||
    94             !line.contains("<a href=\"")) {
    95           continue;
    96         }
    97         String linePart = line.substring(
    98             line.indexOf("<a href=\"") + "<a href=\"".length());
    99         if (!linePart.contains("\"")) {
    100           continue;
    101         }
    102         linePart = linePart.substring(0, linePart.indexOf("\""));
    103         if (linePart.endsWith("/")) {
    104           continue;
    105         }
    106         this.remoteFiles.add(linePart);
    107       }
    108       br.close();
    109     } catch (IOException e) {
    110       log.error("Could not fetch or parse " + directoryUrl
    111         + ".  Skipping. Reason: " + e.getMessage());
     66  public void collectDescriptors(String collecTorBaseUrl,
     67      String[] remoteDirectories, long minLastModified,
     68      File localDirectory, boolean deleteExtraneousLocalFiles) {
     69    collecTorBaseUrl = collecTorBaseUrl.endsWith("/") ?
     70      collecTorBaseUrl.substring(0, collecTorBaseUrl.length() - 1):
     71      collecTorBaseUrl;
     72    if (minLastModified < 0) {
     73      throw new IllegalArgumentException("A negative minimum "
     74        + "last-modified time is not permitted.");
    11275    }
    113     return this.remoteFiles.size();
    114   }
    115 
    116   public int fetchRemoteFiles() {
    117     int fetchedFiles = 0;
    118     for (String remoteFile : this.remoteFiles) {
    119       if (this.localFiles.contains(remoteFile)) {
     76    if (localDirectory.exists() && !localDirectory.isDirectory()) {
     77      throw new IllegalArgumentException("Local directory already exists "
     78        + "and is not a directory.");
     79    }
     80    SortedMap<String, Long> localFiles =
     81        super.statLocalDirectory(localDirectory);
     82    localFilesBefore = localFiles.size();
     83    SortedMap<String, String> fetchedDirectoryListings =
     84        super.fetchRemoteDirectories(collecTorBaseUrl, remoteDirectories);
     85    foundRemoteFiles = fetchedDirectoryListings.size();
     86    SortedSet<String> parsedDirectories = new TreeSet<String>();
     87    SortedMap<String, Long> remoteFiles = new TreeMap<String, Long>();
     88    for (Map.Entry<String, String> e :
     89        fetchedDirectoryListings.entrySet()) {
     90      String remoteDirectory = e.getKey();
     91      String directoryListing = e.getValue();
     92      SortedMap<String, Long> parsedRemoteFiles =
     93          super.parseDirectoryListing(remoteDirectory, directoryListing);
     94      if (parsedRemoteFiles == null) {
    12095        continue;
    12196      }
    122       String fileUrl = this.protocolHostNameResourcePrefix
    123           + this.directory + remoteFile;
    124       File localTempFile = new File(this.inDir, this.directory
    125           + remoteFile + ".tmp");
    126       File localFile = new File(this.inDir, this.directory + remoteFile);
    127       try {
    128         localFile.getParentFile().mkdirs();
    129         URL u = new URL(fileUrl);
    130         HttpURLConnection huc = (HttpURLConnection) u.openConnection();
    131         huc.setRequestMethod("GET");
    132         huc.addRequestProperty("Accept-Encoding", "gzip");
    133         huc.connect();
    134         if (huc.getResponseCode() != 200) {
    135           log.error("Could not fetch \n\t" + fileUrl
    136               + ": " + huc.getResponseCode() + " "
    137               + huc.getResponseMessage() + ".  Skipping.");
    138           continue;
    139         }
    140         long lastModified = huc.getHeaderFieldDate("Last-Modified", -1L);
    141         InputStream is;
    142         if (huc.getContentEncoding() != null &&
    143             huc.getContentEncoding().equalsIgnoreCase("gzip")) {
    144           is = new GZIPInputStream(huc.getInputStream());
    145         } else {
    146           is = huc.getInputStream();
    147         }
    148         BufferedInputStream bis = new BufferedInputStream(is);
    149         BufferedOutputStream bos = new BufferedOutputStream(
    150             new FileOutputStream(localTempFile));
    151         int len;
    152         byte[] data = new byte[1024];
    153         while ((len = bis.read(data, 0, 1024)) >= 0) {
    154           bos.write(data, 0, len);
    155         }
    156         bis.close();
    157         bos.close();
    158         localTempFile.renameTo(localFile);
    159         if (lastModified >= 0) {
    160           localFile.setLastModified(lastModified);
    161         }
    162         fetchedFiles++;
    163       } catch (IOException e) {
    164         log.error("Could not fetch or store \n\t" + fileUrl
    165             + ".  Skipping.\n\tReason: " + e.getMessage());
    166       }
     97      parsedDirectories.add(remoteDirectory);
     98      remoteFiles.putAll(parsedRemoteFiles);
     99    }   
     100    super.fetchRemoteFiles(collecTorBaseUrl, remoteFiles, minLastModified,
     101        localDirectory, localFiles);
     102    downloadedFiles = remoteFiles.size();
     103    if (deleteExtraneousLocalFiles) {
     104      deletedLocalFiles = deleteExtraneousLocalFiles(
     105        parsedDirectories, remoteFiles,localDirectory, localFiles);
    167106    }
    168     return fetchedFiles;
    169107  }
    170108
    171   public int deleteOldLocalFiles() {
    172     int deletedFiles = 0;
    173     for (String localFile : this.localFiles) {
    174       if (!this.remoteFiles.contains(localFile)) {
    175         new File(this.inDir, this.directory + localFile).delete();
    176         deletedFiles++;
    177       }
    178     }
    179     return deletedFiles;
     109  int deleteExtraneousLocalFiles(
     110      SortedSet<String> parsedDirectories,
     111      SortedMap<String, Long> remoteFiles, File localDirectory,
     112      SortedMap<String, Long> localFiles) {
     113    int deleted = 0;
     114    for (String localPath : localFiles.keySet())
     115      for (String remoteDirectory : parsedDirectories)
     116        if (localPath.startsWith(remoteDirectory))
     117          if (!remoteFiles.containsKey(localPath)) {
     118            new File(localDirectory.getPath() + localPath).delete();
     119            deleted++;
     120          }
     121    return deleted;
    180122  }
     123
     /** Returns the localFilesBefore counter, which is fed from the size of the stat'ed local directory. */
     124  int getLocalFilesBefore() { return localFilesBefore; }
     /** Returns the foundRemoteFiles counter, which is fed from the size of the fetched directory listings map. */
     125  int getFoundRemoteFiles() { return foundRemoteFiles; }
     /** Returns the downloadedFiles counter, which is fed from the size of the parsed remote files map. */
     126  int getDownloadedFiles() { return downloadedFiles; }
     /** Returns the deletedLocalFiles counter, which is fed from the result of deleteExtraneousLocalFiles. */
     127  int getDeletedLocalFiles() { return deletedLocalFiles; }
    181128}
  • src/main/java/org/torproject/onionoo/updater/DescriptorSource.java

    diff --git a/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java b/src/main/java/org/torproject/onionoo/updater/DescriptorSource.java
    index a1e489e..69541b2 100644
    a b public class DescriptorSource { 
    2828  private List<DescriptorQueue> descriptorQueues;
    2929
    3030  private DescriptorQueue archiveDescriptorQueue;
     31  private DescriptorDownloader dd;
    3132
    3233  public DescriptorSource() {
     34    dd = new DescriptorDownloader();
    3335    this.descriptorQueues = new ArrayList<DescriptorQueue>();
    3436    this.descriptorListeners =
    3537        new HashMap<DescriptorType, Set<DescriptorListener>>();
    public class DescriptorSource { 
    6668    }
    6769  }
    6870
    69   private int localFilesBefore = 0, foundRemoteFiles = 0,
    70       downloadedFiles = 0, deletedLocalFiles = 0;
    71 
    /**
     * Downloads recent descriptors of the given type by delegating to the
     * DescriptorDownloader instance held in the dd field.
     */
    7271  private void downloadDescriptors(DescriptorType descriptorType) {
    73     DescriptorDownloader descriptorDownloader =
    74         new DescriptorDownloader(descriptorType);
    75     this.localFilesBefore += descriptorDownloader.statLocalFiles();
    76     this.foundRemoteFiles +=
    77         descriptorDownloader.fetchRemoteDirectory();
    78     this.downloadedFiles += descriptorDownloader.fetchRemoteFiles();
    79     this.deletedLocalFiles += descriptorDownloader.deleteOldLocalFiles();
     72    dd.collectDescriptors(descriptorType);
    8073  }
    8174
    8275  public void readDescriptors() {
    public class DescriptorSource { 
    209202
    210203  public String getStatsString() {
    211204    StringBuilder sb = new StringBuilder();
    212     sb.append("    " + this.localFilesBefore + " recent descriptor files "
    213         + "found locally\n");
    214     sb.append("    " + this.foundRemoteFiles + " recent descriptor files "
    215         + "found remotely\n");
    216     sb.append("    " + this.downloadedFiles + " recent descriptor files "
    217         + "downloaded from remote\n");
    218     sb.append("    " + this.deletedLocalFiles + " recent descriptor "
    219         + "files deleted locally\n");
     205    sb.append("    " + dd.getLocalFilesBefore()
     206        + " recent descriptor files found locally\n");
     207    sb.append("    " + dd.getFoundRemoteFiles()
     208        + " recent descriptor files found remotely\n");
     209    sb.append("    " + dd.getDownloadedFiles()
     210        + " recent descriptor files downloaded from remote\n");
     211    sb.append("    " + dd.getDeletedLocalFiles()
     212        + " recent descriptor files deleted locally\n");
    220213    sb.append("    " + this.descriptorQueues.size() + " descriptor "
    221214        + "queues created for recent descriptors\n");
    222215    int historySizeBefore = 0, historySizeAfter = 0;