Ticket #20174: tor.patch

File tor.patch, 16.8 KB (added by haxxpop, 3 years ago)
  • scripts/maint/updateFallbackDirs.py

    From a76d1f50b19aba140f901cadffa2693b497533b9 Mon Sep 17 00:00:00 2001
    From: Suphanat Chunhapanya <haxx.pop@gmail.com>
    Date: Fri, 20 Jan 2017 15:37:32 +0700
    Subject: [PATCH 1/3] Automate checking existing fallbacks
    
    ---
     scripts/maint/updateFallbackDirs.py | 62 +++++++++++++++++++++++++++++--------
     1 file changed, 49 insertions(+), 13 deletions(-)
    
    diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
    index e0bc939..42ae0fa 100755
    a b import dateutil.parser 
    3737# bson_lazy provides bson
    3838#from bson import json_util
    3939import copy
     40import re
    4041
    4142from stem.descriptor import DocumentHandler
    4243from stem.descriptor.remote import get_consensus
    4344
    4445import logging
    45 # INFO tells you why each relay was included or excluded
    46 # WARN tells you about potential misconfigurations and relay detail changes
    47 logging.basicConfig(level=logging.WARNING)
    4846logging.root.name = ''
    49 # INFO tells you about each consensus download attempt
    50 logging.getLogger('stem').setLevel(logging.WARNING)
    5147
    5248HAVE_IPADDRESS = False
    5349try:
    BLACKLIST_EXCLUDES_WHITELIST_ENTRIES = True 
    148144
    149145WHITELIST_FILE_NAME = 'scripts/maint/fallback.whitelist'
    150146BLACKLIST_FILE_NAME = 'scripts/maint/fallback.blacklist'
     147FALLBACK_FILE_NAME  = 'src/or/fallback_dirs.inc'
    151148
    152149# The number of bytes we'll read from a filter file before giving up
    153150MAX_LIST_FILE_SIZE = 1024 * 1024
    def read_from_file(file_name, max_len): 
    368365                 )
    369366  return None
    370367
     368def parse_fallback_file(file_name):
     369  file_data = read_from_file(file_name, MAX_LIST_FILE_SIZE)
     370  file_data = cleanse_unprintable(file_data)
     371  file_data = remove_bad_chars(file_data, '\n"\0')
     372  file_data = re.sub('/\*.*?\*/', '', file_data)
     373  file_data = file_data.replace(',', '\n')
     374  file_data = file_data.replace(' weight=10', '')
     375  return file_data
     376
    371377def load_possibly_compressed_response_json(response):
    372378    if response.info().get('Content-Encoding') == 'gzip':
    373379      buf = StringIO.StringIO( response.read() )
    class CandidateList(dict): 
    14311437    self.fallbacks.sort(key=lambda f: f._data[data_field])
    14321438
    14331439  @staticmethod
    1434   def load_relaylist(file_name):
     1440  def load_relaylist(file_obj):
    14351441    """ Read each line in the file, and parse it like a FallbackDir line:
    14361442        an IPv4 address and optional port:
    14371443          <IPv4 address>:<port>
    class CandidateList(dict): 
    14461452        (of string -> string key/value pairs),
    14471453        and these dictionaries are placed in an array.
    14481454        comments start with # and are ignored """
     1455    file_data = file_obj['data']
     1456    file_name = file_obj['name']
    14491457    relaylist = []
    1450     file_data = read_from_file(file_name, MAX_LIST_FILE_SIZE)
    14511458    if file_data is None:
    14521459      return relaylist
    14531460    for line in file_data.split('\n'):
    class CandidateList(dict): 
    14881495    return relaylist
    14891496
    14901497  # apply the fallback whitelist and blacklist
    1491   def apply_filter_lists(self):
     1498  def apply_filter_lists(self, whitelist_obj, blacklist_obj):
    14921499    excluded_count = 0
    14931500    logging.debug('Applying whitelist and blacklist.')
    14941501    # parse the whitelist and blacklist
    1495     whitelist = self.load_relaylist(WHITELIST_FILE_NAME)
    1496     blacklist = self.load_relaylist(BLACKLIST_FILE_NAME)
     1502    whitelist = self.load_relaylist(whitelist_obj)
     1503    blacklist = self.load_relaylist(blacklist_obj)
    14971504    filtered_fallbacks = []
    14981505    for f in self.fallbacks:
    14991506      in_whitelist = f.is_in_whitelist(whitelist)
    class CandidateList(dict): 
    20642071      s += 'or setting INCLUDE_UNLISTED_ENTRIES = True.'
    20652072    return s
    20662073
     2074def process_existing():
     2075  logging.basicConfig(level=logging.INFO)
     2076  logging.getLogger('stem').setLevel(logging.INFO)
     2077  whitelist = {'data': parse_fallback_file(FALLBACK_FILE_NAME),
     2078               'name': FALLBACK_FILE_NAME}
     2079  blacklist = {'data': read_from_file(BLACKLIST_FILE_NAME, MAX_LIST_FILE_SIZE),
     2080               'name': BLACKLIST_FILE_NAME}
     2081  list_fallbacks(whitelist, blacklist)
     2082
     2083def process_default():
     2084  logging.basicConfig(level=logging.WARNING)
     2085  logging.getLogger('stem').setLevel(logging.WARNING)
     2086  whitelist = {'data': read_from_file(WHITELIST_FILE_NAME, MAX_LIST_FILE_SIZE),
     2087               'name': WHITELIST_FILE_NAME}
     2088  blacklist = {'data': read_from_file(BLACKLIST_FILE_NAME, MAX_LIST_FILE_SIZE),
     2089               'name': BLACKLIST_FILE_NAME}
     2090  list_fallbacks(whitelist, blacklist)
     2091
    20672092## Main Function
     2093def main():
     2094  if get_command() == 'check_existing':
     2095    process_existing()
     2096  else:
     2097    process_default()
     2098
     2099def get_command():
     2100  if len(sys.argv) == 2:
     2101    return sys.argv[1]
     2102  else:
     2103    return None
    20682104
    2069 def list_fallbacks():
     2105def list_fallbacks(whitelist, blacklist):
    20702106  """ Fetches required onionoo documents and evaluates the
    20712107      fallback directory criteria for each of the relays """
    20722108
    def list_fallbacks(): 
    20992135  # warning that the details have changed from those in the whitelist.
    21002136  # instead, there will be an info-level log during the eligibility check.
    21012137  initial_count = len(candidates.fallbacks)
    2102   excluded_count = candidates.apply_filter_lists()
     2138  excluded_count = candidates.apply_filter_lists(whitelist, blacklist)
    21032139  print candidates.summarise_filters(initial_count, excluded_count)
    21042140  eligible_count = len(candidates.fallbacks)
    21052141
    def list_fallbacks(): 
    21702206    print x.fallbackdir_line(candidates.fallbacks, prefilter_fallbacks)
    21712207
    21722208if __name__ == "__main__":
    2173   list_fallbacks()
     2209  main()
  • scripts/maint/updateFallbackDirs.py

    -- 
    2.7.4
    
    
    From 6a26248a9729f4c88e2f2d7a62daf0885b672792 Mon Sep 17 00:00:00 2001
    From: Suphanat Chunhapanya <haxx.pop@gmail.com>
    Date: Fri, 20 Jan 2017 15:38:30 +0700
    Subject: [PATCH 2/3] Logging existing fallbacks at warning level
    
    ---
     scripts/maint/updateFallbackDirs.py | 56 ++++++++++++++++++++-----------------
     1 file changed, 31 insertions(+), 25 deletions(-)
    
    diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
    index 42ae0fa..efa9bb4 100755
    a b class Candidate(object): 
    708708    #
    709709    # if the relay doesn't have a recommended version field, exclude the relay
    710710    if not self._data.has_key('recommended_version'):
    711       logging.info('%s not a candidate: no recommended_version field',
     711      log_excluded('%s not a candidate: no recommended_version field',
    712712                   self._fpr)
    713713      return False
    714714    if not self._data['recommended_version']:
    715       logging.info('%s not a candidate: version not recommended', self._fpr)
     715      log_excluded('%s not a candidate: version not recommended', self._fpr)
    716716      return False
    717717    # if the relay doesn't have a version field, exclude the relay
    718718    if not self._data.has_key('version'):
    719       logging.info('%s not a candidate: no version field', self._fpr)
     719      log_excluded('%s not a candidate: no version field', self._fpr)
    720720      return False
    721721    if self._data['version'] in Candidate.STALE_CONSENSUS_VERSIONS:
    722722      logging.warning('%s not a candidate: version delivers stale consensuses',
    class Candidate(object): 
    870870  def is_candidate(self):
    871871    try:
    872872      if (MUST_BE_RUNNING_NOW and not self.is_running()):
    873         logging.info('%s not a candidate: not running now, unable to check ' +
     873        log_excluded('%s not a candidate: not running now, unable to check ' +
    874874                     'DirPort consensus download', self._fpr)
    875875        return False
    876876      if (self._data['last_changed_address_or_port'] >
    877877          self.CUTOFF_ADDRESS_AND_PORT_STABLE):
    878         logging.info('%s not a candidate: changed address/port recently (%s)',
     878        log_excluded('%s not a candidate: changed address/port recently (%s)',
    879879                     self._fpr, self._data['last_changed_address_or_port'])
    880880        return False
    881881      if self._running < CUTOFF_RUNNING:
    882         logging.info('%s not a candidate: running avg too low (%lf)',
     882        log_excluded('%s not a candidate: running avg too low (%lf)',
    883883                     self._fpr, self._running)
    884884        return False
    885885      if self._v2dir < CUTOFF_V2DIR:
    886         logging.info('%s not a candidate: v2dir avg too low (%lf)',
     886        log_excluded('%s not a candidate: v2dir avg too low (%lf)',
    887887                     self._fpr, self._v2dir)
    888888        return False
    889889      if self._badexit is not None and self._badexit > PERMITTED_BADEXIT:
    890         logging.info('%s not a candidate: badexit avg too high (%lf)',
     890        log_excluded('%s not a candidate: badexit avg too high (%lf)',
    891891                     self._fpr, self._badexit)
    892892        return False
    893893      # this function logs a message depending on which check fails
    894894      if not self.is_valid_version():
    895895        return False
    896896      if self._guard < CUTOFF_GUARD:
    897         logging.info('%s not a candidate: guard avg too low (%lf)',
     897        log_excluded('%s not a candidate: guard avg too low (%lf)',
    898898                     self._fpr, self._guard)
    899899        return False
    900900      if (not self._data.has_key('consensus_weight')
    901901          or self._data['consensus_weight'] < 1):
    902         logging.info('%s not a candidate: consensus weight invalid', self._fpr)
     902        log_excluded('%s not a candidate: consensus weight invalid', self._fpr)
    903903        return False
    904904    except BaseException as e:
    905905      logging.warning("Exception %s when checking if fallback is a candidate",
    class Candidate(object): 
    980980      for key in entry:
    981981        value = entry[key]
    982982        if key == 'id' and value == self._fpr:
    983           logging.info('%s is in the blacklist: fingerprint matches',
     983          log_excluded('%s is in the blacklist: fingerprint matches',
    984984                       self._fpr)
    985985          return True
    986986        if key == 'ipv4' and value == self.dirip:
    987987          # if the dirport is present, check it too
    988988          if entry.has_key('dirport'):
    989989            if int(entry['dirport']) == self.dirport:
    990               logging.info('%s is in the blacklist: IPv4 (%s) and ' +
     990              log_excluded('%s is in the blacklist: IPv4 (%s) and ' +
    991991                           'DirPort (%d) match', self._fpr, self.dirip,
    992992                           self.dirport)
    993993              return True
    994994          # if the orport is present, check it too
    995995          elif entry.has_key('orport'):
    996996            if int(entry['orport']) == self.orport:
    997               logging.info('%s is in the blacklist: IPv4 (%s) and ' +
     997              log_excluded('%s is in the blacklist: IPv4 (%s) and ' +
    998998                           'ORPort (%d) match', self._fpr, self.dirip,
    999999                           self.orport)
    10001000              return True
    10011001          else:
    1002             logging.info('%s is in the blacklist: IPv4 (%s) matches, and ' +
     1002            log_excluded('%s is in the blacklist: IPv4 (%s) matches, and ' +
    10031003                         'entry has no DirPort or ORPort', self._fpr,
    10041004                         self.dirip)
    10051005            return True
    class Candidate(object): 
    10131013            # if the dirport is present, check it too
    10141014            if entry.has_key('dirport'):
    10151015              if int(entry['dirport']) == self.dirport:
    1016                 logging.info('%s is in the blacklist: IPv6 (%s) and ' +
     1016                log_excluded('%s is in the blacklist: IPv6 (%s) and ' +
    10171017                             'DirPort (%d) match', self._fpr, ipv6,
    10181018                             self.dirport)
    10191019                return True
    10201020            # we've already checked the ORPort, it's part of entry['ipv6']
    10211021            else:
    1022               logging.info('%s is in the blacklist: IPv6 (%s) matches, and' +
     1022              log_excluded('%s is in the blacklist: IPv6 (%s) matches, and' +
    10231023                           'entry has no DirPort', self._fpr, ipv6)
    10241024              return True
    10251025        elif (key == 'ipv6' or self.has_ipv6()):
    10261026          # only log if the fingerprint matches but the IPv6 doesn't
    10271027          if entry.has_key('id') and entry['id'] == self._fpr:
    1028             logging.info('%s skipping IPv6 blacklist comparison: relay ' +
     1028            log_excluded('%s skipping IPv6 blacklist comparison: relay ' +
    10291029                         'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
    10301030                         '' if self.has_ipv6() else ' no',
    10311031                         (' (' + ipv6 + ')') if self.has_ipv6() else  '',
    class Candidate(object): 
    11931193      time_since_expiry = (end - consensus.valid_until).total_seconds()
    11941194    except Exception, stem_error:
    11951195      end = datetime.datetime.utcnow()
    1196       logging.info('Unable to retrieve a consensus from %s: %s', nickname,
     1196      log_excluded('Unable to retrieve a consensus from %s: %s', nickname,
    11971197                    stem_error)
    11981198      status = 'error: "%s"' % (stem_error)
    11991199      level = logging.WARNING
    class CandidateList(dict): 
    15201520      elif in_blacklist:
    15211521        # exclude
    15221522        excluded_count += 1
    1523         logging.info('Excluding %s: in blacklist.', f._fpr)
     1523        log_excluded('Excluding %s: in blacklist.', f._fpr)
    15241524      else:
    15251525        if INCLUDE_UNLISTED_ENTRIES:
    15261526          # include
    class CandidateList(dict): 
    15281528        else:
    15291529          # exclude
    15301530          excluded_count += 1
    1531           logging.info('Excluding %s: in neither blacklist nor whitelist.',
     1531          log_excluded('Excluding %s: in neither blacklist nor whitelist.',
    15321532                       f._fpr)
    15331533    self.fallbacks = filtered_fallbacks
    15341534    return excluded_count
    class CandidateList(dict): 
    15641564        # the bandwidth we log here is limited by the relay's consensus weight
    15651565        # as well as its advertised bandwidth. See set_measured_bandwidth
    15661566        # for details
    1567         logging.info('%s not a candidate: bandwidth %.1fMByte/s too low, ' +
     1567        log_excluded('%s not a candidate: bandwidth %.1fMByte/s too low, ' +
    15681568                     'must be at least %.1fMByte/s', f._fpr,
    15691569                     f._data['measured_bandwidth']/(1024.0*1024.0),
    15701570                     MIN_BANDWIDTH/(1024.0*1024.0))
    class CandidateList(dict): 
    16681668          CandidateList.attribute_add(f.ipv6addr, ip_list)
    16691669      elif not CandidateList.attribute_allow(f.dirip, ip_list,
    16701670                                             MAX_FALLBACKS_PER_IPV4):
    1671         logging.info('Eliminated %s: already have %d fallback(s) on IPv4 %s'
     1671        log_excluded('Eliminated %s: already have %d fallback(s) on IPv4 %s'
    16721672                     %(f._fpr, CandidateList.attribute_count(f.dirip, ip_list),
    16731673                       f.dirip))
    16741674      elif (f.has_ipv6() and
    16751675            not CandidateList.attribute_allow(f.ipv6addr, ip_list,
    16761676                                              MAX_FALLBACKS_PER_IPV6)):
    1677         logging.info('Eliminated %s: already have %d fallback(s) on IPv6 %s'
     1677        log_excluded('Eliminated %s: already have %d fallback(s) on IPv6 %s'
    16781678                     %(f._fpr, CandidateList.attribute_count(f.ipv6addr,
    16791679                                                             ip_list),
    16801680                       f.ipv6addr))
    class CandidateList(dict): 
    16981698        contact_limit_fallbacks.append(f)
    16991699        CandidateList.attribute_add(f._data['contact'], contact_list)
    17001700      else:
    1701         logging.info(
     1701        log_excluded(
    17021702          'Eliminated %s: already have %d fallback(s) on ContactInfo %s'
    17031703          %(f._fpr, CandidateList.attribute_count(f._data['contact'],
    17041704                                                  contact_list),
    class CandidateList(dict): 
    17271727      else:
    17281728        # we already have a fallback with this fallback in its effective
    17291729        # family
    1730         logging.info(
     1730        log_excluded(
    17311731          'Eliminated %s: already have %d fallback(s) in effective family'
    17321732          %(f._fpr, CandidateList.attribute_count(f._fpr, fingerprint_list)))
    17331733    original_count = len(self.fallbacks)
    def get_command(): 
    21022102  else:
    21032103    return None
    21042104
     2105def log_excluded(msg, *args):
     2106  if get_command() == 'check_existing':
     2107    logging.warning(msg, *args)
     2108  else:
     2109    logging.info(msg, *args)
     2110
    21052111def list_fallbacks(whitelist, blacklist):
    21062112  """ Fetches required onionoo documents and evaluates the
    21072113      fallback directory criteria for each of the relays """
  • scripts/maint/updateFallbackDirs.py

    -- 
    2.7.4
    
    
    From 7c4aa169bf8bc1cc70fba7759603e06e3067a09f Mon Sep 17 00:00:00 2001
    From: Suphanat Chunhapanya <haxx.pop@gmail.com>
    Date: Fri, 20 Jan 2017 15:43:24 +0700
    Subject: [PATCH 3/3] Add usage for check_existing
    
    ---
     scripts/maint/updateFallbackDirs.py | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
    index efa9bb4..ce837b4 100755
    a b  
    11#!/usr/bin/python
    22
    33# Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
     4#        scripts/maint/updateFallbackDirs.py check_existing
    45#
    56# This script should be run from a stable, reliable network connection,
    67# with no other network activity (and not over tor).