From e6ffd7caf8039ac9329017d0ce75aab28ad607de Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:44:09 -0400 Subject: [PATCH 01/79] remove hashwatcher --- lbrynet/dht/hashwatcher.py | 33 --------------------------------- lbrynet/dht/node.py | 7 ------- 2 files changed, 40 deletions(-) delete mode 100644 lbrynet/dht/hashwatcher.py diff --git a/lbrynet/dht/hashwatcher.py b/lbrynet/dht/hashwatcher.py deleted file mode 100644 index 37f8218fdf..0000000000 --- a/lbrynet/dht/hashwatcher.py +++ /dev/null @@ -1,33 +0,0 @@ -from collections import Counter -import datetime -from twisted.internet import task - - -class HashWatcher(object): - def __init__(self, clock=None): - if not clock: - from twisted.internet import reactor as clock - self.ttl = 600 - self.hashes = [] - self.lc = task.LoopingCall(self._remove_old_hashes) - self.lc.clock = clock - - def start(self): - return self.lc.start(10) - - def stop(self): - return self.lc.stop() - - def add_requested_hash(self, hashsum, contact): - from_ip = contact.compact_ip - matching_hashes = [h for h in self.hashes if h[0] == hashsum and h[2] == from_ip] - if len(matching_hashes) == 0: - self.hashes.append((hashsum, datetime.datetime.now(), from_ip)) - - def most_popular_hashes(self, num_to_return=10): - hash_counter = Counter([h[0] for h in self.hashes]) - return hash_counter.most_common(num_to_return) - - def _remove_old_hashes(self): - remove_time = datetime.datetime.now() - datetime.timedelta(minutes=10) - self.hashes = [h for h in self.hashes if h[1] < remove_time] diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 7e088b1529..7c48e8c31d 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -25,7 +25,6 @@ from error import TimeoutError from peerfinder import DHTPeerFinder from contact import Contact -from hashwatcher import HashWatcher from distance import Distance @@ -137,8 +136,6 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, self._routingTable.addContact(contact) self.externalIP = externalIP self.peerPort = peerPort - self.hash_watcher = HashWatcher(self.clock) - self.peer_manager = peer_manager or PeerManager() self.peer_finder = peer_finder or DHTPeerFinder(self, self.peer_manager) @@ -156,8 +153,6 @@ def stop(self): yield self.change_token_lc.stop() if self._listeningPort is not None: yield self._listeningPort.stopListening() - if self.hash_watcher.lc.running: - yield self.hash_watcher.stop() def start_listening(self): if not self._listeningPort: @@ -223,7 +218,6 @@ def joinNetwork(self, known_node_addresses=None): # Start refreshing k-buckets periodically, if necessary self.bootstrap_join(known_node_addresses or [], self._joinDeferred) yield self._joinDeferred - self.hash_watcher.start() self.change_token_lc.start(constants.tokenSecretChangeInterval) self.refresh_node_lc.start(constants.checkRefreshInterval) @@ -570,7 +564,6 @@ def findValue(self, key, **kwargs): contact = kwargs['_rpcNodeContact'] compact_ip = contact.compact_ip() rval['token'] = self.make_token(compact_ip) - self.hash_watcher.add_requested_hash(key, contact) return rval def _generateID(self): From 4bd9f3bd681f6d8628ce3c41e6d677ff7a506725 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:48:47 -0400 Subject: [PATCH 02/79] remove popular hash tracking, simplify DHTPeerFinder --- lbrynet/core/BlobAvailability.py | 15 ----------- lbrynet/dht/node.py | 3 --- lbrynet/dht/peerfinder.py | 46 +++++++------------------------- 3 files changed, 10 insertions(+), 54 deletions(-) diff --git 
a/lbrynet/core/BlobAvailability.py b/lbrynet/core/BlobAvailability.py index 5ce8b95ea7..cc9d446d12 100644 --- a/lbrynet/core/BlobAvailability.py +++ b/lbrynet/core/BlobAvailability.py @@ -23,18 +23,14 @@ def __init__(self, blob_manager, peer_finder, dht_node): self._blob_manager = blob_manager self._peer_finder = peer_finder self._dht_node = dht_node - self._check_popular = LoopingCall(self._update_most_popular) self._check_mine = LoopingCall(self._update_mine) def start(self): log.info("Starting blob availability tracker.") - self._check_popular.start(600) self._check_mine.start(600) def stop(self): log.info("Stopping blob availability tracker.") - if self._check_popular.running: - self._check_popular.stop() if self._check_mine.running: self._check_mine.stop() @@ -68,17 +64,6 @@ def _save_peer_info(blob_hash, peers): d.addCallback(lambda peers: _save_peer_info(blob, peers)) return d - def _get_most_popular(self): - dl = [] - for (hash, _) in self._dht_node.get_most_popular_hashes(10): - encoded = hash.encode('hex') - dl.append(self._update_peers_for_blob(encoded)) - return defer.DeferredList(dl) - - def _update_most_popular(self): - d = self._get_most_popular() - d.addCallback(lambda _: self._set_mean_peers()) - def _update_mine(self): def _get_peers(blobs): dl = [] diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 7c48e8c31d..804ba85efa 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -261,9 +261,6 @@ def getPeersForBlob(self, blob_hash, include_node_ids=False): expanded_peers.append((host, port, peer_node_id)) defer.returnValue(expanded_peers) - def get_most_popular_hashes(self, num_to_return): - return self.hash_watcher.most_popular_hashes(num_to_return) - @defer.inlineCallbacks def iterativeAnnounceHaveBlob(self, blob_hash, value): known_nodes = {} diff --git a/lbrynet/dht/peerfinder.py b/lbrynet/dht/peerfinder.py index afbbddd6b3..4074807ea4 100644 --- a/lbrynet/dht/peerfinder.py +++ b/lbrynet/dht/peerfinder.py @@ -4,7 +4,6 @@ from zope.interface import implements from twisted.internet import defer from lbrynet.interfaces import IPeerFinder -from lbrynet.core.utils import short_hash log = logging.getLogger(__name__) @@ -13,18 +12,9 @@ class DummyPeerFinder(object): """This class finds peers which have announced to the DHT that they have certain blobs""" - def run_manage_loop(self): - pass - - def stop(self): - pass - - def find_peers_for_blob(self, blob_hash): + def find_peers_for_blob(self, blob_hash, timeout=None, filter_self=True): return defer.succeed([]) - def get_most_popular_hashes(self, num_to_return): - return [] - class DHTPeerFinder(DummyPeerFinder): """This class finds peers which have announced to the DHT that they have certain blobs""" @@ -39,11 +29,8 @@ def __init__(self, dht_node, peer_manager): self.peer_manager = peer_manager self.peers = [] - def stop(self): - pass - @defer.inlineCallbacks - def find_peers_for_blob(self, blob_hash, timeout=None, filter_self=False): + def find_peers_for_blob(self, blob_hash, timeout=None, filter_self=True): """ Find peers for blob in the DHT blob_hash (str): blob hash to look for @@ -54,32 +41,19 @@ def find_peers_for_blob(self, blob_hash, timeout=None, filter_self=False): Returns: list of peers for the blob """ - def _trigger_timeout(): - if not finished_deferred.called: - log.debug("Peer search for %s timed out", short_hash(blob_hash)) - finished_deferred.cancel() - bin_hash = binascii.unhexlify(blob_hash) - finished_deferred = self.dht_node.getPeersForBlob(bin_hash) - - if timeout is not None: - 
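The hunk being rewritten here drops a hand-rolled timeout (a reactor.callLater that cancelled the deferred, caught as CancelledError) in favor of Twisted's Deferred.addTimeout, which errbacks with defer.TimeoutError on the node's own clock. A minimal sketch of the new pattern, with lookup() as a stand-in for the DHT query:

    from twisted.internet import defer

    @defer.inlineCallbacks
    def find_with_timeout(lookup, timeout, clock):
        d = lookup()                   # e.g. dht_node.iterativeFindValue(bin_hash)
        d.addTimeout(timeout, clock)   # errbacks with defer.TimeoutError after `timeout` seconds
        try:
            peers = yield d
        except defer.TimeoutError:
            peers = []                 # the same empty-result fallback used below
        defer.returnValue(peers)

Passing the node's clock keeps the timeout on the same (mockable) time source as the rest of the DHT code.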
self.dht_node.reactor_callLater(timeout, _trigger_timeout) - + finished_deferred = self.dht_node.iterativeFindValue(bin_hash) + if timeout: + finished_deferred.addTimeout(timeout, self.dht_node.clock) try: peer_list = yield finished_deferred - except defer.CancelledError: + except defer.TimeoutError: peer_list = [] peers = set(peer_list) - good_peers = [] - for host, port in peers: + results = [] + for node_id, host, port in peers: if filter_self and (host, port) == (self.dht_node.externalIP, self.dht_node.peerPort): continue - peer = self.peer_manager.get_peer(host, port) - if peer.is_available() is True: - good_peers.append(peer) - - defer.returnValue(good_peers) - - def get_most_popular_hashes(self, num_to_return): - return self.dht_node.get_most_popular_hashes(num_to_return) + results.append(self.peer_manager.get_peer(host, port)) + defer.returnValue(results) From b673c508cc8d9ca957fb3f7693a9504f1d6505a0 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:51:25 -0400 Subject: [PATCH 03/79] disable NegotiatedPaymentRateManager, use OnlyFreePaymentsManager for now --- lbrynet/core/PaymentRateManager.py | 1 + lbrynet/core/Session.py | 34 ++++++++++---------- lbrynet/file_manager/EncryptedFileManager.py | 6 ++-- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/lbrynet/core/PaymentRateManager.py b/lbrynet/core/PaymentRateManager.py index f72bb5154a..1d33203903 100644 --- a/lbrynet/core/PaymentRateManager.py +++ b/lbrynet/core/PaymentRateManager.py @@ -93,6 +93,7 @@ def __init__(self, **kwargs): self.base = BasePaymentRateManager(0.0, 0.0) self.points_paid = 0.0 + self.min_blob_data_payment_rate = 0.0 self.generous = True self.strategy = OnlyFreeStrategy() diff --git a/lbrynet/core/Session.py b/lbrynet/core/Session.py index f65a331fee..0543ad2116 100644 --- a/lbrynet/core/Session.py +++ b/lbrynet/core/Session.py @@ -6,7 +6,7 @@ from lbrynet.database.storage import SQLiteStorage from lbrynet.core.RateLimiter import RateLimiter from lbrynet.core.utils import generate_id -from lbrynet.core.PaymentRateManager import BasePaymentRateManager, NegotiatedPaymentRateManager +from lbrynet.core.PaymentRateManager import BasePaymentRateManager, OnlyFreePaymentsManager from lbrynet.core.BlobAvailability import BlobAvailabilityTracker log = logging.getLogger(__name__) @@ -107,8 +107,8 @@ def __init__(self, blob_data_payment_rate, db_dir=None, self.known_dht_nodes = [] self.blob_dir = blob_dir self.blob_manager = blob_manager - self.blob_tracker = None - self.blob_tracker_class = blob_tracker_class or BlobAvailabilityTracker + # self.blob_tracker = None + # self.blob_tracker_class = blob_tracker_class or BlobAvailabilityTracker self.peer_port = peer_port self.use_upnp = use_upnp self.rate_limiter = rate_limiter @@ -118,9 +118,9 @@ def __init__(self, blob_data_payment_rate, db_dir=None, self.dht_node_class = dht_node_class self.dht_node = None self.base_payment_rate_manager = BasePaymentRateManager(blob_data_payment_rate) - self.payment_rate_manager = None - self.payment_rate_manager_class = payment_rate_manager_class or NegotiatedPaymentRateManager - self.is_generous = is_generous + self.payment_rate_manager = OnlyFreePaymentsManager() + # self.payment_rate_manager_class = payment_rate_manager_class or NegotiatedPaymentRateManager + # self.is_generous = is_generous self.storage = storage or SQLiteStorage(self.db_dir) self._join_dht_deferred = None @@ -147,8 +147,8 @@ def shut_down(self): ds = [] if self.hash_announcer: self.hash_announcer.stop() - if self.blob_tracker is not 
None: - ds.append(defer.maybeDeferred(self.blob_tracker.stop)) + # if self.blob_tracker is not None: + # ds.append(defer.maybeDeferred(self.blob_tracker.stop)) if self.dht_node is not None: ds.append(defer.maybeDeferred(self.dht_node.stop)) if self.rate_limiter is not None: @@ -251,19 +251,19 @@ def _setup_other_components(self): else: self.blob_manager = DiskBlobManager(self.blob_dir, self.storage) - if self.blob_tracker is None: - self.blob_tracker = self.blob_tracker_class( - self.blob_manager, self.dht_node.peer_finder, self.dht_node - ) - if self.payment_rate_manager is None: - self.payment_rate_manager = self.payment_rate_manager_class( - self.base_payment_rate_manager, self.blob_tracker, self.is_generous - ) + # if self.blob_tracker is None: + # self.blob_tracker = self.blob_tracker_class( + # self.blob_manager, self.dht_node.peer_finder, self.dht_node + # ) + # if self.payment_rate_manager is None: + # self.payment_rate_manager = self.payment_rate_manager_class( + # self.base_payment_rate_manager, self.blob_tracker, self.is_generous + # ) self.rate_limiter.start() d = self.blob_manager.setup() d.addCallback(lambda _: self.wallet.start()) - d.addCallback(lambda _: self.blob_tracker.start()) + # d.addCallback(lambda _: self.blob_tracker.start()) return d def _unset_upnp(self): diff --git a/lbrynet/file_manager/EncryptedFileManager.py b/lbrynet/file_manager/EncryptedFileManager.py index 02245c39c1..9c1674f7f4 100644 --- a/lbrynet/file_manager/EncryptedFileManager.py +++ b/lbrynet/file_manager/EncryptedFileManager.py @@ -7,7 +7,7 @@ from twisted.internet import defer, task, reactor from twisted.python.failure import Failure from lbrynet.reflector.reupload import reflect_file -from lbrynet.core.PaymentRateManager import NegotiatedPaymentRateManager +# from lbrynet.core.PaymentRateManager import NegotiatedPaymentRateManager from lbrynet.file_manager.EncryptedFileDownloader import ManagedEncryptedFileDownloader from lbrynet.file_manager.EncryptedFileDownloader import ManagedEncryptedFileDownloaderFactory from lbrynet.core.StreamDescriptor import EncryptedFileStreamType, get_sd_info @@ -118,12 +118,12 @@ def _start_lbry_files(self): files = yield self.session.storage.get_all_lbry_files() claim_infos = yield self.session.storage.get_claims_from_stream_hashes([file['stream_hash'] for file in files]) b_prm = self.session.base_payment_rate_manager - payment_rate_manager = NegotiatedPaymentRateManager(b_prm, self.session.blob_tracker) + # payment_rate_manager = NegotiatedPaymentRateManager(b_prm, self.session.blob_tracker) log.info("Starting %i files", len(files)) for file_info in files: claim_info = claim_infos.get(file_info['stream_hash']) - self._start_lbry_file(file_info, payment_rate_manager, claim_info) + self._start_lbry_file(file_info, b_prm, claim_info) log.info("Started %i lbry files", len(self.lbry_files)) if self.auto_re_reflect is True: From a503a800ca0c312842d4ceb338002b6868a81ca4 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:52:04 -0400 Subject: [PATCH 04/79] disable Cryptonator exchange rate feed --- lbrynet/daemon/ExchangeRateManager.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lbrynet/daemon/ExchangeRateManager.py b/lbrynet/daemon/ExchangeRateManager.py index 69310ea8cb..486659a0e2 100644 --- a/lbrynet/daemon/ExchangeRateManager.py +++ b/lbrynet/daemon/ExchangeRateManager.py @@ -206,7 +206,12 @@ def _handle_response(self, response): class ExchangeRateManager(object): def __init__(self): self.market_feeds = [ - 
LBRYioBTCFeed(), LBRYioFeed(), BittrexFeed(), CryptonatorBTCFeed(), CryptonatorFeed()] + LBRYioBTCFeed(), + LBRYioFeed(), + BittrexFeed(), + # CryptonatorBTCFeed(), + # CryptonatorFeed() + ] def start(self): log.info("Starting exchange rate manager") From 9ed08f8fc9ff4debd01df02c07262f119fbb5c3f Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:52:40 -0400 Subject: [PATCH 05/79] remove unused constant --- lbrynet/dht/constants.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lbrynet/dht/constants.py b/lbrynet/dht/constants.py index f84c89d2eb..9ce21d96b1 100644 --- a/lbrynet/dht/constants.py +++ b/lbrynet/dht/constants.py @@ -43,8 +43,6 @@ tokenSecretChangeInterval = 300 # 5 minutes -peer_request_timeout = 10 - ######## IMPLEMENTATION-SPECIFIC CONSTANTS ########### #: The interval in which the node should check its whether any buckets need refreshing, From 159e153393126475137dbbc192dda83587760595 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:53:35 -0400 Subject: [PATCH 06/79] make DataStore clock mockable --- lbrynet/dht/datastore.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lbrynet/dht/datastore.py b/lbrynet/dht/datastore.py index a539424555..34304f29f5 100644 --- a/lbrynet/dht/datastore.py +++ b/lbrynet/dht/datastore.py @@ -1,5 +1,4 @@ import UserDict -import time import constants from interface import IDataStore from zope.interface import implements @@ -9,17 +8,21 @@ class DictDataStore(UserDict.DictMixin): """ A datastore using an in-memory Python dictionary """ implements(IDataStore) - def __init__(self): + def __init__(self, getTime=None): # Dictionary format: # { : (, , ) } self._dict = {} + if not getTime: + from twisted.internet import reactor + getTime = reactor.seconds + self._getTime = getTime def keys(self): """ Return a list of the keys in this data store """ return self._dict.keys() def removeExpiredPeers(self): - now = int(time.time()) + now = int(self._getTime()) def notExpired(peer): if (now - peer[2]) > constants.dataExpireTimeout: From d65dc0aec3be47c885cb4021889a13059eda9734 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:55:36 -0400 Subject: [PATCH 07/79] disable loading DictDataStore in Node.__init__ -to be re-done when the datastore uses sqlite --- lbrynet/dht/node.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 804ba85efa..c7393366b7 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -122,20 +122,9 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, # Initialize the data storage mechanism used by this node self.token_secret = self._generateID() self.old_token_secret = None - if dataStore is None: - self._dataStore = datastore.DictDataStore() - else: - self._dataStore = dataStore - # Try to restore the node's state... 
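A note on the DictDataStore change in patch 06 above: injecting getTime (defaulting to reactor.seconds) means peer expiry can be tested against a fake clock instead of wall time. A sketch, assuming the patched module paths and placeholder string values:

    from twisted.internet import task
    from lbrynet.dht import constants
    from lbrynet.dht.datastore import DictDataStore

    clock = task.Clock()                           # manually advanced test clock
    store = DictDataStore(getTime=clock.seconds)
    now = int(clock.seconds())
    store.addPeerToBlob('blob hash', 'compact address', now, now, 'publisher id')
    clock.advance(constants.dataExpireTimeout + 1)
    store.removeExpiredPeers()                     # the entry stored above is dropped as expired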
- if 'nodeState' in self._dataStore: - state = self._dataStore['nodeState'] - self.node_id = state['id'] - for contactTriple in state['closestNodes']: - contact = Contact( - contactTriple[0], contactTriple[1], contactTriple[2], self._protocol) - self._routingTable.addContact(contact) self.externalIP = externalIP self.peerPort = peerPort + self._dataStore = dataStore or datastore.DictDataStore() self.peer_manager = peer_manager or PeerManager() self.peer_finder = peer_finder or DHTPeerFinder(self, self.peer_manager) From e52689a33d67ebb509312d96fb3c68b9f8f85968 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 16:57:27 -0400 Subject: [PATCH 08/79] remove OptimizedTreeRoutingTable for now, use TreeRoutingTable --- lbrynet/dht/node.py | 2 +- lbrynet/dht/routingtable.py | 72 ------------------------------------- 2 files changed, 1 insertion(+), 73 deletions(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index c7393366b7..03f56ca9d2 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -110,7 +110,7 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, # Create k-buckets (for storing contacts) if routingTableClass is None: - self._routingTable = routingtable.OptimizedTreeRoutingTable(self.node_id, self.clock.seconds) + self._routingTable = routingtable.TreeRoutingTable(self.node_id, self.clock.seconds) else: self._routingTable = routingTableClass(self.node_id, self.clock.seconds) diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 02f8e96861..05acb56f69 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -285,75 +285,3 @@ def _splitBucket(self, oldBucketIndex): oldBucket.removeContact(contact) -class OptimizedTreeRoutingTable(TreeRoutingTable): - """ A version of the "tree"-type routing table specified by Kademlia, - along with contact accounting optimizations specified in section 4.1 of - of the 13-page version of the Kademlia paper. 
- """ - - def __init__(self, parentNodeID, getTime=None): - TreeRoutingTable.__init__(self, parentNodeID, getTime) - # Cache containing nodes eligible to replace stale k-bucket entries - self._replacementCache = {} - - def addContact(self, contact): - """ Add the given contact to the correct k-bucket; if it already - exists, its status will be updated - - @param contact: The contact to add to this node's k-buckets - @type contact: kademlia.contact.Contact - """ - - if contact.id == self._parentNodeID: - return - - # Initialize/reset the "successively failed RPC" counter - contact.failedRPCs = 0 - - bucketIndex = self._kbucketIndex(contact.id) - try: - self._buckets[bucketIndex].addContact(contact) - except kbucket.BucketFull: - # The bucket is full; see if it can be split (by checking - # if its range includes the host node's id) - if self._buckets[bucketIndex].keyInRange(self._parentNodeID): - self._splitBucket(bucketIndex) - # Retry the insertion attempt - self.addContact(contact) - else: - # We can't split the k-bucket - # NOTE: This implementation follows section 4.1 of the 13 page version - # of the Kademlia paper (optimized contact accounting without PINGs - # - results in much less network traffic, at the expense of some memory) - - # Put the new contact in our replacement cache for the - # corresponding k-bucket (or update it's position if - # it exists already) - if bucketIndex not in self._replacementCache: - self._replacementCache[bucketIndex] = [] - if contact in self._replacementCache[bucketIndex]: - self._replacementCache[bucketIndex].remove(contact) - elif len(self._replacementCache[bucketIndex]) >= constants.replacementCacheSize: - self._replacementCache[bucketIndex].pop(0) - self._replacementCache[bucketIndex].append(contact) - - def removeContact(self, contactID): - """ Remove the contact with the specified node ID from the routing - table - - @param contactID: The node ID of the contact to remove - @type contactID: str - """ - bucketIndex = self._kbucketIndex(contactID) - try: - contact = self._buckets[bucketIndex].getContact(contactID) - except ValueError: - return - contact.failedRPCs += 1 - if contact.failedRPCs >= constants.rpcAttempts: - self._buckets[bucketIndex].removeContact(contactID) - # Replace this stale contact with one from our replacement cache, if we have any - if bucketIndex in self._replacementCache: - if len(self._replacementCache[bucketIndex]) > 0: - self._buckets[bucketIndex].addContact( - self._replacementCache[bucketIndex].pop()) From 406ddaa4ef5d82543e4f301a491c7070b711526f Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:02:25 -0400 Subject: [PATCH 09/79] use base class to simplify Node init -add looping call helpers which use the same clock as the Node --- lbrynet/dht/node.py | 81 +++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 03f56ca9d2..31f1b238a5 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -41,7 +41,46 @@ def rpcmethod(func): return func -class Node(object): +class MockKademliaHelper(object): + def __init__(self, clock=None, callLater=None, resolve=None, listenUDP=None): + if not listenUDP or not resolve or not callLater or not clock: + from twisted.internet import reactor + listenUDP = listenUDP or reactor.listenUDP + resolve = resolve or reactor.resolve + callLater = callLater or reactor.callLater + clock = clock or reactor + + self.clock = clock + self.reactor_listenUDP = listenUDP + 
self.reactor_resolve = resolve + + CallLaterManager.setup(callLater) + self.reactor_callLater = CallLaterManager.call_later + self.reactor_callSoon = CallLaterManager.call_soon + + self._listeningPort = None # object implementing Twisted + # IListeningPort This will contain a deferred created when + # joining the network, to enable publishing/retrieving + # information from the DHT as soon as the node is part of the + # network (add callbacks to this deferred if scheduling such + # operations before the node has finished joining the network) + + def get_looping_call(self, fn, *args, **kwargs): + lc = task.LoopingCall(fn, *args, **kwargs) + lc.clock = self.clock + return lc + + def safe_stop_looping_call(self, lc): + if lc and lc.running: + return lc.stop() + return defer.succeed(None) + + def safe_start_looping_call(self, lc, t): + if lc and not lc.running: + lc.start(t) + + +class Node(MockKademliaHelper): """ Local node in the Kademlia network This class represents a single local node in a Kademlia network; in other @@ -54,7 +93,7 @@ class Node(object): def __init__(self, node_id=None, udpPort=4000, dataStore=None, routingTableClass=None, networkProtocol=None, - externalIP=None, peerPort=None, listenUDP=None, + externalIP=None, peerPort=3333, listenUDP=None, callLater=None, resolve=None, clock=None, peer_finder=None, peer_manager=None): """ @@ -82,31 +121,11 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, @param peerPort: the port at which this node announces it has a blob for """ - if not listenUDP or not resolve or not callLater or not clock: - from twisted.internet import reactor - listenUDP = listenUDP or reactor.listenUDP - resolve = resolve or reactor.resolve - callLater = callLater or reactor.callLater - clock = clock or reactor - self.clock = clock - CallLaterManager.setup(callLater) - self.reactor_resolve = resolve - self.reactor_listenUDP = listenUDP - self.reactor_callLater = CallLaterManager.call_later - self.reactor_callSoon = CallLaterManager.call_soon + MockKademliaHelper.__init__(self, clock, callLater, resolve, listenUDP) self.node_id = node_id or self._generateID() self.port = udpPort - self._listeningPort = None # object implementing Twisted - # IListeningPort This will contain a deferred created when - # joining the network, to enable publishing/retrieving - # information from the DHT as soon as the node is part of the - # network (add callbacks to this deferred if scheduling such - # operations before the node has finished joining the network) - self._joinDeferred = defer.Deferred(None) - self.change_token_lc = task.LoopingCall(self.change_token) - self.change_token_lc.clock = self.clock - self.refresh_node_lc = task.LoopingCall(self._refreshNode) - self.refresh_node_lc.clock = self.clock + self._change_token_lc = self.get_looping_call(self.change_token) + self._refresh_node_lc = self.get_looping_call(self._refreshNode) # Create k-buckets (for storing contacts) if routingTableClass is None: @@ -127,6 +146,7 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, self._dataStore = dataStore or datastore.DictDataStore() self.peer_manager = peer_manager or PeerManager() self.peer_finder = peer_finder or DHTPeerFinder(self, self.peer_manager) + self._join_deferred = None def __del__(self): log.warning("unclean shutdown of the dht node") @@ -136,10 +156,8 @@ def __del__(self): @defer.inlineCallbacks def stop(self): # stop LoopingCalls: - if self.refresh_node_lc.running: - yield self.refresh_node_lc.stop() - if self.change_token_lc.running: - 
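These helpers exist so that every LoopingCall shares the node's injected clock, and so that starting and stopping are safe regardless of current state (the stop() hunk here switches to them). A sketch of what that buys in tests, with do_work as a stand-in task:

    from twisted.internet import task
    from lbrynet.dht.node import MockKademliaHelper

    def do_work():
        pass                                   # stand-in for e.g. change_token or _refreshNode

    clock = task.Clock()
    helper = MockKademliaHelper(clock=clock)
    lc = helper.get_looping_call(do_work)      # LoopingCall bound to the fake clock
    helper.safe_start_looping_call(lc, 60)     # no-op if already running
    clock.advance(180)                         # fires the pending iterations immediately
    helper.safe_stop_looping_call(lc)          # no-op (and no error) if already stopped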
yield self.change_token_lc.stop() + yield self.safe_stop_looping_call(self._refresh_node_lc) + yield self.safe_stop_looping_call(self._change_token_lc) if self._listeningPort is not None: yield self._listeningPort.stopListening() @@ -204,11 +222,10 @@ def joinNetwork(self, known_node_addresses=None): self.start_listening() # #TODO: Refresh all k-buckets further away than this node's closest neighbour + self.safe_start_looping_call(self._change_token_lc, constants.tokenSecretChangeInterval) # Start refreshing k-buckets periodically, if necessary self.bootstrap_join(known_node_addresses or [], self._joinDeferred) - yield self._joinDeferred - self.change_token_lc.start(constants.tokenSecretChangeInterval) - self.refresh_node_lc.start(constants.checkRefreshInterval) + self.safe_start_looping_call(self._refresh_node_lc, constants.checkRefreshInterval) @property def contacts(self): From ad2dcf0893e48467a3113a4d9742030ef3f8c0e3 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:06:45 -0400 Subject: [PATCH 10/79] add the parent node id to KBucket --- lbrynet/dht/kbucket.py | 3 ++- lbrynet/dht/routingtable.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lbrynet/dht/kbucket.py b/lbrynet/dht/kbucket.py index ead7638955..de5484bb00 100644 --- a/lbrynet/dht/kbucket.py +++ b/lbrynet/dht/kbucket.py @@ -6,7 +6,7 @@ class KBucket(object): """ Description - later """ - def __init__(self, rangeMin, rangeMax): + def __init__(self, rangeMin, rangeMax, node_id): """ @param rangeMin: The lower boundary for the range in the n-bit ID space covered by this k-bucket @@ -17,6 +17,7 @@ def __init__(self, rangeMin, rangeMax): self.rangeMin = rangeMin self.rangeMax = rangeMax self._contacts = list() + self._node_id = node_id def addContact(self, contact): """ Add contact to _contact list in the right order. This will move the diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 05acb56f69..16e3ef1cb7 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -40,8 +40,8 @@ def __init__(self, parentNodeID, getTime=None): @type parentNodeID: str """ # Create the initial (single) k-bucket covering the range of the entire n-bit ID space - self._buckets = [kbucket.KBucket(rangeMin=0, rangeMax=2 ** constants.key_bits)] self._parentNodeID = parentNodeID + self._buckets = [kbucket.KBucket(rangeMin=0, rangeMax=2 ** constants.key_bits, node_id=self._parentNodeID)] if not getTime: from time import time as getTime self._getTime = getTime @@ -272,7 +272,7 @@ def _splitBucket(self, oldBucketIndex): oldBucket = self._buckets[oldBucketIndex] splitPoint = oldBucket.rangeMax - (oldBucket.rangeMax - oldBucket.rangeMin) / 2 # Create a new k-bucket to cover the range split off from the old bucket - newBucket = kbucket.KBucket(splitPoint, oldBucket.rangeMax) + newBucket = kbucket.KBucket(splitPoint, oldBucket.rangeMax, self._parentNodeID) oldBucket.rangeMax = splitPoint # Now, add the new bucket into the routing table tree self._buckets.insert(oldBucketIndex + 1, newBucket) From 23c202b5e4b46ac61d1d781fccbc2e1e0dda9dc4 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:32:55 -0400 Subject: [PATCH 11/79] refactor Contact class, DHT RPCs, and Contact addition/removal -track contact failures, last replied, and last requested. 
use this to provide a 'contact_is_good' property on Contact objects -ensure no duplicate contact objects are created -remove confusing conflation of node id strings with Contact objects, update docstrings -move RPC failure tracking to a callback/errback pair in sendRPC (so the contact is only updated once) -handle seed nodes during the join sequence by setting their node ids after they initially reply to our ping -name all of the kademlia RPC keyword args, remove confusing **kwargs and dictionary parsing -add host ip/port to DHT send/receive logging to make the results comprehensible when running many nodes at once --- lbrynet/dht/contact.py | 112 +++++++++++++++++++++++++-- lbrynet/dht/error.py | 5 +- lbrynet/dht/kbucket.py | 31 ++++++-- lbrynet/dht/node.py | 123 ++++++++++++----------------- lbrynet/dht/protocol.py | 149 ++++++++++++++++++++++++------------ lbrynet/dht/routingtable.py | 14 ++-- 6 files changed, 293 insertions(+), 141 deletions(-) diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py index cba054e0d4..2ee26c6789 100644 --- a/lbrynet/dht/contact.py +++ b/lbrynet/dht/contact.py @@ -1,19 +1,78 @@ -class Contact(object): +from lbrynet.dht import constants + + +class _Contact(object): """ Encapsulation for remote contact This class contains information on a single remote contact, and also provides a direct RPC API to the remote node which it represents """ - def __init__(self, id, ipAddress, udpPort, networkProtocol, firstComm=0): - self.id = id + def __init__(self, contactManager, id, ipAddress, udpPort, networkProtocol, firstComm): + self._contactManager = contactManager + self._id = id + if id is not None: + if not len(id) == constants.key_bits / 8: + raise ValueError("invalid node id: %s", id.encode('hex')) self.address = ipAddress self.port = udpPort self._networkProtocol = networkProtocol self.commTime = firstComm + self.getTime = self._contactManager._get_time + self.lastReplied = None + self.lastRequested = None + + @property + def lastInteracted(self): + return max(self.lastRequested or 0, self.lastReplied or 0, self.lastFailed or 0) + + @property + def id(self): + return self._id + + def log_id(self, short=True): + if not self.id: + return "not initialized" + id_hex = self.id.encode('hex') + return id_hex if not short else id_hex[:8] + + @property + def failedRPCs(self): + return len(self.failures) + + @property + def lastFailed(self): + return self._contactManager._rpc_failures.get((self.address, self.port), [None])[-1] + + @property + def failures(self): + return self._contactManager._rpc_failures.get((self.address, self.port), []) + + @property + def contact_is_good(self): + """ + :return: False if contact is bad, None if contact is unknown, or True if contact is good + """ + failures = self.failures + now = self.getTime() + delay = constants.refreshTimeout / 4 + + if failures: + if self.lastReplied and len(failures) >= 2 and self.lastReplied < failures[-2]: + return False + elif self.lastReplied and len(failures) >= 2 and self.lastReplied > failures[-2]: + pass # handled below + elif len(failures) >= 2: + return False + + if self.lastReplied and self.lastReplied > now - delay: + return True + if self.lastReplied and self.lastRequested and self.lastRequested > now - delay: + return True + return None def __eq__(self, other): - if isinstance(other, Contact): + if isinstance(other, _Contact): return self.id == other.id elif isinstance(other, str): return self.id == other @@ -21,7 +80,7 @@ def __eq__(self, other): return False def __ne__(self, other): - if 
isinstance(other, Contact): + if isinstance(other, _Contact): return self.id != other.id elif isinstance(other, str): return self.id != other @@ -33,6 +92,21 @@ def compact_ip(self): lambda buff, x: buff + bytearray([int(x)]), self.address.split('.'), bytearray()) return str(compact_ip) + def set_id(self, id): + if not self._id: + self._id = id + + def update_last_replied(self): + self.lastReplied = int(self.getTime()) + + def update_last_requested(self): + self.lastRequested = int(self.getTime()) + + def update_last_failed(self): + failures = self._contactManager._rpc_failures.get((self.address, self.port), []) + failures.append(self.getTime()) + self._contactManager._rpc_failures[(self.address, self.port)] = failures + def __str__(self): return '<%s.%s object; IP address: %s, UDP port: %d>' % ( self.__module__, self.__class__.__name__, self.address, self.port) @@ -56,3 +130,31 @@ def _sendRPC(*args, **kwargs): return self._networkProtocol.sendRPC(self, name, args, **kwargs) return _sendRPC + + +class ContactManager(object): + def __init__(self, get_time=None): + if not get_time: + from twisted.internet import reactor + get_time = reactor.seconds + self._get_time = get_time + self._contacts = {} + self._rpc_failures = {} + + def get_contact(self, id, address, port): + for contact in self._contacts.itervalues(): + if contact.id == id and contact.address == address and contact.port == port: + return contact + + def make_contact(self, id, ipAddress, udpPort, networkProtocol, firstComm=0): + ipAddress = str(ipAddress) + contact = self.get_contact(id, ipAddress, udpPort) + if contact: + return contact + contact = _Contact(self, id, ipAddress, udpPort, networkProtocol, firstComm or self._get_time()) + self._contacts[(id, ipAddress, udpPort)] = contact + return contact + + def is_ignored(self, origin_tuple): + failed_rpc_count = len(self._rpc_failures.get(origin_tuple, [])) + return failed_rpc_count > constants.rpcAttempts diff --git a/lbrynet/dht/error.py b/lbrynet/dht/error.py index 3111adf8f6..3d44cf3f11 100644 --- a/lbrynet/dht/error.py +++ b/lbrynet/dht/error.py @@ -33,6 +33,9 @@ class TimeoutError(Exception): def __init__(self, remote_contact_id): # remote_contact_id is a binary blob so we need to convert it # into something more readable - msg = 'Timeout connecting to {}'.format(binascii.hexlify(remote_contact_id)) + if remote_contact_id: + msg = 'Timeout connecting to {}'.format(binascii.hexlify(remote_contact_id)) + else: + msg = 'Timeout connecting to uninitialized node' Exception.__init__(self, msg) self.remote_contact_id = remote_contact_id diff --git a/lbrynet/dht/kbucket.py b/lbrynet/dht/kbucket.py index de5484bb00..bb4cfc0dc4 100644 --- a/lbrynet/dht/kbucket.py +++ b/lbrynet/dht/kbucket.py @@ -42,9 +42,19 @@ def addContact(self, contact): raise BucketFull("No space in bucket to insert contact") def getContact(self, contactID): - """ Get the contact specified node ID""" - index = self._contacts.index(contactID) - return self._contacts[index] + """Get the contact specified node ID + + @raise IndexError: raised if the contact is not in the bucket + + @param contactID: the node id of the contact to retrieve + @type contactID: str + + @rtype: dht.contact._Contact + """ + for contact in self._contacts: + if contact.id == contactID: + return contact + raise IndexError(contactID) def getContacts(self, count=-1, excludeContact=None): """ Returns a list containing up to the first count number of contacts @@ -92,14 +102,18 @@ def getContacts(self, count=-1, excludeContact=None): if 
excludeContact in contactList: contactList.remove(excludeContact) + def getBadOrUnknownContacts(self): + contacts = self.getContacts(sort_distance_to=False) + results = [contact for contact in contacts if contact.contact_is_good is False] + results.extend(contact for contact in contacts if contact.contact_is_good is None) + return results return contactList def removeContact(self, contact): - """ Remove given contact from list + """ Remove the contact from the bucket - @param contact: The contact to remove, or a string containing the - contact's node ID - @type contact: kademlia.contact.Contact or str + @param contact: The contact to remove + @type contact: dht.contact._Contact @raise ValueError: The specified contact is not in this bucket """ @@ -124,3 +138,6 @@ def keyInRange(self, key): def __len__(self): return len(self._contacts) + + def __contains__(self, item): + return item in self._contacts diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 31f1b238a5..b24b923d1c 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -24,7 +24,7 @@ import protocol from error import TimeoutError from peerfinder import DHTPeerFinder -from contact import Contact +from contact import ContactManager from distance import Distance @@ -51,6 +51,7 @@ def __init__(self, clock=None, callLater=None, resolve=None, listenUDP=None): clock = clock or reactor self.clock = clock + self.contact_manager = ContactManager(self.clock.seconds) self.reactor_listenUDP = listenUDP self.reactor_resolve = resolve @@ -276,8 +277,10 @@ def iterativeAnnounceHaveBlob(self, blob_hash, value): is_closer = Distance(blob_hash).is_closer(self.node_id, contacts[-1].id) if is_closer: contacts.pop() - yield self.store(blob_hash, value, originalPublisherID=self.node_id, - self_store=True) + self_contact = self.contact_manager.make_contact(self.node_id, self.externalIP, + self.port, self._protocol) + token = self.make_token(self_contact.compact_ip()) + yield self.store(self_contact, blob_hash, token, self.peerPort) elif self.externalIP is not None: pass else: @@ -403,17 +406,17 @@ def addContact(self, contact): @param contact: The contact to add to this node's k-buckets @type contact: kademlia.contact.Contact """ - self._routingTable.addContact(contact) + return self._routingTable.addContact(contact) - def removeContact(self, contactID): + def removeContact(self, contact): """ Remove the contact with the specified node ID from this node's table of known nodes. 
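Two behaviors introduced by the contact refactor in this patch are worth making concrete: ContactManager.make_contact returns a single shared object per (node id, ip, port) triple, and contact_is_good is three-valued (False for bad, None for unknown, True for recently good). A sketch against the patched lbrynet.dht.contact, using placeholder values (the protocol argument is unused here, so None suffices):

    from twisted.internet import task
    from lbrynet.dht.contact import ContactManager

    clock = task.Clock()
    clock.advance(3600)                 # start the fake clock at a nonzero time
    manager = ContactManager(get_time=clock.seconds)
    node_id = '\x55' * 48               # stand-in 384-bit node id
    a = manager.make_contact(node_id, '1.2.3.4', 4444, None)
    b = manager.make_contact(node_id, '1.2.3.4', 4444, None)
    assert a is b                       # no duplicate contact objects are created
    assert a.contact_is_good is None    # never queried: unknown
    a.update_last_replied()
    assert a.contact_is_good is True    # replied within the last refreshTimeout / 4 seconds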
This is a simple wrapper for the same method in this object's RoutingTable object - @param contactID: The node ID of the contact to remove - @type contactID: str + @param contact: The Contact object to remove + @type contact: _Contact """ - self._routingTable.removeContact(contactID) + self._routingTable.removeContact(contact) def findContact(self, contactID): """ Find a entangled.kademlia.contact.Contact object for the specified @@ -430,10 +433,11 @@ def findContact(self, contactID): contact = self._routingTable.getContact(contactID) df = defer.Deferred() df.callback(contact) - except ValueError: + except (ValueError, IndexError): def parseResults(nodes): + node_ids = [c.id for c in nodes] if contactID in nodes: - contact = nodes[nodes.index(contactID)] + contact = nodes[node_ids.index(contactID)] return contact else: return None @@ -451,11 +455,11 @@ def ping(self): return 'pong' @rpcmethod - def store(self, key, value, originalPublisherID=None, self_store=False, **kwargs): + def store(self, rpc_contact, blob_hash, token, port, originalPublisherID=None, age=0): """ Store the received data in this node's local hash table - @param key: The hashtable key of the data - @type key: str + @param blob_hash: The hashtable key of the data + @type blob_hash: str @param value: The actual data (the value associated with C{key}) @type value: str @param originalPublisherID: The node ID of the node that is the @@ -473,54 +477,24 @@ def store(self, key, value, originalPublisherID=None, self_store=False, **kwargs (which is the case currently) might not be a good idea... will have to fix this (perhaps use a stream from the Protocol class?) """ - # Get the sender's ID (if any) if originalPublisherID is None: - if '_rpcNodeID' in kwargs: - originalPublisherID = kwargs['_rpcNodeID'] - else: - raise TypeError, 'No NodeID given. 
Therefore we can\'t store this node' - - if self_store is True and self.externalIP: - contact = Contact(self.node_id, self.externalIP, self.port, None, None) - compact_ip = contact.compact_ip() - elif '_rpcNodeContact' in kwargs: - contact = kwargs['_rpcNodeContact'] - compact_ip = contact.compact_ip() - else: - raise TypeError, 'No contact info available' - - if not self_store: - if 'token' not in value: - raise ValueError("Missing token") - if not self.verify_token(value['token'], compact_ip): - raise ValueError("Invalid token") - - if 'port' in value: - port = int(value['port']) - if 0 <= port <= 65536: - compact_port = str(struct.pack('>H', port)) - else: - raise TypeError('Invalid port') - else: - raise TypeError('No port available') - - if 'lbryid' in value: - if len(value['lbryid']) != constants.key_bits / 8: - raise ValueError('Invalid lbryid (%i bytes): %s' % (len(value['lbryid']), - value['lbryid'].encode('hex'))) - else: - compact_address = compact_ip + compact_port + value['lbryid'] + originalPublisherID = rpc_contact.id + compact_ip = rpc_contact.compact_ip() + if not self.verify_token(token, compact_ip): + raise ValueError("Invalid token") + if 0 <= port <= 65536: + compact_port = str(struct.pack('>H', port)) else: - raise TypeError('No lbryid given') + raise TypeError('Invalid port') + compact_address = compact_ip + compact_port + rpc_contact.id now = int(time.time()) - originallyPublished = now # - age - self._dataStore.addPeerToBlob(key, compact_address, now, originallyPublished, - originalPublisherID) + originallyPublished = now - age + self._dataStore.addPeerToBlob(blob_hash, compact_address, now, originallyPublished, originalPublisherID) return 'OK' @rpcmethod - def findNode(self, key, **kwargs): + def findNode(self, rpc_contact, key): """ Finds a number of known nodes closest to the node/value with the specified key. @@ -533,20 +507,17 @@ def findNode(self, key, **kwargs): node is returning all of the contacts that it knows of. 
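The rewritten store above works together with the token handed out by findValue below: a caller first issues findValue, receives a token bound to its own compact IP, then echoes that token in its store request, where verify_token checks it. Assuming the usual two-secret rotation (change_token keeps one previous secret, so recently issued tokens remain valid for one rotation interval), the lifecycle for an initialized Node looks roughly like:

    compact_ip = '\x01\x02\x03\x04'              # requester's packed IPv4, as compact_ip() returns
    token = node.make_token(compact_ip)          # included in findValue responses
    assert node.verify_token(token, compact_ip)  # accepted while the secret is current
    node.change_token()                          # one rotation: previous secret still honored
    assert node.verify_token(token, compact_ip)
    node.change_token()                          # two rotations: the token has expired
    assert not node.verify_token(token, compact_ip)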
@rtype: list """ + if len(key) != constants.key_bits / 8: + raise ValueError("invalid contact id length: %i" % len(key)) - # Get the sender's ID (if any) - if '_rpcNodeID' in kwargs: - rpc_sender_id = kwargs['_rpcNodeID'] - else: - rpc_sender_id = None - contacts = self._routingTable.findCloseNodes(key, constants.k, rpc_sender_id) + contacts = self._routingTable.findCloseNodes(key, constants.k, rpc_contact.id) contact_triples = [] for contact in contacts: contact_triples.append((contact.id, contact.address, contact.port)) return contact_triples @rpcmethod - def findValue(self, key, **kwargs): + def findValue(self, rpc_contact, key): """ Return the value associated with the specified key if present in this node's data, otherwise execute FIND_NODE for the key @@ -558,16 +529,18 @@ def findValue(self, key, **kwargs): @rtype: dict or list """ + if len(key) != constants.key_bits / 8: + raise ValueError("invalid blob hash length: %i" % len(key)) + + response = { + 'token': self.make_token(rpc_contact.compact_ip()), + } + if self._dataStore.hasPeersForBlob(key): - rval = {key: self._dataStore.getPeersForBlob(key)} + response[key] = self._dataStore.getPeersForBlob(key) else: - contact_triples = self.findNode(key, **kwargs) - rval = {'contacts': contact_triples} - if '_rpcNodeContact' in kwargs: - contact = kwargs['_rpcNodeContact'] - compact_ip = contact.compact_ip() - rval['token'] = self.make_token(compact_ip) - return rval + response['contacts'] = self.findNode(rpc_contact, key) + return response def _generateID(self): """ Generates an n-bit pseudo-random identifier @@ -606,13 +579,15 @@ def _iterativeFind(self, key, startupShortlist=None, rpc='findNode'): return a list of the k closest nodes to the specified key @rtype: twisted.internet.defer.Deferred """ - findValue = rpc != 'findNode' + + if len(key) != constants.key_bits / 8: + raise ValueError("invalid key length: %i" % len(key)) if startupShortlist is None: shortlist = self._routingTable.findCloseNodes(key, constants.k) - if key != self.node_id: - # Update the "last accessed" timestamp for the appropriate k-bucket - self._routingTable.touchKBucket(key) + # if key != self.node_id: + # # Update the "last accessed" timestamp for the appropriate k-bucket + # self._routingTable.touchKBucket(key) if len(shortlist) == 0: log.warning("This node doesnt know any other nodes") # This node doesn't know of any other nodes @@ -621,7 +596,7 @@ def _iterativeFind(self, key, startupShortlist=None, rpc='findNode'): result = yield fakeDf defer.returnValue(result) else: - # This is used during the bootstrap process; node ID's are most probably fake + # This is used during the bootstrap process shortlist = startupShortlist outerDf = defer.Deferred() diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index e1ca25d15c..43a7a16f8c 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -9,7 +9,6 @@ import encoding import msgtypes import msgformat -from contact import Contact from error import BUILTIN_EXCEPTIONS, UnknownRemoteException, TimeoutError log = logging.getLogger(__name__) @@ -29,7 +28,8 @@ def __init__(self, node): self._partialMessagesProgress = {} def sendRPC(self, contact, method, args, rawResponse=False): - """ Sends an RPC to the specified contact + """ + Sends an RPC to the specified contact @param contact: The contact (remote node) to send the RPC to @type contact: kademlia.contacts.Contact @@ -60,19 +60,39 @@ def sendRPC(self, contact, method, args, rawResponse=False): encodedMsg = self._encoder.encode(msgPrimitive) if 
args: - log.debug("DHT SEND CALL %s(%s)", method, args[0].encode('hex')) + log.debug("%s:%i SEND CALL %s(%s) TO %s:%i", self._node.externalIP, self._node.port, method, + args[0].encode('hex'), contact.address, contact.port) else: - log.debug("DHT SEND CALL %s", method) + log.debug("%s:%i SEND CALL %s TO %s:%i", self._node.externalIP, self._node.port, method, + contact.address, contact.port) df = defer.Deferred() if rawResponse: df._rpcRawResponse = True + def _remove_contact(failure): # remove the contact from the routing table and track the failure + try: + self._node.removeContact(contact) + except (ValueError, IndexError): + pass + contact.update_last_failed() + return failure + + def _update_contact(result): # refresh the contact in the routing table + contact.update_last_replied() + d = self._node.addContact(contact) + d.addCallback(lambda _: result) + return d + + df.addCallbacks(_update_contact, _remove_contact) + # Set the RPC timeout timer timeoutCall, cancelTimeout = self._node.reactor_callLater(constants.rpcTimeout, self._msgTimeout, msg.id) + # Transmit the data self._send(encodedMsg, msg.id, (contact.address, contact.port)) - self._sentMessages[msg.id] = (contact.id, df, timeoutCall, method, args) + self._sentMessages[msg.id] = (contact, df, timeoutCall, cancelTimeout, method, args) + df.addErrback(cancelTimeout) return df @@ -115,46 +135,80 @@ def datagramReceived(self, datagram, address): log.warning("Couldn't decode dht datagram from %s", address) return - remoteContact = Contact(message.nodeID, address[0], address[1], self) - - # Refresh the remote node's details in the local node's k-buckets - self._node.addContact(remoteContact) if isinstance(message, msgtypes.RequestMessage): # This is an RPC method request - self._handleRPC(remoteContact, message.id, message.request, message.args) + remoteContact = self._node.contact_manager.make_contact(message.nodeID, address[0], address[1], self) + remoteContact.update_last_requested() + # only add a requesting contact to the routing table if it has replied to one of our requests + if remoteContact.contact_is_good is True: + df = self._node.addContact(remoteContact) + else: + df = defer.succeed(None) + df.addCallback(lambda _: self._handleRPC(remoteContact, message.id, message.request, message.args)) + # if the contact is not known to be bad (yet) and we haven't yet queried it, send it a ping so that it + # will be added to our routing table if successful + if remoteContact.contact_is_good is None and remoteContact.lastReplied is None: + df.addCallback(lambda _: self._ping_queue.enqueue_maybe_ping(remoteContact)) + elif isinstance(message, msgtypes.ErrorMessage): + # The RPC request raised a remote exception; raise it locally + if message.exceptionType in BUILTIN_EXCEPTIONS: + exception_type = BUILTIN_EXCEPTIONS[message.exceptionType] + else: + exception_type = UnknownRemoteException + remoteException = exception_type(message.response) + log.error("DHT RECV REMOTE EXCEPTION FROM %s:%i: %s", address[0], + address[1], remoteException) + if message.id in self._sentMessages: + # Cancel timeout timer for this RPC + remoteContact, df, timeoutCall, timeoutCanceller, method = self._sentMessages[message.id][0:5] + timeoutCanceller() + del self._sentMessages[message.id] + # reject replies coming from a different address than what we sent our request to + if (remoteContact.address, remoteContact.port) != address: + log.warning("Sent request to node %s at %s:%i, got reply from %s:%i", + remoteContact.log_id(), remoteContact.address, + 
remoteContact.port, address[0], address[1]) + df.errback(TimeoutError(remoteContact.id)) + return + + # this error is returned by nodes that can be contacted but have an old + # and broken version of the ping command, if they return it the node can + # be contacted, so we'll treat it as a successful ping + old_ping_error = "ping() got an unexpected keyword argument '_rpcNodeContact'" + if isinstance(remoteException, TypeError) and \ + remoteException.message == old_ping_error: + log.debug("old pong error") + df.callback('pong') + else: + df.errback(remoteException) elif isinstance(message, msgtypes.ResponseMessage): # Find the message that triggered this response if message.id in self._sentMessages: # Cancel timeout timer for this RPC - df, timeoutCall = self._sentMessages[message.id][1:3] - timeoutCall.cancel() + remoteContact, df, timeoutCall, timeoutCanceller, method = self._sentMessages[message.id][0:5] + timeoutCanceller() del self._sentMessages[message.id] + log.debug("%s:%i RECV response to %s from %s:%i", self._node.externalIP, self._node.port, + method, remoteContact.address, remoteContact.port) + + # When joining the network we made Contact objects for the seed nodes with node ids set to None + # Thus, the sent_to_id will also be None, and the contact objects need the ids to be manually set. + # These replies have be distinguished from those where the node id in the datagram does not match + # the node id of the node we sent a message to (these messages are treated as an error) + if remoteContact.id and remoteContact.id != message.nodeID: # sent_to_id will be None for bootstrap + log.debug("mismatch: (%s) %s:%i (%s vs %s)", method, remoteContact.address, remoteContact.port, + remoteContact.log_id(False), message.nodeID.encode('hex')) + df.errback(TimeoutError(remoteContact.id)) + return + elif not remoteContact.id: + remoteContact.set_id(message.nodeID) if hasattr(df, '_rpcRawResponse'): # The RPC requested that the raw response message # and originating address be returned; do not # interpret it df.callback((message, address)) - elif isinstance(message, msgtypes.ErrorMessage): - # The RPC request raised a remote exception; raise it locally - if message.exceptionType in BUILTIN_EXCEPTIONS: - exception_type = BUILTIN_EXCEPTIONS[message.exceptionType] - else: - exception_type = UnknownRemoteException - remoteException = exception_type(message.response) - # this error is returned by nodes that can be contacted but have an old - # and broken version of the ping command, if they return it the node can - # be contacted, so we'll treat it as a successful ping - old_ping_error = "ping() got an unexpected keyword argument '_rpcNodeContact'" - if isinstance(remoteException, TypeError) and \ - remoteException.message == old_ping_error: - log.debug("old pong error") - df.callback('pong') - else: - log.error("DHT RECV REMOTE EXCEPTION FROM %s:%i: %s", address[0], - address[1], remoteException) - df.errback(remoteException) else: # We got a result from the RPC df.callback(message.response) @@ -259,28 +313,29 @@ def handleResult(result): # Execute the RPC func = getattr(self._node, method, None) - if callable(func) and hasattr(func, 'rpcmethod'): + if callable(func) and hasattr(func, "rpcmethod"): # Call the exposed Node method and return the result to the deferred callback chain if args: - log.debug("DHT RECV CALL %s(%s) %s:%i", method, args[0].encode('hex'), - senderContact.address, senderContact.port) + log.debug("%s:%i RECV CALL %s(%s) %s:%i", self._node.externalIP, self._node.port, method, 
+ args[0].encode('hex'), senderContact.address, senderContact.port) else: - log.debug("DHT RECV CALL %s %s:%i", method, senderContact.address, - senderContact.port) + log.debug("%s:%i RECV CALL %s %s:%i", self._node.externalIP, self._node.port, method, + senderContact.address, senderContact.port) try: if method != 'ping': - kwargs = {'_rpcNodeID': senderContact.id, '_rpcNodeContact': senderContact} - result = func(*args, **kwargs) + result = func(senderContact, *args) else: result = func() except Exception, e: - log.exception("error handling request for %s: %s", senderContact.address, method) + log.exception("error handling request for %s:%i %s", senderContact.address, + senderContact.port, method) df.errback(e) else: df.callback(result) else: # No such exposed method df.errback(AttributeError('Invalid method: %s' % method)) + return df def _msgTimeout(self, messageID): """ Called when an RPC request message times out """ @@ -289,30 +344,30 @@ def _msgTimeout(self, messageID): # This should never be reached log.error("deferred timed out, but is not present in sent messages list!") return - remoteContactID, df, timeout_call, method, args = self._sentMessages[messageID] + remoteContact, df, timeout_call, timeout_canceller, method, args = self._sentMessages[messageID] if self._partialMessages.has_key(messageID): # We are still receiving this message - self._msgTimeoutInProgress(messageID, remoteContactID, df, method, args) + self._msgTimeoutInProgress(messageID, timeout_canceller, remoteContact, df, method, args) return del self._sentMessages[messageID] # The message's destination node is now considered to be dead; # raise an (asynchronous) TimeoutError exception and update the host node - self._node.removeContact(remoteContactID) - df.errback(TimeoutError(remoteContactID)) + df.errback(TimeoutError(remoteContact.id)) - def _msgTimeoutInProgress(self, messageID, remoteContactID, df, method, args): + def _msgTimeoutInProgress(self, messageID, timeoutCanceller, remoteContact, df, method, args): # See if any progress has been made; if not, kill the message if self._hasProgressBeenMade(messageID): # Reset the RPC timeout timer - timeoutCall, _ = self._node.reactor_callLater(constants.rpcTimeout, self._msgTimeout, messageID) - self._sentMessages[messageID] = (remoteContactID, df, timeoutCall, method, args) + timeoutCanceller() + timeoutCall, cancelTimeout = self._node.reactor_callLater(constants.rpcTimeout, self._msgTimeout, messageID) + self._sentMessages[messageID] = (remoteContact, df, timeoutCall, cancelTimeout, method, args) else: # No progress has been made if messageID in self._partialMessagesProgress: del self._partialMessagesProgress[messageID] if messageID in self._partialMessages: del self._partialMessages[messageID] - df.errback(TimeoutError(remoteContactID)) + df.errback(TimeoutError(remoteContact.id)) def _hasProgressBeenMade(self, messageID): return ( diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 16e3ef1cb7..0b20fa6218 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -202,16 +202,16 @@ def getRefreshList(self, startIndex=0, force=False): bucketIndex += 1 return refreshIDs - def removeContact(self, contactID): - """ Remove the contact with the specified node ID from the routing - table + def removeContact(self, contact): + """ + Remove the contact from the routing table - @param contactID: The node ID of the contact to remove - @type contactID: str + @param contact: The contact to remove + @type contact: dht.contact._Contact 
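A note on the failure bookkeeping the protocol changes above depend on: the _remove_contact errback in sendRPC calls contact.update_last_failed(), which appends a timestamp to the ContactManager's per-(ip, port) failure list, and contact_manager.is_ignored() (consulted by the iterative find in patch 13) filters out peers once that list exceeds constants.rpcAttempts entries. Roughly, for a manager and contact as in patch 11:

    from lbrynet.dht import constants

    for _ in range(constants.rpcAttempts + 1):
        contact.update_last_failed()    # one entry per failed or timed-out RPC
    assert manager.is_ignored((contact.address, contact.port))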
""" - bucketIndex = self._kbucketIndex(contactID) + bucketIndex = self._kbucketIndex(contact.id) try: - self._buckets[bucketIndex].removeContact(contactID) + self._buckets[bucketIndex].removeContact(contact) except ValueError: return From e5703833cf6da8394b9730a83504bc1dd14742cb Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:33:22 -0400 Subject: [PATCH 12/79] prevent duplicate entries in the datastore --- lbrynet/dht/datastore.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lbrynet/dht/datastore.py b/lbrynet/dht/datastore.py index 34304f29f5..57cdac9ae5 100644 --- a/lbrynet/dht/datastore.py +++ b/lbrynet/dht/datastore.py @@ -40,7 +40,8 @@ def hasPeersForBlob(self, key): def addPeerToBlob(self, key, value, lastPublished, originallyPublished, originalPublisherID): if key in self._dict: - self._dict[key].append((value, lastPublished, originallyPublished, originalPublisherID)) + if value not in map(lambda store_tuple: store_tuple[0], self._dict[key]): + self._dict[key].append((value, lastPublished, originallyPublished, originalPublisherID)) else: self._dict[key] = [(value, lastPublished, originallyPublished, originalPublisherID)] From f1e0a784d9e5bb6d05c8f40e6bb139f7a1cfee6e Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:37:20 -0400 Subject: [PATCH 13/79] refactor iterativeFind, move to own file --- lbrynet/dht/iterativefind.py | 226 ++++++++++++++++++++++++++++++++ lbrynet/dht/node.py | 247 +---------------------------------- 2 files changed, 228 insertions(+), 245 deletions(-) create mode 100644 lbrynet/dht/iterativefind.py diff --git a/lbrynet/dht/iterativefind.py b/lbrynet/dht/iterativefind.py new file mode 100644 index 0000000000..40e77a0229 --- /dev/null +++ b/lbrynet/dht/iterativefind.py @@ -0,0 +1,226 @@ +import logging +from twisted.internet import defer +from distance import Distance +from error import TimeoutError +import constants + +log = logging.getLogger(__name__) + + +def get_contact(contact_list, node_id, address, port): + for contact in contact_list: + if contact.id == node_id and contact.address == address and contact.port == port: + return contact + raise IndexError(node_id) + + +class _IterativeFind(object): + # TODO: use polymorphism to search for a value or node + # instead of using a find_value flag + def __init__(self, node, shortlist, key, rpc): + self.node = node + self.finished_deferred = defer.Deferred() + # all distance operations in this class only care about the distance + # to self.key, so this makes it easier to calculate those + self.distance = Distance(key) + # The closest known and active node yet found + self.closest_node = None if not shortlist else shortlist[0] + self.prev_closest_node = None + # Shortlist of contact objects (the k closest known contacts to the key from the routing table) + self.shortlist = shortlist + # The search key + self.key = str(key) + # The rpc method name (findValue or findNode) + self.rpc = rpc + # List of active queries; len() indicates number of active probes + self.active_probes = [] + # List of contact (address, port) tuples that have already been queried, includes contacts that didn't reply + self.already_contacted = [] + # A list of found and known-to-be-active remote nodes (Contact objects) + self.active_contacts = [] + # Ensure only one searchIteration call is running at a time + self._search_iteration_semaphore = defer.DeferredSemaphore(1) + self._iteration_count = 0 + self.find_value_result = {} + self.pending_iteration_calls = [] + self._lock = 
defer.DeferredLock()
+
+    @property
+    def is_find_node_request(self):
+        return self.rpc == "findNode"
+
+    @property
+    def is_find_value_request(self):
+        return self.rpc == "findValue"
+
+    def is_closer(self, responseMsg):
+        if not self.closest_node:
+            return True
+        return self.distance.is_closer(responseMsg.nodeID, self.closest_node.id)
+
+    def getContactTriples(self, result):
+        if self.is_find_value_request:
+            contact_triples = result['contacts']
+        else:
+            contact_triples = result
+        for contact_tup in contact_triples:
+            if not isinstance(contact_tup, (list, tuple)) or len(contact_tup) != 3:
+                raise ValueError("invalid contact triple")
+        return contact_triples
+
+    def sortByDistance(self, contact_list):
+        """Sort the list of contacts in order by distance from key"""
+        contact_list.sort(key=lambda c: self.distance(c.id))
+
+    @defer.inlineCallbacks
+    def extendShortlist(self, contact, responseTuple):
+        # The "raw response" tuple contains the response message and the originating address info
+        responseMsg = responseTuple[0]
+        originAddress = responseTuple[1]  # tuple: (ip address, udp port)
+        if self.finished_deferred.called:
+            defer.returnValue(responseMsg.nodeID)
+        if self.node.contact_manager.is_ignored(originAddress):
+            raise ValueError("contact is ignored")
+        if responseMsg.nodeID == self.node.node_id:
+            defer.returnValue(responseMsg.nodeID)
+
+        yield self._lock.acquire()
+
+        if contact not in self.active_contacts:
+            self.active_contacts.append(contact)
+        if contact not in self.shortlist:
+            self.shortlist.append(contact)
+
+        # Now extend the (unverified) shortlist with the returned contacts
+        result = responseMsg.response
+        # TODO: some validation on the result (for guarding against attacks)
+        # If we are looking for a value, first see if this result is the value
+        # we are looking for before treating it as a list of contact triples
+        if self.is_find_value_request and self.key in result:
+            # We have found the value
+            self.find_value_result[self.key] = result[self.key]
+            self._lock.release()
+            self.finished_deferred.callback(self.find_value_result)
+        else:
+            if self.is_find_value_request:
+                # We are looking for a value, and the remote node didn't have it
+                # - mark it as the closest "empty" node, if it is
+                # TODO: store to this peer after finding the value as per the kademlia spec
+                if 'closestNodeNoValue' in self.find_value_result:
+                    if self.is_closer(responseMsg):
+                        self.find_value_result['closestNodeNoValue'] = contact
+                else:
+                    self.find_value_result['closestNodeNoValue'] = contact
+            contactTriples = self.getContactTriples(result)
+            for contactTriple in contactTriples:
+                if (contactTriple[1], contactTriple[2]) in ((c.address, c.port) for c in self.already_contacted):
+                    continue
+                elif self.node.contact_manager.is_ignored((contactTriple[1], contactTriple[2])):
+                    raise ValueError("contact is ignored")
+                else:
+                    found_contact = self.node.contact_manager.make_contact(contactTriple[0], contactTriple[1],
+                                                                           contactTriple[2], self.node._protocol)
+                    if found_contact not in self.shortlist:
+                        self.shortlist.append(found_contact)
+
+        self._lock.release()
+
+        if not self.finished_deferred.called:
+            if self.should_stop():
+                self.sortByDistance(self.active_contacts)
+                self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))])
+
+        defer.returnValue(responseMsg.nodeID)
+
+    @defer.inlineCallbacks
+    def probeContact(self, contact):
+        fn = getattr(contact, self.rpc)
+        try:
+            response_tuple = yield fn(self.key, rawResponse=True)
+            result = yield 
self.extendShortlist(contact, response_tuple) + defer.returnValue(result) + except (TimeoutError, defer.CancelledError, ValueError, IndexError): + defer.returnValue(contact.id) + + def should_stop(self): + active_contacts_len = len(self.active_contacts) + if active_contacts_len >= constants.k: + # log.info("there are enough results %s(%s)", self.rpc, self.key.encode('hex')) + return True + if self.prev_closest_node and self.closest_node and self.distance.is_closer( + self.prev_closest_node.id, self.closest_node.id): + # log.info("not getting any closer %s(%s)", self.rpc, self.key.encode('hex')) + return True + return False + + # Send parallel, asynchronous FIND_NODE RPCs to the shortlist of contacts + @defer.inlineCallbacks + def _searchIteration(self): + yield self._lock.acquire() + # Sort the discovered active nodes from closest to furthest + if len(self.active_contacts): + self.sortByDistance(self.active_contacts) + self.prev_closest_node = self.closest_node + self.closest_node = self.active_contacts[0] + + # Sort and store the current shortList length before contacting other nodes + self.sortByDistance(self.shortlist) + probes = [] + already_contacted_addresses = {(c.address, c.port) for c in self.already_contacted} + to_remove = [] + for contact in self.shortlist: + if (contact.address, contact.port) not in already_contacted_addresses: + self.already_contacted.append(contact) + to_remove.append(contact) + probe = self.probeContact(contact) + probes.append(probe) + self.active_probes.append(probe) + if len(probes) == constants.alpha: + break + for contact in to_remove: # these contacts will be re-added to the shortlist when they reply successfully + self.shortlist.remove(contact) + + # log.info("Active probes: %i, contacted %i/%i (%s)", len(self.active_probes), + # len(self.active_contacts), len(self.already_contacted), hex(id(self))) + + # run the probes + if probes: + # Schedule the next iteration if there are any active + # calls (Kademlia uses loose parallelism) + self.searchIteration() + self._lock.release() + + d = defer.gatherResults(probes) + + @defer.inlineCallbacks + def _remove_probes(results): + yield self._lock.acquire() + for probe in probes: + self.active_probes.remove(probe) + self._lock.release() + defer.returnValue(results) + + d.addCallback(_remove_probes) + + elif not self.finished_deferred.called and not self.active_probes: + # If no probes were sent, there will not be any improvement, so we're done + self.sortByDistance(self.active_contacts) + self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))]) + + def searchIteration(self): + def _cancel_pending_iterations(result): + while self.pending_iteration_calls: + canceller = self.pending_iteration_calls.pop() + canceller() + return result + self.finished_deferred.addBoth(_cancel_pending_iterations) + self._iteration_count += 1 + # log.debug("iteration %i %s(%s...)", self._iteration_count, self.rpc, self.key.encode('hex')[:8]) + call, cancel = self.node.reactor_callLater(1, self._search_iteration_semaphore.run, self._searchIteration) + self.pending_iteration_calls.append(cancel) + + +def iterativeFind(node, shortlist, key, rpc): + helper = _IterativeFind(node, shortlist, key, rpc) + helper.searchIteration() + return helper.finished_deferred diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index b24b923d1c..434b1b1f97 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -26,6 +26,7 @@ from peerfinder import DHTPeerFinder from contact import ContactManager from 
distance import Distance +from iterativefind import iterativeFind log = logging.getLogger(__name__) @@ -599,12 +600,7 @@ def _iterativeFind(self, key, startupShortlist=None, rpc='findNode'): # This is used during the bootstrap process shortlist = startupShortlist - outerDf = defer.Deferred() - - helper = _IterativeFindHelper(self, outerDf, shortlist, key, findValue, rpc) - # Start the iterations - helper.searchIteration() - result = yield outerDf + result = yield iterativeFind(self, shortlist, key, rpc) defer.returnValue(result) @defer.inlineCallbacks @@ -623,242 +619,3 @@ def _refreshRoutingTable(self): searchID = nodeIDs.pop() yield self.iterativeFindNode(searchID) defer.returnValue(None) - - -# This was originally a set of nested methods in _iterativeFind -# but they have been moved into this helper class in-order to -# have better scoping and readability -class _IterativeFindHelper(object): - # TODO: use polymorphism to search for a value or node - # instead of using a find_value flag - def __init__(self, node, outer_d, shortlist, key, find_value, rpc): - self.node = node - self.outer_d = outer_d - self.shortlist = shortlist - self.key = key - self.find_value = find_value - self.rpc = rpc - # all distance operations in this class only care about the distance - # to self.key, so this makes it easier to calculate those - self.distance = Distance(key) - # List of active queries; len() indicates number of active probes - # - # n.b: using lists for these variables, because Python doesn't - # allow binding a new value to a name in an enclosing - # (non-global) scope - self.active_probes = [] - # List of contact IDs that have already been queried - self.already_contacted = [] - # Probes that were active during the previous iteration - # A list of found and known-to-be-active remote nodes - self.active_contacts = [] - # This should only contain one entry; the next scheduled iteration call - self.pending_iteration_calls = [] - self.prev_closest_node = [None] - self.find_value_result = {} - self.slow_node_count = [0] - - def extendShortlist(self, responseTuple): - """ @type responseMsg: kademlia.msgtypes.ResponseMessage """ - # The "raw response" tuple contains the response message, - # and the originating address info - responseMsg = responseTuple[0] - originAddress = responseTuple[1] # tuple: (ip adress, udp port) - # Make sure the responding node is valid, and abort the operation if it isn't - if responseMsg.nodeID in self.active_contacts or responseMsg.nodeID == self.node.node_id: - return responseMsg.nodeID - - # Mark this node as active - aContact = self._getActiveContact(responseMsg, originAddress) - self.active_contacts.append(aContact) - - # This makes sure "bootstrap"-nodes with "fake" IDs don't get queried twice - if responseMsg.nodeID not in self.already_contacted: - self.already_contacted.append(responseMsg.nodeID) - - # Now grow extend the (unverified) shortlist with the returned contacts - result = responseMsg.response - # TODO: some validation on the result (for guarding against attacks) - # If we are looking for a value, first see if this result is the value - # we are looking for before treating it as a list of contact triples - if self.find_value is True and self.key in result and not 'contacts' in result: - # We have found the value - self.find_value_result[self.key] = result[self.key] - else: - if self.find_value is True: - self._setClosestNodeValue(responseMsg, aContact) - self._keepSearching(result) - return responseMsg.nodeID - - def _getActiveContact(self, responseMsg, 
originAddress): - if responseMsg.nodeID in self.shortlist: - # Get the contact information from the shortlist... - return self.shortlist[self.shortlist.index(responseMsg.nodeID)] - else: - # If it's not in the shortlist; we probably used a fake ID to reach it - # - reconstruct the contact, using the real node ID this time - return Contact( - responseMsg.nodeID, originAddress[0], originAddress[1], self.node._protocol) - - def _keepSearching(self, result): - contactTriples = self._getContactTriples(result) - for contactTriple in contactTriples: - self._addIfValid(contactTriple) - - def _getContactTriples(self, result): - if self.find_value is True: - return result['contacts'] - else: - return result - - def _setClosestNodeValue(self, responseMsg, aContact): - # We are looking for a value, and the remote node didn't have it - # - mark it as the closest "empty" node, if it is - if 'closestNodeNoValue' in self.find_value_result: - if self._is_closer(responseMsg): - self.find_value_result['closestNodeNoValue'] = aContact - else: - self.find_value_result['closestNodeNoValue'] = aContact - - def _is_closer(self, responseMsg): - return self.distance.is_closer(responseMsg.nodeID, self.active_contacts[0].id) - - def _addIfValid(self, contactTriple): - if isinstance(contactTriple, (list, tuple)) and len(contactTriple) == 3: - testContact = Contact( - contactTriple[0], contactTriple[1], contactTriple[2], self.node._protocol) - if testContact not in self.shortlist: - self.shortlist.append(testContact) - - def removeFromShortlist(self, failure, deadContactID): - """ @type failure: twisted.python.failure.Failure """ - failure.trap(TimeoutError, defer.CancelledError, TypeError) - if len(deadContactID) != constants.key_bits / 8: - raise ValueError("invalid lbry id") - if deadContactID in self.shortlist: - self.shortlist.remove(deadContactID) - return deadContactID - - def cancelActiveProbe(self, contactID): - self.active_probes.pop() - if len(self.active_probes) <= constants.alpha / 2 and len(self.pending_iteration_calls): - # Force the iteration - self.pending_iteration_calls[0].cancel() - del self.pending_iteration_calls[0] - self.searchIteration() - - def sortByDistance(self, contact_list): - """Sort the list of contacts in order by distance from key""" - ExpensiveSort(contact_list, self.distance.to_contact).sort() - - # Send parallel, asynchronous FIND_NODE RPCs to the shortlist of contacts - def searchIteration(self): - self.slow_node_count[0] = len(self.active_probes) - # Sort the discovered active nodes from closest to furthest - self.sortByDistance(self.active_contacts) - # This makes sure a returning probe doesn't force calling this function by mistake - while len(self.pending_iteration_calls): - del self.pending_iteration_calls[0] - # See if should continue the search - if self.key in self.find_value_result: - self.outer_d.callback(self.find_value_result) - return - elif len(self.active_contacts) and self.find_value is False: - if self._is_all_done(): - # TODO: Re-send the FIND_NODEs to all of the k closest nodes not already queried - # - # Ok, we're done; either we have accumulated k active - # contacts or no improvement in closestNode has been - # noted - self.outer_d.callback(self.active_contacts) - return - - # The search continues... 
- if len(self.active_contacts): - self.prev_closest_node[0] = self.active_contacts[0] - contactedNow = 0 - self.sortByDistance(self.shortlist) - # Store the current shortList length before contacting other nodes - prevShortlistLength = len(self.shortlist) - for contact in self.shortlist: - if contact.id not in self.already_contacted: - self._probeContact(contact) - contactedNow += 1 - if contactedNow == constants.alpha: - break - if self._should_lookup_active_calls(): - # Schedule the next iteration if there are any active - # calls (Kademlia uses loose parallelism) - call, _ = self.node.reactor_callLater(constants.iterativeLookupDelay, self.searchIteration) - self.pending_iteration_calls.append(call) - # Check for a quick contact response that made an update to the shortList - elif prevShortlistLength < len(self.shortlist): - # Ensure that the closest contacts are taken from the updated shortList - self.searchIteration() - else: - # If no probes were sent, there will not be any improvement, so we're done - self.outer_d.callback(self.active_contacts) - - def _probeContact(self, contact): - self.active_probes.append(contact.id) - rpcMethod = getattr(contact, self.rpc) - df = rpcMethod(self.key, rawResponse=True) - df.addCallback(self.extendShortlist) - df.addErrback(self.removeFromShortlist, contact.id) - df.addCallback(self.cancelActiveProbe) - df.addErrback(lambda _: log.exception('Failed to contact %s', contact)) - self.already_contacted.append(contact.id) - - def _should_lookup_active_calls(self): - return ( - len(self.active_probes) > self.slow_node_count[0] or - ( - len(self.shortlist) < constants.k and - len(self.active_contacts) < len(self.shortlist) and - len(self.active_probes) > 0 - ) - ) - - def _is_all_done(self): - return ( - len(self.active_contacts) >= constants.k or - ( - self.active_contacts[0] == self.prev_closest_node[0] and - len(self.active_probes) == self.slow_node_count[0] - ) - ) - - -class ExpensiveSort(object): - """Sort a list in place. - - The result of `key(item)` is cached for each item in the `to_sort` - list as an optimization. This can be useful when `key` is - expensive. - - Attributes: - to_sort: a list of items to sort - key: callable, like `key` in normal python sort - attr: the attribute name used to cache the value on each item. 
- """ - - def __init__(self, to_sort, key, attr='__value'): - self.to_sort = to_sort - self.key = key - self.attr = attr - - def sort(self): - self._cacheValues() - self._sortByValue() - self._removeValue() - - def _cacheValues(self): - for item in self.to_sort: - setattr(item, self.attr, self.key(item)) - - def _sortByValue(self): - self.to_sort.sort(key=operator.attrgetter(self.attr)) - - def _removeValue(self): - for item in self.to_sort: - delattr(item, self.attr) From 95ed1e044bab5ce4aed1e4d04092cbddd65a5650 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:41:56 -0400 Subject: [PATCH 14/79] raise TransportNotConnected instead of logging a warning -add a _listening Deferred to KademliaProtocol which is called when the protocol is started --- lbrynet/dht/error.py | 4 ++++ lbrynet/dht/node.py | 3 ++- lbrynet/dht/protocol.py | 8 +++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lbrynet/dht/error.py b/lbrynet/dht/error.py index 3d44cf3f11..89cf89fab1 100644 --- a/lbrynet/dht/error.py +++ b/lbrynet/dht/error.py @@ -39,3 +39,7 @@ def __init__(self, remote_contact_id): msg = 'Timeout connecting to uninitialized node' Exception.__init__(self, msg) self.remote_contact_id = remote_contact_id + + +class TransportNotConnected(Exception): + pass diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 434b1b1f97..34327a9642 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -223,7 +223,8 @@ def joinNetwork(self, known_node_addresses=None): """ self.start_listening() - # #TODO: Refresh all k-buckets further away than this node's closest neighbour + yield self._protocol._listening + # TODO: Refresh all k-buckets further away than this node's closest neighbour self.safe_start_looping_call(self._change_token_lc, constants.tokenSecretChangeInterval) # Start refreshing k-buckets periodically, if necessary self.bootstrap_join(known_node_addresses or [], self._joinDeferred) diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 43a7a16f8c..52c99475c2 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -4,12 +4,12 @@ from twisted.internet import protocol, defer from lbrynet.core.call_later_manager import CallLaterManager +from error import BUILTIN_EXCEPTIONS, UnknownRemoteException, TimeoutError, TransportNotConnected import constants import encoding import msgtypes import msgformat -from error import BUILTIN_EXCEPTIONS, UnknownRemoteException, TimeoutError log = logging.getLogger(__name__) @@ -26,6 +26,7 @@ def __init__(self, node): self._sentMessages = {} self._partialMessages = {} self._partialMessagesProgress = {} + self._listening = defer.Deferred(None) def sendRPC(self, contact, method, args, rawResponse=False): """ @@ -97,7 +98,8 @@ def _update_contact(result): # refresh the contact in the routing table return df def startProtocol(self): - log.info("DHT listening on UDP %i", self._node.port) + log.info("DHT listening on UDP %s:%i", self._node.externalIP, self._node.port) + self._listening.callback(True) def datagramReceived(self, datagram, address): """ Handles and parses incoming RPC messages (and responses) @@ -279,7 +281,7 @@ def _write(self, txData, address): log.error("DHT socket error: %s (%i)", err.message, err.errno) raise err else: - log.warning("transport not connected!") + raise TransportNotConnected() def _sendResponse(self, contact, rpcID, response): """ Send a RPC response to the specified contact From d4e28216a07686843288335d8d828d0525d38a58 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 
23 May 2018 17:42:49 -0400
Subject: [PATCH 15/79] sort KBucket.getContacts

---
 lbrynet/dht/kbucket.py | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/lbrynet/dht/kbucket.py b/lbrynet/dht/kbucket.py
index bb4cfc0dc4..dfd3f5ae85 100644
--- a/lbrynet/dht/kbucket.py
+++ b/lbrynet/dht/kbucket.py
@@ -1,6 +1,10 @@
+import logging
 import constants
+from distance import Distance
 from error import BucketFull
 
+log = logging.getLogger(__name__)
+
 
 class KBucket(object):
     """ Description - later
@@ -28,7 +32,7 @@ def addContact(self, contact):
             already
 
         @param contact: The contact to add
-        @type contact: kademlia.contact.Contact
+        @type contact: dht.contact._Contact
         """
         if contact in self._contacts:
             # Move the existing contact to the end of the list
@@ -56,19 +60,21 @@ def getContact(self, contactID):
                 return contact
         raise IndexError(contactID)
 
-    def getContacts(self, count=-1, excludeContact=None):
+    def getContacts(self, count=-1, excludeContact=None, sort_distance_to=None):
         """ Returns a list containing up to the first count number of contacts
 
         @param count: The amount of contacts to return (if 0 or less, return all contacts)
        @type count: int
-        @param excludeContact: A contact to exclude; if this contact is in
+        @param excludeContact: A node id to exclude; if this contact is in
                                the list of returned values, it will be
                                discarded before returning. If a C{str} is
                                passed as this argument, it must be the
                                contact's ID.
-        @type excludeContact: kademlia.contact.Contact or str
+        @type excludeContact: str
+        @param sort_distance_to: Sort distance to the id, defaulting to the parent node id. If False, don't
+                                 sort the contacts
 
        @raise IndexError: If the number of requested contacts is too large
 
@@ -76,38 +82,35 @@ def getContacts(self, count=-1, excludeContact=None):
            If no contacts are present an empty is returned
         @rtype: list
         """
+        contacts = [contact for contact in self._contacts if contact.id != excludeContact]
+
         # Return all contacts in bucket
         if count <= 0:
-            count = len(self._contacts)
+            count = len(contacts)
 
         # Get current contact number
-        currentLen = len(self._contacts)
+        currentLen = len(contacts)
 
         # If count greater than k - return only k contacts
         if count > constants.k:
             count = constants.k
 
-        # Check if count value in range and,
-        # if count number of contacts are available
         if not currentLen:
-            contactList = list()
+            return contacts
 
-        # length of list less than requested amount
-        elif currentLen < count:
-            contactList = self._contacts[0:currentLen]
-        # enough contacts in list
+        if sort_distance_to is False:
+            pass
         else:
-            contactList = self._contacts[0:count]
+            sort_distance_to = sort_distance_to or self._node_id
+            contacts.sort(key=lambda c: Distance(sort_distance_to)(c.id))
 
-        if excludeContact in contactList:
-            contactList.remove(excludeContact)
+        return contacts[:min(currentLen, count)]
 
     def getBadOrUnknownContacts(self):
         contacts = self.getContacts(sort_distance_to=False)
         results = [contact for contact in contacts if contact.contact_is_good is False]
         results.extend(contact for contact in contacts if contact.contact_is_good is None)
         return results
-        return contactList
 
     def removeContact(self, contact):
         """ Remove the contact from the bucket
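The getContacts change above replaces the slice-and-filter bookkeeping with a single
xor-distance sort. A rough standalone sketch of that ordering, using a stand-in
Distance class with the same call shape as the one imported from distance.py (the
48-byte ids below are made up for illustration):

    class Distance(object):
        # callable xor distance to a fixed key
        def __init__(self, key):
            self.key_value = long(key.encode('hex'), 16)

        def __call__(self, other_id):
            return self.key_value ^ long(other_id.encode('hex'), 16)

    target = '\x01' * 48
    ids = ['\xff' * 48, '\x00' * 48, '\x0f' * 48]
    ids.sort(key=Distance(target))
    # '\x00' * 48 sorts first: its xor with target is the smallest integer

From c654bfe296cd17c757c7249149599b2b2bf4d76a Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 23 May 2018 17:45:24 -0400
Subject: [PATCH 16/79] use reactor clock in TreeRoutingTable instead of time
 module

---
 lbrynet/dht/routingtable.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
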
diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py
index 0b20fa6218..9e0d0cb49f 100644
--- a/lbrynet/dht/routingtable.py
+++ b/lbrynet/dht/routingtable.py
@@ -43,7 +43,8 @@ def __init__(self, parentNodeID, getTime=None):
         self._parentNodeID = parentNodeID
         self._buckets = [kbucket.KBucket(rangeMin=0, rangeMax=2 ** constants.key_bits, node_id=self._parentNodeID)]
         if not getTime:
-            from time import time as getTime
+            from twisted.internet import reactor
+            getTime = reactor.seconds
         self._getTime = getTime
 
     def addContact(self, contact):
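Taking time from the reactor instead of time.time() makes bucket staleness testable:
a unit test can substitute twisted's task.Clock and advance time deterministically.
A minimal sketch, assuming the TreeRoutingTable signature shown above (the 48-byte
parent node id is a placeholder):

    from twisted.internet import task

    clock = task.Clock()
    table = TreeRoutingTable('\x00' * 48, getTime=clock.seconds)
    clock.advance(3600)  # an hour of inactivity passes without sleeping
    stale_ids = table.getRefreshList(0, force=False)  # ids for buckets that now look stale

From 5631a2488120c3ededec0771ebd45a89bd6d3636 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 23 May 2018 17:47:20 -0400
Subject: [PATCH 17/79] improve findCloseNodes, choose closest contacts from
 higher and lower buckets

---
 lbrynet/dht/routingtable.py | 37 ++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py
index 9e0d0cb49f..65156297e9 100644
--- a/lbrynet/dht/routingtable.py
+++ b/lbrynet/dht/routingtable.py
@@ -106,7 +106,7 @@ def replaceContact(failure, deadContactID):
             # contact, and append the new one
             df.addErrback(replaceContact, head_contact.id)
 
-    def findCloseNodes(self, key, count, _rpcNodeID=None):
+    def findCloseNodes(self, key, count, sender_node_id=None):
         """ Finds a number of known nodes closest to the node/value with the
         specified key.
 
@@ -114,10 +114,10 @@ def findCloseNodes(self, key, count, _rpcNodeID=None):
         @type key: str
         @param count: the amount of contacts to return
         @type count: int
-        @param _rpcNodeID: Used during RPC, this is be the sender's Node ID
-                           Whatever ID is passed in the paramater will get
-                           excluded from the list of returned contacts.
-        @type _rpcNodeID: str
+        @param sender_node_id: Used during RPC, this is the sender's node ID.
+                               Whatever ID is passed in the parameter will get
+                               excluded from the list of returned contacts.
+        @type sender_node_id: str
 
         @return: A list of node contacts (C{kademlia.contact.Contact instances})
                  closest to the specified key.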
@@ -129,7 +129,8 @@ def findCloseNodes(self, key, count, _rpcNodeID=None): bucketIndex = self._kbucketIndex(key) if bucketIndex < len(self._buckets): - closestNodes = self._buckets[bucketIndex].getContacts(count, _rpcNodeID) + # sort these + closestNodes = self._buckets[bucketIndex].getContacts(count, sender_node_id, sort_distance_to=key) else: closestNodes = [] # This method must return k contacts (even if we have the node @@ -142,21 +143,27 @@ def findCloseNodes(self, key, count, _rpcNodeID=None): def get_remain(closest): return min(count, constants.k) - len(closest) - # Fill up the node list to k nodes, starting with the closest neighbouring nodes known + distance = Distance(key) + while len(closestNodes) < min(count, constants.k) and (canGoLower or canGoHigher): - # TODO: this may need to be optimized - # TODO: add "key" kwarg to getContacts() to sort contacts returned by xor distance - # to the key + iteration_contacts = [] + # get contacts from lower and/or higher buckets without sorting them if canGoLower and len(closestNodes) < min(count, constants.k): - closestNodes.extend( - self._buckets[bucketIndex - i].getContacts(get_remain(closestNodes), - _rpcNodeID)) + lower_bucket = self._buckets[bucketIndex - i] + contacts = lower_bucket.getContacts(get_remain(closestNodes), sender_node_id, sort_distance_to=False) + iteration_contacts.extend(contacts) canGoLower = bucketIndex - (i + 1) >= 0 + if canGoHigher and len(closestNodes) < min(count, constants.k): - closestNodes.extend(self._buckets[bucketIndex + i].getContacts( - get_remain(closestNodes), _rpcNodeID)) + higher_bucket = self._buckets[bucketIndex + i] + contacts = higher_bucket.getContacts(get_remain(closestNodes), sender_node_id, sort_distance_to=False) + iteration_contacts.extend(contacts) canGoHigher = bucketIndex + (i + 1) < len(self._buckets) i += 1 + # sort the combined contacts and add as many as possible/needed to the combined contact list + iteration_contacts.sort(key=lambda c: distance(c.id), reverse=True) + while len(iteration_contacts) and len(closestNodes) < min(count, constants.k): + closestNodes.append(iteration_contacts.pop()) return closestNodes def getContact(self, contactID): From cf3359044d971fd64e275a0c16bbbc35046d7181 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:49:38 -0400 Subject: [PATCH 18/79] fix conditions for when a kbucket should be split https://stackoverflow.com/a/32187456 --- lbrynet/dht/routingtable.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 65156297e9..8c905a1143 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -47,6 +47,26 @@ def __init__(self, parentNodeID, getTime=None): getTime = reactor.seconds self._getTime = getTime + def get_contacts(self): + contacts = [] + for i in range(len(self._buckets)): + for contact in self._buckets[i]._contacts: + contacts.append(contact) + return contacts + + def _shouldSplit(self, bucketIndex, toAdd): + # https://stackoverflow.com/questions/32129978/highly-unbalanced-kademlia-routing-table/32187456#32187456 + if self._buckets[bucketIndex].keyInRange(self._parentNodeID): + return True + contacts = self.get_contacts() + distance = Distance(self._parentNodeID) + contacts.sort(key=lambda c: distance(c.id)) + if len(contacts) < constants.k: + kth_contact = contacts[-1] + else: + kth_contact = contacts[constants.k-1] + return distance(toAdd) < distance(kth_contact.id) + def addContact(self, contact): """ 
Add the given contact to the correct k-bucket; if it already exists, its status will be updated @@ -63,7 +83,7 @@ def addContact(self, contact): except kbucket.BucketFull: # The bucket is full; see if it can be split (by checking # if its range includes the host node's id) - if self._buckets[bucketIndex].keyInRange(self._parentNodeID): + if self._shouldSplit(bucketIndex, contact.id): self._splitBucket(bucketIndex) # Retry the insertion attempt self.addContact(contact) From 05241012a29d97fedcc1e8a8d61c6fe8058e84fa Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:53:23 -0400 Subject: [PATCH 19/79] update contact replacement in KBucket to follow BEP0005 http://www.bittorrent.org/beps/bep_0005.html --- lbrynet/dht/routingtable.py | 105 +++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 36 deletions(-) diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 8c905a1143..5a4db509a6 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -7,9 +7,11 @@ import random from zope.interface import implements +from twisted.internet import defer import constants import kbucket -import protocol +from error import TimeoutError +from distance import Distance from interface import IRoutingTable import logging @@ -73,58 +75,75 @@ def addContact(self, contact): @param contact: The contact to add to this node's k-buckets @type contact: kademlia.contact.Contact + + @rtype: defer.Deferred """ - if contact.id == self._parentNodeID: - return + if contact.id == self._parentNodeID: + return defer.succeed(None) bucketIndex = self._kbucketIndex(contact.id) try: self._buckets[bucketIndex].addContact(contact) except kbucket.BucketFull: - # The bucket is full; see if it can be split (by checking - # if its range includes the host node's id) + # The bucket is full; see if it can be split (by checking if its range includes the host node's id) if self._shouldSplit(bucketIndex, contact.id): self._splitBucket(bucketIndex) # Retry the insertion attempt - self.addContact(contact) + return self.addContact(contact) else: + # We can't split the k-bucket - # NOTE: - # In section 2.4 of the 13-page version of the - # Kademlia paper, it is specified that in this case, - # the new contact should simply be dropped. However, - # in section 2.2, it states that the head contact in - # the k-bucket (i.e. the least-recently seen node) - # should be pinged - if it does not reply, it should - # be dropped, and the new contact added to the tail of - # the k-bucket. This implementation follows section - # 2.2 regarding this point. - - def replaceContact(failure, deadContactID): - """ Callback for the deferred PING RPC to see if the head - node in the k-bucket is still responding + # + # The 13 page kademlia paper specifies that the least recently contacted node in the bucket + # shall be pinged. If it fails to reply it is replaced with the new contact. If the ping is successful + # the new contact is ignored and not added to the bucket (sections 2.2 and 2.4). + # + # A reasonable extension to this is BEP 0005, which extends the above: + # + # Not all nodes that we learn about are equal. Some are "good" and some are not. + # Many nodes using the DHT are able to send queries and receive responses, + # but are not able to respond to queries from other nodes. It is important that + # each node's routing table must contain only known good nodes. A good node is + # a node has responded to one of our queries within the last 15 minutes. 
A node + # is also good if it has ever responded to one of our queries and has sent us a + # query within the last 15 minutes. After 15 minutes of inactivity, a node becomes + # questionable. Nodes become bad when they fail to respond to multiple queries + # in a row. Nodes that we know are good are given priority over nodes with unknown status. + # + # When there are bad or questionable nodes in the bucket, the least recent is selected for + # potential replacement (BEP 0005). When all nodes in the bucket are fresh, the head (least recent) + # contact is selected as described in section 2.2 of the kademlia paper. In both cases the new contact + # is ignored if the pinged node replies. + + def replaceContact(failure, deadContact): + """ + Callback for the deferred PING RPC to see if the node to be replaced in the k-bucket is still + responding @type failure: twisted.python.failure.Failure """ - failure.trap(protocol.TimeoutError) - if len(deadContactID) != constants.key_bits / 8: - raise ValueError("invalid contact id") - log.debug("Replacing dead contact: %s", deadContactID.encode('hex')) + failure.trap(TimeoutError) + log.debug("Replacing dead contact in bucket %i: %s:%i (%s) with %s:%i (%s)", bucketIndex, + deadContact.address, deadContact.port, deadContact.log_id(), contact.address, contact.port, + contact.log_id()) try: - # Remove the old contact... - self._buckets[bucketIndex].removeContact(deadContactID) + self._buckets[bucketIndex].removeContact(deadContact) except ValueError: # The contact has already been removed (probably due to a timeout) pass - # ...and add the new one at the tail of the bucket - self.addContact(contact) - - # Ping the least-recently seen contact in this k-bucket - head_contact = self._buckets[bucketIndex]._contacts[0] - df = head_contact.ping() - # If there's an error (i.e. 
timeout), remove the head - # contact, and append the new one - df.addErrback(replaceContact, head_contact.id) + return self.addContact(contact) + + not_good_contacts = self._buckets[bucketIndex].getBadOrUnknownContacts() + if not_good_contacts: + to_replace = not_good_contacts[0] + else: + to_replace = self._buckets[bucketIndex]._contacts[0] + df = to_replace.ping() + df.addErrback(replaceContact, to_replace) + return df + else: + self.touchKBucketByIndex(bucketIndex) + return defer.succeed(None) def findCloseNodes(self, key, count, sender_node_id=None): """ Finds a number of known nodes closest to the node/value with the @@ -250,7 +269,9 @@ def touchKBucket(self, key): @param key: A key in the range of the target k-bucket @type key: str """ - bucketIndex = self._kbucketIndex(key) + self.touchKBucketByIndex(self._kbucketIndex(key)) + + def touchKBucketByIndex(self, bucketIndex): self._buckets[bucketIndex].lastAccessed = int(self._getTime()) def _kbucketIndex(self, key): @@ -312,4 +333,16 @@ def _splitBucket(self, oldBucketIndex): for contact in newBucket._contacts: oldBucket.removeContact(contact) + def contactInRoutingTable(self, address_tuple): + for bucket in self._buckets: + for contact in bucket.getContacts(sort_distance_to=False): + if address_tuple[0] == contact.address and address_tuple[1] == contact.port: + return True + return False + def bucketsWithContacts(self): + count = 0 + for bucket in self._buckets: + if len(bucket): + count += 1 + return count From 1adf4f78186bab0c7a7463d35e0ea4fdb6db5ae9 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:54:55 -0400 Subject: [PATCH 20/79] fix constant used to check if a bucket is fresh --- lbrynet/dht/routingtable.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 5a4db509a6..540871bb5e 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -242,8 +242,9 @@ def getRefreshList(self, startIndex=0, force=False): """ bucketIndex = startIndex refreshIDs = [] + now = int(self._getTime()) for bucket in self._buckets[startIndex:]: - if force or (int(self._getTime()) - bucket.lastAccessed >= constants.refreshTimeout): + if force or now - bucket.lastAccessed >= constants.checkRefreshInterval: searchID = self._randomIDInBucketRange(bucketIndex) refreshIDs.append(searchID) bucketIndex += 1 From c65274e9e52da668cd972a1a90d82a75acde1307 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 17:56:16 -0400 Subject: [PATCH 21/79] add PingQueue to KademliaProtocol --- lbrynet/dht/protocol.py | 74 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 52c99475c2..0ab200b131 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -1,6 +1,7 @@ import logging import socket import errno +from collections import deque from twisted.internet import protocol, defer from lbrynet.core.call_later_manager import CallLaterManager @@ -14,6 +15,76 @@ log = logging.getLogger(__name__) +class PingQueue(object): + """ + Schedules a 15 minute delayed ping after a new node sends us a query. This is so the new node gets added to the + routing table after having been given enough time for a pinhole to expire. 
+ """ + + def __init__(self, node): + self._node = node + self._get_time = self._node.clock.seconds + self._queue = deque() + self._enqueued_contacts = {} + self._semaphore = defer.DeferredSemaphore(1) + self._ping_semaphore = defer.DeferredSemaphore(constants.alpha) + self._process_lc = node.get_looping_call(self._semaphore.run, self._process) + self._delay = 300 + + def _add_contact(self, contact): + if contact in self._enqueued_contacts: + return defer.succeed(None) + self._enqueued_contacts[contact] = self._get_time() + self._delay + self._queue.append(contact) + return defer.succeed(None) + + @defer.inlineCallbacks + def _process(self): + if not len(self._queue): + defer.returnValue(None) + contact = self._queue.popleft() + now = self._get_time() + + # if the oldest contact in the queue isn't old enough to be pinged, add it back to the queue and return + if now < self._enqueued_contacts[contact]: + self._queue.appendleft(contact) + defer.returnValue(None) + + def _ping(contact): + d = contact.ping() + d.addErrback(lambda err: err.trap(TimeoutError)) + return d + + pinged = [] + checked = [] + while now > self._enqueued_contacts[contact]: + checked.append(contact) + if contact.contact_is_good is None: + pinged.append(contact) + if not len(self._queue): + break + contact = self._queue.popleft() + if not now > self._enqueued_contacts[contact]: + checked.append(contact) + # log.info("ping %i/%i peers", len(pinged), len(checked)) + + yield defer.DeferredList([self._ping_semaphore.run(_ping, contact) for contact in pinged]) + + for contact in checked: + if contact in self._enqueued_contacts: + del self._enqueued_contacts[contact] + + defer.returnValue(None) + + def start(self): + return self._node.safe_start_looping_call(self._process_lc, 60) + + def stop(self): + return self._node.safe_stop_looping_call(self._process_lc) + + def enqueue_maybe_ping(self, contact): + return self._semaphore.run(self._add_contact, contact) + class KademliaProtocol(protocol.DatagramProtocol): """ Implements all low-level network-related functions of a Kademlia node """ @@ -27,6 +98,7 @@ def __init__(self, node): self._partialMessages = {} self._partialMessagesProgress = {} self._listening = defer.Deferred(None) + self._ping_queue = PingQueue(self._node) def sendRPC(self, contact, method, args, rawResponse=False): """ @@ -100,6 +172,7 @@ def _update_contact(result): # refresh the contact in the routing table def startProtocol(self): log.info("DHT listening on UDP %s:%i", self._node.externalIP, self._node.port) self._listening.callback(True) + return self._ping_queue.start() def datagramReceived(self, datagram, address): """ Handles and parses incoming RPC messages (and responses) @@ -386,5 +459,6 @@ def stopProtocol(self): Will only be called once, after all ports are disconnected. 
""" log.info('Stopping DHT') + self._ping_queue.stop() CallLaterManager.stop() log.info('DHT stopped') From 372fb45e06883a3d3889d7b141ca414cbafe4b07 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:01:30 -0400 Subject: [PATCH 22/79] refactor dht bootstrap after finding the closest nodes try to populate the buckets out by looking up random ids in their key ranges --- lbrynet/core/Session.py | 2 +- lbrynet/dht/node.py | 115 ++++++++++++++++++++++++++++------------ 2 files changed, 83 insertions(+), 34 deletions(-) diff --git a/lbrynet/core/Session.py b/lbrynet/core/Session.py index 0543ad2116..d05c492393 100644 --- a/lbrynet/core/Session.py +++ b/lbrynet/core/Session.py @@ -234,7 +234,7 @@ def _setup_dht(self): # does not block startup, the dht will re-attempt if nece self.hash_announcer = hashannouncer.DHTHashAnnouncer(self.dht_node, self.storage) self.peer_manager = self.dht_node.peer_manager self.peer_finder = self.dht_node.peer_finder - self._join_dht_deferred = self.dht_node.joinNetwork(self.known_dht_nodes) + self._join_dht_deferred = self.dht_node.start(self.known_dht_nodes) self._join_dht_deferred.addCallback(lambda _: log.info("Joined the dht")) self._join_dht_deferred.addCallback(lambda _: self.hash_announcer.start()) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 34327a9642..a60b32dfdb 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -8,21 +8,19 @@ # may be created by processing this file with epydoc: http://epydoc.sf.net import binascii import hashlib -import operator import struct import time import logging from twisted.internet import defer, error, task -from lbrynet.core.utils import generate_id +from lbrynet.core.utils import generate_id, DeferredDict from lbrynet.core.call_later_manager import CallLaterManager from lbrynet.core.PeerManager import PeerManager - +from error import TimeoutError import constants import routingtable import datastore import protocol -from error import TimeoutError from peerfinder import DHTPeerFinder from contact import ContactManager from distance import Distance @@ -172,45 +170,93 @@ def start_listening(self): log.error("Couldn't bind to port %d. %s", self.port, traceback.format_exc()) raise ValueError("%s lbrynet may already be running." 
% str(e))
         else:
-            log.warning("Already bound to port %d", self._listeningPort.port)
+            log.warning("Already bound to port %s", self._listeningPort)
 
-    def bootstrap_join(self, known_node_addresses, finished_d):
+    @defer.inlineCallbacks
+    def joinNetwork(self, known_node_addresses=(('jack.lbry.tech', 4455), )):
         """
         Attempt to join the dht, retry every 30 seconds if unsuccessful
         :param known_node_addresses: [(str, int)] list of hostnames and ports for known dht seed nodes
-        :param finished_d: (defer.Deferred) called when join succeeds
         """
+
+        self._join_deferred = defer.Deferred()
+        known_node_resolution = {}
+
+        @defer.inlineCallbacks
         def _resolve_seeds():
+            result = {}
+            for host, port in known_node_addresses:
+                node_address = yield self.reactor_resolve(host)
+                result[(host, port)] = node_address
+            defer.returnValue(result)
+
+        if not known_node_resolution:
+            known_node_resolution = yield _resolve_seeds()
+            # we are one of the seed nodes, don't add ourselves
+            if (self.externalIP, self.port) in known_node_resolution.itervalues():
+                del known_node_resolution[(self.externalIP, self.port)]
+                known_node_addresses.remove((self.externalIP, self.port))
+
+        def _ping_contacts(contacts):
+            d = DeferredDict({contact: contact.ping() for contact in contacts}, consumeErrors=True)
+            d.addErrback(lambda err: err.trap(TimeoutError))
+            return d
+
+        @defer.inlineCallbacks
+        def _initialize_routing():
             bootstrap_contacts = []
-            for node_address, port in known_node_addresses:
-                host = yield self.reactor_resolve(node_address)
-                # Create temporary contact information for the list of addresses of known nodes
-                contact = Contact(self._generateID(), host, port, self._protocol)
-                bootstrap_contacts.append(contact)
-            if not bootstrap_contacts:
-                if not self.hasContacts():
-                    log.warning("No known contacts!")
+            contact_addresses = {(c.address, c.port): c for c in self.contacts}
+            for (host, port), ip_address in known_node_resolution.iteritems():
+                if (host, port) not in contact_addresses:
+                    # Create temporary contact information for the list of addresses of known nodes
+                    # The contact node id starts as None and will be set from the id of the node that responds
+                    contact = self.contact_manager.make_contact(None, ip_address, port, self._protocol)
+                    bootstrap_contacts.append(contact)
                 else:
-                    log.info("found contacts")
-                    bootstrap_contacts = self.contacts
-                defer.returnValue(bootstrap_contacts)
-
-        def _rerun(closest_nodes):
-            if not closest_nodes:
-                log.info("Failed to join the dht, re-attempting in 30 seconds")
-                self.reactor_callLater(30, self.bootstrap_join, known_node_addresses, finished_d)
-            elif not finished_d.called:
-                finished_d.callback(closest_nodes)
-
-        log.info("Attempting to join the DHT network")
-        d = _resolve_seeds()
-        # Initiate the Kademlia joining sequence - perform a search for this node's own ID
-        d.addCallback(lambda contacts: self._iterativeFind(self.node_id, contacts))
-        d.addCallback(_rerun)
+                    for contact in self.contacts:
+                        if contact.address == ip_address and contact.port == port:
+                            if not contact.id:
+                                bootstrap_contacts.append(contact)
+                            break
+            if not bootstrap_contacts:
+                log.warning("no bootstrap contacts to ping")
+            ping_result = yield _ping_contacts(bootstrap_contacts)
+            shortlist = ping_result.keys()
+            if not shortlist:
+                log.warning("failed to ping %i bootstrap contacts", len(bootstrap_contacts))
+                defer.returnValue(None)
+            else:
+                # find the closest peers to us
+                closest = yield self._iterativeFind(self.node_id, shortlist)
+                yield _ping_contacts(closest)
+                # query random hashes in our 
bucket key ranges to fill or split them + random_ids_in_range = self._routingTable.getRefreshList(force=True) + while random_ids_in_range: + yield self.iterativeFindNode(random_ids_in_range.pop()) + defer.returnValue(None) + + @defer.inlineCallbacks + def _iterative_join(joined_d=None, last_buckets_with_contacts=None): + log.info("Attempting to join the DHT network, %i contacts known so far", len(self.contacts)) + joined_d = joined_d or defer.Deferred() + yield _initialize_routing() + buckets_with_contacts = self.bucketsWithContacts() + if last_buckets_with_contacts and last_buckets_with_contacts == buckets_with_contacts: + if not joined_d.called: + joined_d.callback(True) + elif buckets_with_contacts < 4: + self.reactor_callLater(1, _iterative_join, joined_d, buckets_with_contacts) + elif not joined_d.called: + joined_d.callback(None) + yield joined_d + if not self._join_deferred.called: + self._join_deferred.callback(True) + defer.returnValue(None) + + yield _iterative_join() @defer.inlineCallbacks - def joinNetwork(self, known_node_addresses=None): + def start(self, known_node_addresses=None): """ Causes the Node to attempt to join the DHT network by contacting the known DHT nodes. This can be called multiple times if the previous attempt has failed or if the Node has lost all the contacts. @@ -225,9 +271,10 @@ def joinNetwork(self, known_node_addresses=None): self.start_listening() yield self._protocol._listening # TODO: Refresh all k-buckets further away than this node's closest neighbour + yield self.joinNetwork(known_node_addresses or []) + self.safe_start_looping_call(self._change_token_lc, constants.tokenSecretChangeInterval) # Start refreshing k-buckets periodically, if necessary - self.bootstrap_join(known_node_addresses or [], self._joinDeferred) self.safe_start_looping_call(self._refresh_node_lc, constants.checkRefreshInterval) @property @@ -244,6 +291,8 @@ def hasContacts(self): return True return False + def bucketsWithContacts(self): + return self._routingTable.bucketsWithContacts() def announceHaveBlob(self, key): return self.iterativeAnnounceHaveBlob( key, { From aee7a3aa383fd5f444c7d9eb9dfe5b408884c800 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:09:41 -0400 Subject: [PATCH 23/79] simplify announceHaveBlob, remove unused getPeersForBlob --- lbrynet/dht/node.py | 67 ++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index a60b32dfdb..41b4e7c02d 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -293,34 +293,9 @@ def hasContacts(self): def bucketsWithContacts(self): return self._routingTable.bucketsWithContacts() - def announceHaveBlob(self, key): - return self.iterativeAnnounceHaveBlob( - key, { - 'port': self.peerPort, - 'lbryid': self.node_id, - } - ) @defer.inlineCallbacks - def getPeersForBlob(self, blob_hash, include_node_ids=False): - result = yield self.iterativeFindValue(blob_hash) - expanded_peers = [] - if result: - if blob_hash in result: - for peer in result[blob_hash]: - host = ".".join([str(ord(d)) for d in peer[:4]]) - port, = struct.unpack('>H', peer[4:6]) - if not include_node_ids: - if (host, port) not in expanded_peers: - expanded_peers.append((host, port)) - else: - peer_node_id = peer[6:].encode('hex') - if (host, port, peer_node_id) not in expanded_peers: - expanded_peers.append((host, port, peer_node_id)) - defer.returnValue(expanded_peers) - - @defer.inlineCallbacks - def iterativeAnnounceHaveBlob(self, blob_hash, 
value):
+    def announceHaveBlob(self, blob_hash):
         known_nodes = {}
         contacts = yield self.iterativeFindNode(blob_hash)
         # store locally if we're the closest node and there are less than k contacts to try storing to
@@ -344,17 +319,14 @@ def announce_to_contact(contact):
             known_nodes[contact.id] = contact
             try:
                 responseMsg, originAddress = yield contact.findValue(blob_hash, rawResponse=True)
-                if responseMsg.nodeID != contact.id:
-                    raise Exception("node id mismatch")
-                value['token'] = responseMsg.response['token']
-                res = yield contact.store(blob_hash, value)
+                res = yield contact.store(blob_hash, responseMsg.response['token'], self.peerPort)
                 if res != "OK":
                     raise ValueError(res)
                 contacted.append(contact)
                 log.debug("Stored %s to %s (%s)", blob_hash.encode('hex'), contact.id.encode('hex'), originAddress[0])
             except protocol.TimeoutError:
                 log.debug("Timeout while storing blob_hash %s at %s",
-                          blob_hash.encode('hex')[:16], contact.id.encode('hex'))
+                          blob_hash.encode('hex')[:16], contact.log_id())
             except ValueError as err:
                 log.error("Unexpected response: %s" % err.message)
             except Exception as err:
@@ -430,12 +402,15 @@ def iterativeFindValue(self, key):
 
         @rtype: twisted.internet.defer.Deferred
         """
+        if len(key) != constants.key_bits / 8:
+            raise ValueError("invalid key length!")
+
         # Execute the search
-        iterative_find_result = yield self._iterativeFind(key, rpc='findValue')
-        if isinstance(iterative_find_result, dict):
+        find_result = yield self._iterativeFind(key, rpc='findValue')
+        if isinstance(find_result, dict):
             # We have found the value; now see who was the closest contact without it...
             # ...and store the key/value pair
-            defer.returnValue(iterative_find_result)
+            pass
         else:
             # The value wasn't found, but a list of contacts was returned
             # Now, see if we have the value (it might seem wasteful to search on the network
@@ -445,10 +420,28 @@ def iterativeFindValue(self, key):
                 # Ok, we have the value locally, so use that
                 # Send this value to the closest node without it
                 peers = self._dataStore.getPeersForBlob(key)
-                defer.returnValue({key: peers})
+                find_result = {key: peers}
             else:
-                # Ok, value does not exist in DHT at all
-                defer.returnValue(iterative_find_result)
+                pass
+
+        expanded_peers = []
+        if find_result:
+            if key in find_result:
+                for peer in find_result[key]:
+                    host = ".".join([str(ord(d)) for d in peer[:4]])
+                    port, = struct.unpack('>H', peer[4:6])
+                    peer_node_id = peer[6:]
+                    if (host, port, peer_node_id) not in expanded_peers:
+                        expanded_peers.append((peer_node_id, host, port))
+        # TODO: get this working
+        # if 'closestNodeNoValue' in find_result:
+        #     closest_node_without_value = find_result['closestNodeNoValue']
+        #     try:
+        #         response, address = yield closest_node_without_value.findValue(key, rawResponse=True)
+        #         yield closest_node_without_value.store(key, response.response['token'], self.peerPort)
+        #     except TimeoutError:
+        #         pass
+        defer.returnValue(expanded_peers)
 
     def addContact(self, contact):
         """ Add/update the given contact; simple wrapper for the same method
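The expansion loop added to iterativeFindValue decodes the DHT's compact peer
encoding: four bytes of IP address, a two-byte big-endian port, then the announcing
node's id. A small round-trip sketch with made-up values:

    import struct

    node_id = '\xab' * 48  # hypothetical 384-bit node id
    compact = ''.join(chr(int(o)) for o in '10.0.0.1'.split('.')) \
              + struct.pack('>H', 3333) + node_id

    host = '.'.join(str(ord(c)) for c in compact[:4])  # '10.0.0.1'
    port, = struct.unpack('>H', compact[4:6])          # 3333
    peer_node_id = compact[6:]                         # == node_id

From ae22468fecfdba50be99cf715405458c0c824306 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 23 May 2018 18:10:23 -0400
Subject: [PATCH 24/79] fix CallLaterManager trying to remove pending calls
 multiple times

---
 lbrynet/core/call_later_manager.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lbrynet/core/call_later_manager.py b/lbrynet/core/call_later_manager.py
index 2bf858a40c..de73953229 100644
--- a/lbrynet/core/call_later_manager.py
+++ b/lbrynet/core/call_later_manager.py
@@ -38,7 +38,8 @@ def cancel(reason=None):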
+38,8 @@ def cancel(reason=None): if call_later.active(): call_later.cancel() - cls._pendingCallLaters.remove(call_later) + if call_later in cls._pendingCallLaters: + cls._pendingCallLaters.remove(call_later) return reason return cancel @@ -53,7 +54,7 @@ def stop(cls): canceller = cls._cancel(cls._pendingCallLaters[0]) try: canceller() - except (defer.CancelledError, defer.AlreadyCalledError): + except (defer.CancelledError, defer.AlreadyCalledError, ValueError): pass @classmethod From 9920ff59d48cee84e2bede73b0dcb5329dbda2de Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:11:14 -0400 Subject: [PATCH 25/79] force KBucket refreshes --- lbrynet/dht/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 41b4e7c02d..5aeb493c31 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -657,7 +657,7 @@ def _refreshNode(self): @defer.inlineCallbacks def _refreshRoutingTable(self): - nodeIDs = self._routingTable.getRefreshList(0, False) + nodeIDs = self._routingTable.getRefreshList(0, True) while nodeIDs: searchID = nodeIDs.pop() yield self.iterativeFindNode(searchID) From 4f72098cadd30a67d69cbd3d61f4f728ecc87f4c Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:11:41 -0400 Subject: [PATCH 26/79] use PingQueue to try refresh all contacts --- lbrynet/dht/node.py | 7 ++++++- lbrynet/dht/protocol.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 5aeb493c31..4286e69ac3 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -650,14 +650,19 @@ def _iterativeFind(self, key, startupShortlist=None, rpc='findNode'): def _refreshNode(self): """ Periodically called to perform k-bucket refreshes and data replication/republishing as necessary """ - yield self._refreshRoutingTable() self._dataStore.removeExpiredPeers() defer.returnValue(None) + def _refreshContacts(self): + return defer.DeferredList( + [self._protocol._ping_queue.enqueue_maybe_ping(contact) for contact in self.contacts] + ) + @defer.inlineCallbacks def _refreshRoutingTable(self): nodeIDs = self._routingTable.getRefreshList(0, True) + yield self._refreshContacts() while nodeIDs: searchID = nodeIDs.pop() yield self.iterativeFindNode(searchID) diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 0ab200b131..49325770c2 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -85,6 +85,7 @@ def stop(self): def enqueue_maybe_ping(self, contact): return self._semaphore.run(self._add_contact, contact) + class KademliaProtocol(protocol.DatagramProtocol): """ Implements all low-level network-related functions of a Kademlia node """ From cc32d987b2a5e49f04e6ea52b405319dceaf4f31 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:26:33 -0400 Subject: [PATCH 27/79] update peer_list --- lbrynet/daemon/Daemon.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/lbrynet/daemon/Daemon.py b/lbrynet/daemon/Daemon.py index 94f3177530..2945e2f3c0 100644 --- a/lbrynet/daemon/Daemon.py +++ b/lbrynet/daemon/Daemon.py @@ -2903,24 +2903,22 @@ def jsonrpc_peer_list(self, blob_hash, timeout=None): if not utils.is_valid_blobhash(blob_hash): raise Exception("invalid blob hash") - finished_deferred = self.session.dht_node.getPeersForBlob(binascii.unhexlify(blob_hash), True) + finished_deferred = self.session.dht_node.iterativeFindValue(binascii.unhexlify(blob_hash)) - def _trigger_timeout(): - if not 
finished_deferred.called: - log.debug("Peer search for %s timed out", blob_hash) - finished_deferred.cancel() - - timeout = timeout or conf.settings['peer_search_timeout'] - self.session.dht_node.reactor_callLater(timeout, _trigger_timeout) + def trap_timeout(err): + err.trap(defer.TimeoutError) + return [] + finished_deferred.addTimeout(timeout or conf.settings['peer_search_timeout'], self.session.dht_node.clock) + finished_deferred.addErrback(trap_timeout) peers = yield finished_deferred results = [ { + "node_id": node_id.encode('hex'), "host": host, - "port": port, - "node_id": node_id + "port": port } - for host, port, node_id in peers + for node_id, host, port in peers ] defer.returnValue(results) From bdd6f948cac8fa9cc3612cb38e451abfbb5298aa Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:26:43 -0400 Subject: [PATCH 28/79] add port to routing_table_get --- lbrynet/daemon/Daemon.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lbrynet/daemon/Daemon.py b/lbrynet/daemon/Daemon.py index 2945e2f3c0..83ba743da5 100644 --- a/lbrynet/daemon/Daemon.py +++ b/lbrynet/daemon/Daemon.py @@ -3137,6 +3137,7 @@ def jsonrpc_routing_table_get(self): : [ { "address": (str) peer address, + "port": (int) peer udp port "node_id": (str) peer node id, "blobs": (list) blob hashes announced by peer } @@ -3159,7 +3160,7 @@ def jsonrpc_routing_table_get(self): try: contact = self.session.dht_node._routingTable.getContact( originalPublisherID) - except ValueError: + except (ValueError, IndexError): continue if contact in hosts: blobs = hosts[contact] @@ -3182,6 +3183,7 @@ def jsonrpc_routing_table_get(self): blobs = [] host = { "address": contact.address, + "port": contact.port, "node_id": contact.id.encode("hex"), "blobs": blobs, } From e1079a0c0f5761cf641d29260070a48087f8cae0 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:27:14 -0400 Subject: [PATCH 29/79] add seed node script and monitor tool, update dht monitor script --- scripts/dht_monitor.py | 6 +- scripts/dht_seed_monitor.py | 85 ++++++++++++++ scripts/seed_node.py | 218 ++++++++++++++++++++++++++++++++++++ 3 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 scripts/dht_seed_monitor.py create mode 100644 scripts/seed_node.py diff --git a/scripts/dht_monitor.py b/scripts/dht_monitor.py index 60a07f799d..abb7ca66ea 100644 --- a/scripts/dht_monitor.py +++ b/scripts/dht_monitor.py @@ -1,7 +1,7 @@ import curses import time -from jsonrpc.proxy import JSONRPCProxy import logging +from lbrynet.daemon import get_client log = logging.getLogger(__name__) log.addHandler(logging.FileHandler("dht contacts.log")) @@ -9,7 +9,7 @@ log.setLevel(logging.INFO) stdscr = curses.initscr() -api = JSONRPCProxy.from_url("http://localhost:5279") +api = get_client() def init_curses(): @@ -53,7 +53,7 @@ def refresh(last_contacts, last_blobs): stdscr.addstr(y, 0, "bucket %s" % i) y += 1 for h in sorted(buckets[i], key=lambda x: x['node_id'].decode('hex')): - stdscr.addstr(y, 0, '%s (%s) - %i blobs' % (h['node_id'], h['address'], + stdscr.addstr(y, 0, '%s (%s:%i) - %i blobs' % (h['node_id'], h['address'], h['port'], len(h['blobs']))) y += 1 y += 1 diff --git a/scripts/dht_seed_monitor.py b/scripts/dht_seed_monitor.py new file mode 100644 index 0000000000..f075fb7411 --- /dev/null +++ b/scripts/dht_seed_monitor.py @@ -0,0 +1,85 @@ +import curses +import time +import datetime +from jsonrpc.proxy import JSONRPCProxy + +stdscr = curses.initscr() + +api = JSONRPCProxy.from_url("http://localhost:5280") + + 
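The peer_list rewrite in PATCH 27 above swaps the hand-rolled _trigger_timeout/callLater pattern for Deferred.addTimeout, which cancels the search itself and errbacks with defer.TimeoutError. A minimal sketch of that pattern, assuming only Twisted 16.5+ for addTimeout (the function and variable names here are illustrative, not part of the patch):

    from twisted.internet import defer, task

    def with_empty_result_on_timeout(d, clock, timeout):
        # addTimeout cancels `d` after `timeout` seconds on `clock`; the
        # resulting defer.TimeoutError is trapped so callers get an empty
        # peer list instead of a failure, mirroring trap_timeout above
        def trap_timeout(err):
            err.trap(defer.TimeoutError)
            return []
        d.addTimeout(timeout, clock)
        d.addErrback(trap_timeout)
        return d

    clock = task.Clock()        # a fake IReactorTime, as the dht tests use
    search = defer.Deferred()   # stands in for iterativeFindValue(...)
    result = with_empty_result_on_timeout(search, clock, 3)
    clock.advance(3)            # timeout fires; `result` ends with []
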
+def init_curses(): + curses.noecho() + curses.cbreak() + stdscr.nodelay(1) + stdscr.keypad(1) + + +def teardown_curses(): + curses.nocbreak() + stdscr.keypad(0) + curses.echo() + curses.endwin() + + +def refresh(node_index): + height, width = stdscr.getmaxyx() + node_ids = api.get_node_ids() + node_id = node_ids[node_index] + node_statuses = api.node_status() + running = node_statuses[node_id] + buckets = api.node_routing_table(node_id=node_id) + + for y in range(height): + stdscr.addstr(y, 0, " " * (width - 1)) + + stdscr.addstr(0, 0, "node id: %s, running: %s (%i/%i running)" % (node_id, running, sum(node_statuses.values()), len(node_ids))) + stdscr.addstr(1, 0, "%i buckets, %i contacts" % + (len(buckets), sum([len(buckets[b]['contacts']) for b in buckets]))) + + y = 3 + for i in sorted(buckets.keys()): + stdscr.addstr(y, 0, "bucket %s" % i) + y += 1 + for h in sorted(buckets[i]['contacts'], key=lambda x: x['node_id'].decode('hex')): + stdscr.addstr(y, 0, '%s (%s:%i) failures: %i, last replied to us: %s, last requested from us: %s' % + (h['node_id'], h['address'], h['port'], h['failedRPCs'], + datetime.datetime.fromtimestamp(float(h['lastReplied'] or 0)), + datetime.datetime.fromtimestamp(float(h['lastRequested'] or 0)))) + y += 1 + y += 1 + + stdscr.addstr(y + 1, 0, str(time.time())) + stdscr.refresh() + return len(node_ids) + + +def do_main(): + c = None + nodes = 1 + node_index = 0 + while c not in [ord('q'), ord('Q')]: + try: + nodes = refresh(node_index) + except: + pass + c = stdscr.getch() + if c == curses.KEY_LEFT: + node_index -= 1 + node_index = max(node_index, 0) + elif c == curses.KEY_RIGHT: + node_index += 1 + node_index = min(node_index, nodes - 1) + time.sleep(0.1) + + +def main(): + try: + init_curses() + do_main() + finally: + teardown_curses() + + +if __name__ == "__main__": + main() diff --git a/scripts/seed_node.py b/scripts/seed_node.py new file mode 100644 index 0000000000..18e349dbe9 --- /dev/null +++ b/scripts/seed_node.py @@ -0,0 +1,218 @@ +import struct +import json +import logging +import argparse +import hashlib +from copy import deepcopy +from urllib import urlopen +from twisted.internet import reactor, defer +from twisted.web import resource +from twisted.web.server import Site +from lbrynet import conf +from lbrynet.dht import constants +from lbrynet.dht.node import Node +from lbrynet.dht.error import TransportNotConnected +from lbrynet.core.log_support import configure_console, configure_twisted +from lbrynet.daemon.auth.server import AuthJSONRPCServer + +# configure_twisted() +conf.initialize_settings() +configure_console() +lbrynet_handler = logging.getLogger("lbrynet").handlers[0] +log = logging.getLogger("dht router") +log.addHandler(lbrynet_handler) +log.setLevel(logging.INFO) + + +def node_id_supplier(seed="jack.lbry.tech"): # simple deterministic node id generator + h = hashlib.sha384() + h.update(seed) + while True: + next_id = h.digest() + yield next_id + h = hashlib.sha384() + h.update(seed) + h.update(next_id) + + +def get_external_ip(): + response = json.loads(urlopen("https://api.lbry.io/ip").read()) + if not response['success']: + raise ValueError("failed to get external ip") + return response['data']['ip'] + + +def format_contact(contact): + return { + "node_id": contact.id.encode('hex'), + "address": contact.address, + "port": contact.port, + "lastReplied": contact.lastReplied, + "lastRequested": contact.lastRequested, + "failedRPCs": contact.failedRPCs + } + + +class MultiSeedRPCServer(AuthJSONRPCServer): + def __init__(self, 
starting_node_port=4455, nodes=50, rpc_port=5280): + AuthJSONRPCServer.__init__(self, False) + self.port = None + self.rpc_port = rpc_port + self.external_ip = get_external_ip() + node_id_gen = node_id_supplier() + self._nodes = [Node(node_id=next(node_id_gen), udpPort=starting_node_port+i, externalIP=self.external_ip) + for i in range(nodes)] + self._own_addresses = [(self.external_ip, starting_node_port+i) for i in range(nodes)] + reactor.addSystemEventTrigger('after', 'startup', self.start) + + @defer.inlineCallbacks + def start(self): + self.announced_startup = True + root = resource.Resource() + root.putChild('', self) + self.port = reactor.listenTCP(self.rpc_port, Site(root), interface='localhost') + log.info("starting %i nodes on %s, rpc available on localhost:%i", len(self._nodes), self.external_ip, self.rpc_port) + + for node in self._nodes: + node.start_listening() + yield node._protocol._listening + + for node1 in self._nodes: + for node2 in self._nodes: + if node1 is node2: + continue + try: + yield node1.addContact(node1.contact_manager.make_contact(node2.node_id, node2.externalIP, + node2.port, node1._protocol)) + except TransportNotConnected: + pass + node1.safe_start_looping_call(node1._change_token_lc, constants.tokenSecretChangeInterval) + node1.safe_start_looping_call(node1._refresh_node_lc, constants.checkRefreshInterval) + node1._join_deferred = defer.succeed(True) + reactor.addSystemEventTrigger('before', 'shutdown', self.stop) + log.info("finished bootstrapping the network, running %i nodes", len(self._nodes)) + + @defer.inlineCallbacks + def stop(self): + yield self.port.stopListening() + yield defer.DeferredList([node.stop() for node in self._nodes]) + + def jsonrpc_get_node_ids(self): + return defer.succeed([node.node_id.encode('hex') for node in self._nodes]) + + def jsonrpc_node_datastore(self, node_id): + def format_datastore(node): + datastore = deepcopy(node._dataStore._dict) + result = {} + for key, values in datastore.iteritems(): + contacts = [] + for (value, last_published, originally_published, original_publisher_id) in values: + host = ".".join([str(ord(d)) for d in value[:4]]) + port, = struct.unpack('>H', value[4:6]) + peer_node_id = value[6:] + contact_dict = format_contact(node.contact_manager.make_contact(peer_node_id, host, port)) + contact_dict['lastPublished'] = last_published + contact_dict['originallyPublished'] = originally_published + contact_dict['originalPublisherID'] = original_publisher_id + contacts.append(contact_dict) + result[key.encode('hex')] = contacts + return result + + for node in self._nodes: + if node.node_id == node_id.decode('hex'): + return defer.succeed(format_datastore(node)) + + def jsonrpc_node_routing_table(self, node_id): + def format_bucket(bucket): + return { + "contacts": [format_contact(contact) for contact in bucket._contacts], + "lastAccessed": bucket.lastAccessed + } + + def format_routing(node): + return { + i: format_bucket(bucket) for i, bucket in enumerate(node._routingTable._buckets) + } + + for node in self._nodes: + if node.node_id == node_id.decode('hex'): + return defer.succeed(format_routing(node)) + + def jsonrpc_restart_node(self, node_id): + for node in self._nodes: + if node.node_id == node_id.decode('hex'): + d = node.stop() + d.addCallback(lambda _: node.start(self._own_addresses)) + return d + + @defer.inlineCallbacks + def jsonrpc_local_node_rpc(self, from_node, query, args=()): + def format_result(response): + if isinstance(response, list): + return [[node_id.encode('hex'), address, port] 
for (node_id, address, port) in response] + if isinstance(response, dict): + return {'token': response['token'].encode('hex'), 'contacts': format_result(response['contacts'])} + return response + + for node in self._nodes: + if node.node_id == from_node.decode('hex'): + fn = getattr(node, query) + self_contact = node.contact_manager.make_contact(node.node_id, node.externalIP, node.port, node._protocol) + if args: + args = (str(arg) if isinstance(arg, (str, unicode)) else int(arg) for arg in args) + result = yield fn(self_contact, *args) + else: + result = yield fn() + # print "result: %s" % result + defer.returnValue(format_result(result)) + + @defer.inlineCallbacks + def jsonrpc_node_rpc(self, from_node, to_node, query, args=()): + def format_result(response): + if isinstance(response, list): + return [[node_id.encode('hex'), address, port] for (node_id, address, port) in response] + if isinstance(response, dict): + return {'token': response['token'].encode('hex'), 'contacts': format_result(response['contacts'])} + return response + + for node in self._nodes: + if node.node_id == from_node.decode('hex'): + remote = node._routingTable.getContact(to_node.decode('hex')) + fn = getattr(remote, query) + if args: + args = (str(arg).decode('hex') for arg in args) + result = yield fn(*args) + else: + result = yield fn() + defer.returnValue(format_result(result)) + + @defer.inlineCallbacks + def jsonrpc_get_nodes_who_know(self, ip_address): + nodes = [] + for node_id in [n.node_id.encode('hex') for n in self._nodes]: + routing_info = yield self.jsonrpc_node_routing_table(node_id=node_id) + for index, bucket in routing_info.iteritems(): + if ip_address in map(lambda c: c['address'], bucket['contacts']): + nodes.append(node_id) + break + defer.returnValue(nodes) + + def jsonrpc_node_status(self): + return defer.succeed({ + node.node_id.encode('hex'): node._join_deferred is not None and node._join_deferred.called + for node in self._nodes + }) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--rpc_port', default=5280) + parser.add_argument('--starting_port', default=4455) + parser.add_argument('--nodes', default=50) + args = parser.parse_args() + MultiSeedRPCServer(int(args.starting_port), int(args.nodes), int(args.rpc_port)) + reactor.run() + + +if __name__ == "__main__": + main() From 950ec5bc9a00a8765483f82bfeedf128d07fdb4a Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 18:28:22 -0400 Subject: [PATCH 30/79] update mocks and dht tests -reorganize dht tests --- lbrynet/tests/functional/dht/__init__.py | 0 .../functional/dht/dht_test_environment.py | 174 +++++++++ .../tests/functional/dht/mock_transport.py | 149 +++++++ .../functional/dht/test_bootstrap_network.py | 10 + .../tests/functional/dht/test_contact_rpc.py | 200 ++++++++++ lbrynet/tests/functional/test_dht.py | 274 ------------- lbrynet/tests/mocks.py | 106 +---- .../unit/core/server/test_DHTHashAnnouncer.py | 87 ++--- lbrynet/tests/unit/dht/test_contact.py | 33 +- lbrynet/tests/unit/dht/test_datastore.py | 27 +- lbrynet/tests/unit/dht/test_encoding.py | 15 +- lbrynet/tests/unit/dht/test_kbucket.py | 56 +-- lbrynet/tests/unit/dht/test_messages.py | 2 +- lbrynet/tests/unit/dht/test_node.py | 348 ++++++++--------- lbrynet/tests/unit/dht/test_protocol.py | 367 ++++++++---------- lbrynet/tests/unit/dht/test_routingtable.py | 253 ++++++------ lbrynet/tests/util.py | 32 -- 17 files changed, 1099 insertions(+), 1034 deletions(-) create mode 100644 lbrynet/tests/functional/dht/__init__.py create mode 
100644 lbrynet/tests/functional/dht/dht_test_environment.py create mode 100644 lbrynet/tests/functional/dht/mock_transport.py create mode 100644 lbrynet/tests/functional/dht/test_bootstrap_network.py create mode 100644 lbrynet/tests/functional/dht/test_contact_rpc.py delete mode 100644 lbrynet/tests/functional/test_dht.py diff --git a/lbrynet/tests/functional/dht/__init__.py b/lbrynet/tests/functional/dht/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lbrynet/tests/functional/dht/dht_test_environment.py b/lbrynet/tests/functional/dht/dht_test_environment.py new file mode 100644 index 0000000000..57af2c68ad --- /dev/null +++ b/lbrynet/tests/functional/dht/dht_test_environment.py @@ -0,0 +1,174 @@ +import logging +from twisted.trial import unittest +from twisted.internet import defer, task +from lbrynet.dht.node import Node +from mock_transport import resolve, listenUDP, MOCK_DHT_SEED_DNS, mock_node_generator + + +log = logging.getLogger(__name__) + + +class TestKademliaBase(unittest.TestCase): + timeout = 300.0 # timeout for each test + network_size = 16 # including seed nodes + node_ids = None + seed_dns = MOCK_DHT_SEED_DNS + + def _add_next_node(self): + node_id, node_ip = self.mock_node_generator.next() + node = Node(node_id=node_id.decode('hex'), udpPort=4444, peerPort=3333, externalIP=node_ip, + resolve=resolve, listenUDP=listenUDP, callLater=self.clock.callLater, clock=self.clock) + self.nodes.append(node) + return node + + @defer.inlineCallbacks + def add_node(self): + node = self._add_next_node() + yield node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())]) + defer.returnValue(node) + + def get_node(self, node_id): + for node in self.nodes: + if node.node_id == node_id: + return node + raise KeyError(node_id) + + @defer.inlineCallbacks + def pop_node(self): + node = self.nodes.pop() + yield node.stop() + + def pump_clock(self, n, step=0.1, tick_callback=None): + """ + :param n: seconds to run the reactor for + :param step: reactor tick rate (in seconds) + """ + for _ in range(int(n * (1.0 / float(step)))): + self.clock.advance(step) + if tick_callback and callable(tick_callback): + tick_callback(self.clock.seconds()) + + def run_reactor(self, seconds, deferreds, tick_callback=None): + d = defer.DeferredList(deferreds) + self.pump_clock(seconds, tick_callback=tick_callback) + return d + + def get_contacts(self): + contacts = {} + for seed in self._seeds: + contacts[seed] = seed.contacts + for node in self._seeds: + contacts[node] = node.contacts + return contacts + + def get_routable_addresses(self): + known = set() + for n in self._seeds: + known.update([(c.id, c.address, c.port) for c in n.contacts]) + for n in self.nodes: + known.update([(c.id, c.address, c.port) for c in n.contacts]) + addresses = {triple[1] for triple in known} + return addresses + + def get_online_addresses(self): + online = set() + for n in self._seeds: + online.add(n.externalIP) + for n in self.nodes: + online.add(n.externalIP) + return online + + def show_info(self): + known = set() + for n in self._seeds: + known.update([(c.id, c.address, c.port) for c in n.contacts]) + for n in self.nodes: + known.update([(c.id, c.address, c.port) for c in n.contacts]) + + log.info("Routable: %i/%i", len(known), len(self.nodes) + len(self._seeds)) + for n in self._seeds: + log.info("seed %s has %i contacts in %i buckets", n.externalIP, len(n.contacts), + len([b for b in n._routingTable._buckets if b.getContacts()])) + for n in self.nodes: + log.info("node %s has %i 
contacts in %i buckets", n.externalIP, len(n.contacts), + len([b for b in n._routingTable._buckets if b.getContacts()])) + + @defer.inlineCallbacks + def setUp(self): + self.nodes = [] + self._seeds = [] + self.clock = task.Clock() + self.mock_node_generator = mock_node_generator(mock_node_ids=self.node_ids) + + seed_dl = [] + seeds = sorted(list(self.seed_dns.keys())) + known_addresses = [(seed_name, 4444) for seed_name in seeds] + for seed_dns in seeds: + self._add_next_node() + seed = self.nodes.pop() + self._seeds.append(seed) + seed_dl.append( + seed.start(known_addresses) + ) + yield self.run_reactor(901, seed_dl) + while len(self.nodes + self._seeds) < self.network_size: + network_dl = [] + for i in range(min(10, self.network_size - len(self._seeds) - len(self.nodes))): + network_dl.append(self.add_node()) + yield self.run_reactor(31, network_dl) + self.assertEqual(len(self.nodes + self._seeds), self.network_size) + self.pump_clock(1800) + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() + + @defer.inlineCallbacks + def tearDown(self): + dl = [] + while self.nodes: + dl.append(self.pop_node()) # stop all of the nodes + while self._seeds: + dl.append(self._seeds.pop().stop()) # and the seeds + yield defer.DeferredList(dl) + + def verify_all_nodes_are_routable(self): + routable = set() + node_addresses = {node.externalIP for node in self.nodes} + node_addresses = node_addresses.union({node.externalIP for node in self._seeds}) + for node in self._seeds: + contact_addresses = {contact.address for contact in node.contacts} + routable.update(contact_addresses) + for node in self.nodes: + contact_addresses = {contact.address for contact in node.contacts} + routable.update(contact_addresses) + self.assertSetEqual(routable, node_addresses) + + @defer.inlineCallbacks + def verify_all_nodes_are_pingable(self): + ping_replies = {} + ping_dl = [] + contacted = set() + + def _ping_cb(result, node, replies): + replies[node] = result + + for node in self._seeds: + contact_addresses = set() + for contact in node.contacts: + contact_addresses.add(contact.address) + d = contact.ping() + d.addCallback(_ping_cb, contact.address, ping_replies) + contacted.add(contact.address) + ping_dl.append(d) + for node in self.nodes: + contact_addresses = set() + for contact in node.contacts: + contact_addresses.add(contact.address) + d = contact.ping() + d.addCallback(_ping_cb, contact.address, ping_replies) + contacted.add(contact.address) + ping_dl.append(d) + yield self.run_reactor(2, ping_dl) + node_addresses = {node.externalIP for node in self.nodes}.union({seed.externalIP for seed in self._seeds}) + self.assertSetEqual(node_addresses, contacted) + expected = {node: "pong" for node in contacted} + self.assertDictEqual(ping_replies, expected) diff --git a/lbrynet/tests/functional/dht/mock_transport.py b/lbrynet/tests/functional/dht/mock_transport.py new file mode 100644 index 0000000000..3ce0bae763 --- /dev/null +++ b/lbrynet/tests/functional/dht/mock_transport.py @@ -0,0 +1,149 @@ +import struct +import logging +from twisted.internet import defer, error +from lbrynet.core.utils import generate_id +from lbrynet.dht.encoding import Bencode +from lbrynet.dht.error import DecodeError +from lbrynet.dht.msgformat import DefaultFormat +from lbrynet.dht.msgtypes import ResponseMessage, RequestMessage, ErrorMessage + +_encode = Bencode() +_datagram_formatter = DefaultFormat() + +log = logging.getLogger() + +MOCK_DHT_NODES = [ + 
"cc8db9d0dd9b65b103594b5f992adf09f18b310958fa451d61ce8d06f3ee97a91461777c2b7dea1a89d02d2f23eb0e4f", + "83a3a398eead3f162fbbe1afb3d63482bb5b6d3cdd8f9b0825c1dfa58dffd3f6f6026d6e64d6d4ae4c3dfe2262e734ba", + "b6928ff25778a7bbb5d258d3b3a06e26db1654f3d2efce8c26681d43f7237cdf2e359a4d309c4473d5d89ec99fb4f573", +] + +MOCK_DHT_SEED_DNS = { # these map to mock nodes 0, 1, and 2 + "lbrynet1.lbry.io": "10.42.42.1", + "lbrynet2.lbry.io": "10.42.42.2", + "lbrynet3.lbry.io": "10.42.42.3", + "lbrynet4.lbry.io": "10.42.42.4", + "lbrynet5.lbry.io": "10.42.42.5", + "lbrynet6.lbry.io": "10.42.42.6", + "lbrynet7.lbry.io": "10.42.42.7", + "lbrynet8.lbry.io": "10.42.42.8", + "lbrynet9.lbry.io": "10.42.42.9", + "lbrynet10.lbry.io": "10.42.42.10", + "lbrynet11.lbry.io": "10.42.42.11", + "lbrynet12.lbry.io": "10.42.42.12", + "lbrynet13.lbry.io": "10.42.42.13", + "lbrynet14.lbry.io": "10.42.42.14", + "lbrynet15.lbry.io": "10.42.42.15", + "lbrynet16.lbry.io": "10.42.42.16", +} + + +def resolve(name, timeout=(1, 3, 11, 45)): + if name not in MOCK_DHT_SEED_DNS: + return defer.fail(error.DNSLookupError(name)) + return defer.succeed(MOCK_DHT_SEED_DNS[name]) + + +class MockUDPTransport(object): + def __init__(self, address, port, max_packet_size, protocol): + self.address = address + self.port = port + self.max_packet_size = max_packet_size + self._node = protocol._node + + def write(self, data, address): + if address in MockNetwork.peers: + dest = MockNetwork.peers[address][0] + debug_kademlia_packet(data, (self.address, self.port), address, self._node) + dest.datagramReceived(data, (self.address, self.port)) + else: # the node is sending to an address that doesnt currently exist, act like it never arrived + pass + + +class MockUDPPort(object): + def __init__(self, protocol, remover): + self.protocol = protocol + self._remover = remover + + def startListening(self, reason=None): + return self.protocol.startProtocol() + + def stopListening(self, reason=None): + result = self.protocol.stopProtocol() + self._remover() + return result + + +class MockNetwork(object): + peers = {} # (interface, port): (protocol, max_packet_size) + + @classmethod + def add_peer(cls, port, protocol, interface, maxPacketSize): + interface = protocol._node.externalIP + protocol.transport = MockUDPTransport(interface, port, maxPacketSize, protocol) + cls.peers[(interface, port)] = (protocol, maxPacketSize) + + def remove_peer(): + del protocol.transport + if (interface, port) in cls.peers: + del cls.peers[(interface, port)] + + return remove_peer + + +def listenUDP(port, protocol, interface='', maxPacketSize=8192): + remover = MockNetwork.add_peer(port, protocol, interface, maxPacketSize) + port = MockUDPPort(protocol, remover) + port.startListening() + return port + + +def address_generator(address=(10, 42, 42, 1)): + def increment(addr): + value = struct.unpack("I", "".join([chr(x) for x in list(addr)[::-1]]))[0] + 1 + new_addr = [] + for i in range(4): + new_addr.append(value % 256) + value >>= 8 + return tuple(new_addr[::-1]) + + while True: + yield "{}.{}.{}.{}".format(*address) + address = increment(address) + + +def mock_node_generator(count=None, mock_node_ids=MOCK_DHT_NODES): + if mock_node_ids is None: + mock_node_ids = MOCK_DHT_NODES + mock_node_ids = list(mock_node_ids) + + for num, node_ip in enumerate(address_generator()): + if count and num >= count: + break + if num >= len(mock_node_ids): + node_id = generate_id().encode('hex') + else: + node_id = mock_node_ids[num] + yield (node_id, node_ip) + + +def debug_kademlia_packet(data, 
source, destination, node): + if log.level != logging.DEBUG: + return + try: + packet = _datagram_formatter.fromPrimitive(_encode.decode(data)) + if isinstance(packet, RequestMessage): + log.debug("request %s --> %s %s (node time %s)", source[0], destination[0], packet.request, + node.clock.seconds()) + elif isinstance(packet, ResponseMessage): + if isinstance(packet.response, (str, unicode)): + log.debug("response %s <-- %s %s (node time %s)", destination[0], source[0], packet.response, + node.clock.seconds()) + else: + log.debug("response %s <-- %s %i contacts (node time %s)", destination[0], source[0], + len(packet.response), node.clock.seconds()) + elif isinstance(packet, ErrorMessage): + log.error("error %s <-- %s %s (node time %s)", destination[0], source[0], packet.exceptionType, + node.clock.seconds()) + except DecodeError: + log.exception("decode error %s --> %s (node time %s)", source[0], destination[0], node.clock.seconds()) diff --git a/lbrynet/tests/functional/dht/test_bootstrap_network.py b/lbrynet/tests/functional/dht/test_bootstrap_network.py new file mode 100644 index 0000000000..e31c87fe06 --- /dev/null +++ b/lbrynet/tests/functional/dht/test_bootstrap_network.py @@ -0,0 +1,10 @@ +from dht_test_environment import TestKademliaBase + + +class TestKademliaBootstrap(TestKademliaBase): + """ + Test initializing the network / connecting the seed nodes + """ + + def test_bootstrap_seed_nodes(self): + pass diff --git a/lbrynet/tests/functional/dht/test_contact_rpc.py b/lbrynet/tests/functional/dht/test_contact_rpc.py new file mode 100644 index 0000000000..14641a011e --- /dev/null +++ b/lbrynet/tests/functional/dht/test_contact_rpc.py @@ -0,0 +1,200 @@ +import time +import unittest +import logging +from twisted.internet.task import Clock +from twisted.internet import defer +import lbrynet.dht.protocol +import lbrynet.dht.contact +import lbrynet.dht.constants +import lbrynet.dht.msgtypes +from lbrynet.dht.error import TimeoutError +from lbrynet.dht.node import Node, rpcmethod +from lbrynet.core.call_later_manager import CallLaterManager +from mock_transport import listenUDP, resolve + + +log = logging.getLogger() + + +class KademliaProtocolTest(unittest.TestCase): + """ Test case for the Protocol class """ + + udpPort = 9182 + + def setUp(self): + self._reactor = Clock() + CallLaterManager.setup(self._reactor.callLater) + self.node = Node(node_id='1' * 48, udpPort=self.udpPort, externalIP="127.0.0.1", listenUDP=listenUDP, + resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + + def tearDown(self): + CallLaterManager.stop() + del self._reactor + + @defer.inlineCallbacks + def testReactor(self): + """ Tests if the reactor can start/stop the protocol correctly """ + + d = defer.Deferred() + self._reactor.callLater(1, d.callback, True) + self._reactor.advance(1) + result = yield d + self.assertTrue(result) + + def testRPCTimeout(self): + """ Tests if a RPC message sent to a dead remote node times out correctly """ + dead_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, + resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + dead_node.start_listening() + dead_node.stop() + self._reactor.pump([1 for _ in range(10)]) + dead_contact = self.node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) + self.node.addContact(dead_contact) + + @rpcmethod + def fake_ping(*args, **kwargs): + time.sleep(lbrynet.dht.constants.rpcTimeout + 1) + return 'pong' + + real_ping = 
self.node.ping + real_timeout = lbrynet.dht.constants.rpcTimeout + real_attempts = lbrynet.dht.constants.rpcAttempts + lbrynet.dht.constants.rpcAttempts = 1 + lbrynet.dht.constants.rpcTimeout = 1 + + self.node.ping = fake_ping + # Make sure the contact was added + self.failIf(dead_contact not in self.node.contacts, + 'Contact not added to fake node (error in test code)') + self.node.start_listening() + + # Run the PING RPC (which should raise a timeout error) + df = self.node._protocol.sendRPC(dead_contact, 'ping', {}) + + def check_timeout(err): + self.assertEqual(err.type, TimeoutError) + + df.addErrback(check_timeout) + + def reset_values(): + self.node.ping = real_ping + lbrynet.dht.constants.rpcTimeout = real_timeout + lbrynet.dht.constants.rpcAttempts = real_attempts + + # See if the contact was removed due to the timeout + def check_removed_contact(): + self.failIf(dead_contact in self.node.contacts, + 'Contact was not removed after RPC timeout; check exception types.') + + df.addCallback(lambda _: reset_values()) + + # Stop the reactor if a result arrives (timeout or not) + df.addCallback(lambda _: check_removed_contact()) + self._reactor.pump([1 for _ in range(20)]) + + def testRPCRequest(self): + """ Tests if a valid RPC request is executed and responded to correctly """ + + remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, + resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + remote_node.start_listening() + remoteContact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) + self.node.addContact(remoteContact) + + self.error = None + + def handleError(f): + self.error = 'An RPC error occurred: %s' % f.getErrorMessage() + + def handleResult(result): + expectedResult = 'pong' + if result != expectedResult: + self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' \ + % (expectedResult, result) + + # Publish the "local" node on the network + self.node.start_listening() + # Simulate the RPC + df = remoteContact.ping() + df.addCallback(handleResult) + df.addErrback(handleError) + + for _ in range(10): + self._reactor.advance(1) + + self.failIf(self.error, self.error) + # The list of sent RPC messages should be empty at this stage + self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, + 'The protocol is still waiting for a RPC result, ' + 'but the transaction is already done!') + + def testRPCAccess(self): + """ Tests invalid RPC requests + Verifies that a RPC request for an existing but unpublished + method is denied, and that the associated (remote) exception gets + raised locally """ + remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, + resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + remote_node.start_listening() + remote_contact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) + self.node.addContact(remote_contact) + + self.error = None + + def handleError(f): + try: + f.raiseException() + except AttributeError, e: + # This is the expected outcome since the remote node did not publish the method + self.error = None + except Exception, e: + self.error = 'The remote method failed, but the wrong exception was raised; ' \ + 'expected AttributeError, got %s' % type(e) + + def handleResult(result): + self.error = 'The remote method executed successfully, returning: "%s"; ' \ + 'this RPC should not have been allowed.' 
% result + + self.node.start_listening() + self._reactor.pump([1 for _ in range(10)]) + # Simulate the RPC + df = remote_contact.not_a_rpc_function() + df.addCallback(handleResult) + df.addErrback(handleError) + self._reactor.pump([1 for _ in range(10)]) + self.failIf(self.error, self.error) + # The list of sent RPC messages should be empty at this stage + self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, + 'The protocol is still waiting for a RPC result, ' + 'but the transaction is already done!') + + def testRPCRequestArgs(self): + """ Tests if an RPC requiring arguments is executed correctly """ + remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, + resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + remote_node.start_listening() + remote_contact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) + self.node.addContact(remote_contact) + self.error = None + + def handleError(f): + self.error = 'An RPC error occurred: %s' % f.getErrorMessage() + + def handleResult(result): + expectedResult = 'pong' + if result != expectedResult: + self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' % \ + (expectedResult, result) + + # Publish the "local" node on the network + self.node.start_listening() + # Simulate the RPC + df = remote_contact.ping() + df.addCallback(handleResult) + df.addErrback(handleError) + self._reactor.pump([1 for _ in range(10)]) + self.failIf(self.error, self.error) + # The list of sent RPC messages should be empty at this stage + self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, + 'The protocol is still waiting for a RPC result, ' + 'but the transaction is already done!') diff --git a/lbrynet/tests/functional/test_dht.py b/lbrynet/tests/functional/test_dht.py deleted file mode 100644 index 6921858806..0000000000 --- a/lbrynet/tests/functional/test_dht.py +++ /dev/null @@ -1,274 +0,0 @@ -import time -import logging -from twisted.trial import unittest -from twisted.internet import defer, threads, task -from lbrynet.dht.node import Node -from lbrynet.tests import mocks -from lbrynet.core.utils import generate_id - -log = logging.getLogger("lbrynet.tests.util") -# log.addHandler(logging.StreamHandler()) -# log.setLevel(logging.DEBUG) - - -class TestKademliaBase(unittest.TestCase): - timeout = 300.0 # timeout for each test - network_size = 0 # plus lbrynet1, lbrynet2, and lbrynet3 seed nodes - node_ids = None - seed_dns = mocks.MOCK_DHT_SEED_DNS - - def _add_next_node(self): - node_id, node_ip = self.mock_node_generator.next() - node = Node(node_id=node_id.decode('hex'), udpPort=4444, peerPort=3333, externalIP=node_ip, - resolve=mocks.resolve, listenUDP=mocks.listenUDP, callLater=self.clock.callLater, clock=self.clock) - self.nodes.append(node) - return node - - @defer.inlineCallbacks - def add_node(self): - node = self._add_next_node() - yield node.joinNetwork( - [ - ("lbrynet1.lbry.io", self._seeds[0].port), - ("lbrynet2.lbry.io", self._seeds[1].port), - ("lbrynet3.lbry.io", self._seeds[2].port), - ] - ) - defer.returnValue(node) - - def get_node(self, node_id): - for node in self.nodes: - if node.node_id == node_id: - return node - raise KeyError(node_id) - - @defer.inlineCallbacks - def pop_node(self): - node = self.nodes.pop() - yield node.stop() - - def pump_clock(self, n, step=0.01): - """ - :param n: seconds to run the reactor for - :param step: reactor tick rate (in seconds) - """ - for _ in range(n * 100): - 
self.clock.advance(step) - - def run_reactor(self, seconds, *deferreds): - dl = [threads.deferToThread(self.pump_clock, seconds)] - for d in deferreds: - dl.append(d) - return defer.DeferredList(dl) - - @defer.inlineCallbacks - def setUp(self): - self.nodes = [] - self._seeds = [] - self.clock = task.Clock() - self.mock_node_generator = mocks.mock_node_generator(mock_node_ids=self.node_ids) - - join_dl = [] - for seed_dns in self.seed_dns: - other_seeds = list(self.seed_dns.keys()) - other_seeds.remove(seed_dns) - - self._add_next_node() - seed = self.nodes.pop() - self._seeds.append(seed) - join_dl.append( - seed.joinNetwork([(other_seed_dns, 4444) for other_seed_dns in other_seeds]) - ) - - if self.network_size: - for _ in range(self.network_size): - join_dl.append(self.add_node()) - yield self.run_reactor(1, *tuple(join_dl)) - self.verify_all_nodes_are_routable() - - @defer.inlineCallbacks - def tearDown(self): - dl = [] - while self.nodes: - dl.append(self.pop_node()) # stop all of the nodes - while self._seeds: - dl.append(self._seeds.pop().stop()) # and the seeds - yield defer.DeferredList(dl) - - def verify_all_nodes_are_routable(self): - routable = set() - node_addresses = {node.externalIP for node in self.nodes} - node_addresses = node_addresses.union({node.externalIP for node in self._seeds}) - for node in self._seeds: - contact_addresses = {contact.address for contact in node.contacts} - routable.update(contact_addresses) - for node in self.nodes: - contact_addresses = {contact.address for contact in node.contacts} - routable.update(contact_addresses) - self.assertSetEqual(routable, node_addresses) - - @defer.inlineCallbacks - def verify_all_nodes_are_pingable(self): - ping_replies = {} - ping_dl = [] - contacted = set() - - def _ping_cb(result, node, replies): - replies[node] = result - - for node in self._seeds: - contact_addresses = set() - for contact in node.contacts: - contact_addresses.add(contact.address) - d = contact.ping() - d.addCallback(_ping_cb, contact.address, ping_replies) - contacted.add(contact.address) - ping_dl.append(d) - for node in self.nodes: - contact_addresses = set() - for contact in node.contacts: - contact_addresses.add(contact.address) - d = contact.ping() - d.addCallback(_ping_cb, contact.address, ping_replies) - contacted.add(contact.address) - ping_dl.append(d) - self.run_reactor(2, *ping_dl) - yield threads.deferToThread(time.sleep, 0.1) - node_addresses = {node.externalIP for node in self.nodes}.union({seed.externalIP for seed in self._seeds}) - self.assertSetEqual(node_addresses, contacted) - self.assertDictEqual(ping_replies, {node: "pong" for node in contacted}) - - -class TestKademliaBootstrap(TestKademliaBase): - """ - Test initializing the network / connecting the seed nodes - """ - - def test_bootstrap_network(self): # simulates the real network, which has three seeds - self.assertEqual(len(self._seeds[0].contacts), 2) - self.assertEqual(len(self._seeds[1].contacts), 2) - self.assertEqual(len(self._seeds[2].contacts), 2) - - self.assertSetEqual( - {self._seeds[0].contacts[0].address, self._seeds[0].contacts[1].address}, - {self._seeds[1].externalIP, self._seeds[2].externalIP} - ) - - self.assertSetEqual( - {self._seeds[1].contacts[0].address, self._seeds[1].contacts[1].address}, - {self._seeds[0].externalIP, self._seeds[2].externalIP} - ) - - self.assertSetEqual( - {self._seeds[2].contacts[0].address, self._seeds[2].contacts[1].address}, - {self._seeds[0].externalIP, self._seeds[1].externalIP} - ) - - def 
test_all_nodes_are_pingable(self): - return self.verify_all_nodes_are_pingable() - - -class TestKademliaBootstrapSixteenSeeds(TestKademliaBase): - node_ids = [ - '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', - '111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111', - '222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222', - '333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333', - '444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444444', - '555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555', - '666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666', - '777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777', - '888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888', - '999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999', - 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', - 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb', - 'cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc', - 'dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd', - 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee', - 'ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff' - ] - - @defer.inlineCallbacks - def setUp(self): - self.seed_dns.update( - { - "lbrynet4.lbry.io": "10.42.42.4", - "lbrynet5.lbry.io": "10.42.42.5", - "lbrynet6.lbry.io": "10.42.42.6", - "lbrynet7.lbry.io": "10.42.42.7", - "lbrynet8.lbry.io": "10.42.42.8", - "lbrynet9.lbry.io": "10.42.42.9", - "lbrynet10.lbry.io": "10.42.42.10", - "lbrynet11.lbry.io": "10.42.42.11", - "lbrynet12.lbry.io": "10.42.42.12", - "lbrynet13.lbry.io": "10.42.42.13", - "lbrynet14.lbry.io": "10.42.42.14", - "lbrynet15.lbry.io": "10.42.42.15", - "lbrynet16.lbry.io": "10.42.42.16", - } - ) - yield TestKademliaBase.setUp(self) - - @defer.inlineCallbacks - def tearDown(self): - yield TestKademliaBase.tearDown(self) - - def test_bootstrap_network(self): - pass - - def _test_all_nodes_are_pingable(self): - return self.verify_all_nodes_are_pingable() - - -class Test250NodeNetwork(TestKademliaBase): - network_size = 250 - - def test_setup_network_and_verify_connectivity(self): - pass - - def update_network(self): - import random - dl = [] - announced_blobs = [] - - for node in self.nodes: # random events - if random.randint(0, 10000) < 75 and announced_blobs: # get peers for a blob - log.info('find blob') - blob_hash = random.choice(announced_blobs) - dl.append(node.getPeersForBlob(blob_hash)) - if random.randint(0, 10000) < 25: # announce a blob - log.info('announce blob') - blob_hash = generate_id() - announced_blobs.append((blob_hash, node.node_id)) - dl.append(node.announceHaveBlob(blob_hash)) - - random.shuffle(self.nodes) - - # kill nodes - while random.randint(0, 100) > 95: - dl.append(self.pop_node()) - log.info('pop node') - - # add nodes - while random.randint(0, 100) > 95: - dl.append(self.add_node()) - log.info('add node') - return tuple(dl), 
announced_blobs - - @defer.inlineCallbacks - def _test_simulate_network(self): - total_blobs = [] - for i in range(100): - d, blobs = self.update_network() - total_blobs.extend(blobs) - self.run_reactor(1, *d) - yield threads.deferToThread(time.sleep, 0.1) - routable = set() - node_addresses = {node.externalIP for node in self.nodes} - for node in self.nodes: - contact_addresses = {contact.address for contact in node.contacts} - routable.update(contact_addresses) - log.warning("difference: %i", len(node_addresses.difference(routable))) - log.info("blobs %i", len(total_blobs)) - log.info("step %i, %i nodes", i, len(self.nodes)) - self.pump_clock(100) diff --git a/lbrynet/tests/mocks.py b/lbrynet/tests/mocks.py index 12770c1888..5074e0531f 100644 --- a/lbrynet/tests/mocks.py +++ b/lbrynet/tests/mocks.py @@ -1,21 +1,18 @@ import base64 -import struct import io from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.asymmetric import rsa from cryptography.hazmat.primitives import serialization -from twisted.internet import defer, error +from twisted.internet import defer from twisted.python.failure import Failure from lbrynet.core.client.ClientRequest import ClientRequest from lbrynet.core.Error import RequestCanceledError from lbrynet.core import BlobAvailability -from lbrynet.core.utils import generate_id from lbrynet.dht.node import Node as RealNode from lbrynet.daemon import ExchangeRateManager as ERM from lbrynet import conf -from util import debug_kademlia_packet KB = 2**10 PUBLIC_EXPONENT = 65537 # http://www.daemonology.net/blog/2009-06-11-cryptographic-right-answers.html @@ -41,6 +38,9 @@ def joinNetwork(self, known_node_addresses=None): def stop(self): return defer.succeed(None) + def start(self, known_node_addresses=None): + return self.joinNetwork(known_node_addresses) + class FakeNetwork(object): @staticmethod @@ -188,9 +188,15 @@ def stop(self): def get_info_exchanger(self): return PointTraderKeyExchanger(self) + def update_peer_address(self, peer, address): + pass + def get_wallet_info_query_handler_factory(self): return PointTraderKeyQueryHandlerFactory(self) + def get_unused_address_for_peer(self, peer): + return defer.succeed("bDtL6qriyimxz71DSYjojTBsm6cpM1bqmj") + def reserve_points(self, *args): return True @@ -250,18 +256,12 @@ def add_supplier(self, supplier): def immediate_announce(self, *args): pass - def run_manage_loop(self): - pass - def start(self): pass def stop(self): pass - def get_next_announce_time(self): - return 0 - class GenFile(io.RawIOBase): def __init__(self, size, pattern): @@ -410,89 +410,3 @@ def _reset_settings(): conf.settings = original_settings obj.addCleanup(_reset_settings) - - -MOCK_DHT_NODES = [ - "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", - "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", - "DEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEF", -] - -MOCK_DHT_SEED_DNS = { # these map to mock nodes 0, 1, and 2 - "lbrynet1.lbry.io": "10.42.42.1", - "lbrynet2.lbry.io": "10.42.42.2", - "lbrynet3.lbry.io": "10.42.42.3", -} - - -def resolve(name, timeout=(1, 3, 11, 45)): - if name not in MOCK_DHT_SEED_DNS: - return defer.fail(error.DNSLookupError(name)) - return defer.succeed(MOCK_DHT_SEED_DNS[name]) - - -class MockUDPTransport(object): - def __init__(self, address, port, max_packet_size, protocol): - self.address = address - self.port = port - 
self.max_packet_size = max_packet_size - self._node = protocol._node - - def write(self, data, address): - dest = MockNetwork.peers[address][0] - debug_kademlia_packet(data, (self.address, self.port), address, self._node) - dest.datagramReceived(data, (self.address, self.port)) - - -class MockUDPPort(object): - def __init__(self, protocol): - self.protocol = protocol - - def startListening(self, reason=None): - return self.protocol.startProtocol() - - def stopListening(self, reason=None): - return self.protocol.stopProtocol() - - -class MockNetwork(object): - peers = {} # (interface, port): (protocol, max_packet_size) - - @classmethod - def add_peer(cls, port, protocol, interface, maxPacketSize): - interface = protocol._node.externalIP - protocol.transport = MockUDPTransport(interface, port, maxPacketSize, protocol) - cls.peers[(interface, port)] = (protocol, maxPacketSize) - - -def listenUDP(port, protocol, interface='', maxPacketSize=8192): - MockNetwork.add_peer(port, protocol, interface, maxPacketSize) - return MockUDPPort(protocol) - - -def address_generator(address=(10, 42, 42, 1)): - def increment(addr): - value = struct.unpack("I", "".join([chr(x) for x in list(addr)[::-1]]))[0] + 1 - new_addr = [] - for i in range(4): - new_addr.append(value % 256) - value >>= 8 - return tuple(new_addr[::-1]) - - while True: - yield "{}.{}.{}.{}".format(*address) - address = increment(address) - - -def mock_node_generator(count=None, mock_node_ids=MOCK_DHT_NODES): - if mock_node_ids is None: - mock_node_ids = MOCK_DHT_NODES - - for num, node_ip in enumerate(address_generator()): - if count and num >= count: - break - if num >= len(mock_node_ids): - node_id = generate_id().encode('hex') - else: - node_id = mock_node_ids[num] - yield (node_id, node_ip) diff --git a/lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py b/lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py index 2f67d5567b..60021ffc9a 100644 --- a/lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py +++ b/lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py @@ -1,82 +1,55 @@ -import tempfile -import shutil from twisted.trial import unittest -from twisted.internet import defer, reactor, threads +from twisted.internet import defer, task +from lbrynet.core import utils from lbrynet.tests.util import random_lbry_hash -from lbrynet.dht.hashannouncer import DHTHashAnnouncer -from lbrynet.core.call_later_manager import CallLaterManager -from lbrynet.database.storage import SQLiteStorage - class MocDHTNode(object): - def __init__(self, announce_will_fail=False): - # if announce_will_fail is True, - # announceHaveBlob will return empty dict - self.call_later_manager = CallLaterManager - self.call_later_manager.setup(reactor.callLater) + def __init__(self): self.blobs_announced = 0 - self.announce_will_fail = announce_will_fail def announceHaveBlob(self, blob): - if self.announce_will_fail: - return_val = {} - else: - return_val = {blob: ["ab"*48]} - self.blobs_announced += 1 - d = defer.Deferred() - self.call_later_manager.call_later(1, d.callback, return_val) - return d - + return defer.succeed(True) + +class MocSupplier(object): + def __init__(self, blobs_to_announce): + self.blobs_to_announce = blobs_to_announce + self.announced = False + def hashes_to_announce(self): + if not self.announced: + self.announced = True + return defer.succeed(self.blobs_to_announce) + else: + return defer.succeed([]) class DHTHashAnnouncerTest(unittest.TestCase): - @defer.inlineCallbacks + def setUp(self): - from lbrynet.conf import 
initialize_settings - initialize_settings(False) self.num_blobs = 10 self.blobs_to_announce = [] for i in range(0, self.num_blobs): self.blobs_to_announce.append(random_lbry_hash()) + self.clock = task.Clock() self.dht_node = MocDHTNode() - self.dht_node.peerPort = 3333 - self.dht_node.clock = reactor - self.db_dir = tempfile.mkdtemp() - self.storage = SQLiteStorage(self.db_dir) - yield self.storage.setup() - self.announcer = DHTHashAnnouncer(self.dht_node, self.storage, 10) - for blob_hash in self.blobs_to_announce: - yield self.storage.add_completed_blob(blob_hash, 100, 0, 1) - - @defer.inlineCallbacks - def tearDown(self): - self.dht_node.call_later_manager.stop() - yield self.storage.stop() - yield threads.deferToThread(shutil.rmtree, self.db_dir) + utils.call_later = self.clock.callLater + from lbrynet.core.server.DHTHashAnnouncer import DHTHashAnnouncer + self.announcer = DHTHashAnnouncer(self.dht_node, peer_port=3333) + self.supplier = MocSupplier(self.blobs_to_announce) + self.announcer.add_supplier(self.supplier) - @defer.inlineCallbacks - def test_announce_fail(self): - # test what happens when node.announceHaveBlob() returns empty dict - self.dht_node.announce_will_fail = True - d = yield self.announcer.manage() - yield d - - @defer.inlineCallbacks def test_basic(self): - d = self.announcer.immediate_announce(self.blobs_to_announce) - self.assertEqual(len(self.announcer.hash_queue), self.num_blobs) - yield d + self.announcer._announce_available_hashes() + self.assertEqual(self.announcer.hash_queue_size(), self.announcer.CONCURRENT_ANNOUNCERS) + self.clock.advance(1) self.assertEqual(self.dht_node.blobs_announced, self.num_blobs) - self.assertEqual(len(self.announcer.hash_queue), 0) + self.assertEqual(self.announcer.hash_queue_size(), 0) - @defer.inlineCallbacks def test_immediate_announce(self): # Test that immediate announce puts a hash at the front of the queue - d = self.announcer.immediate_announce(self.blobs_to_announce) - self.assertEqual(len(self.announcer.hash_queue), self.num_blobs) + self.announcer._announce_available_hashes() blob_hash = random_lbry_hash() self.announcer.immediate_announce([blob_hash]) - self.assertEqual(len(self.announcer.hash_queue), self.num_blobs+1) - self.assertEqual(blob_hash, self.announcer.hash_queue[-1]) - yield d + self.assertEqual(self.announcer.hash_queue_size(), self.announcer.CONCURRENT_ANNOUNCERS+1) + self.assertEqual(blob_hash, self.announcer.hash_queue[0][0]) + diff --git a/lbrynet/tests/unit/dht/test_contact.py b/lbrynet/tests/unit/dht/test_contact.py index bcd34c8f93..b150e2fbf4 100644 --- a/lbrynet/tests/unit/dht/test_contact.py +++ b/lbrynet/tests/unit/dht/test_contact.py @@ -1,16 +1,24 @@ -import unittest - -from lbrynet.dht import contact +from twisted.internet import task +from twisted.trial import unittest +from lbrynet.core.utils import generate_id +from lbrynet.dht.contact import ContactManager +from lbrynet.dht import constants class ContactOperatorsTest(unittest.TestCase): """ Basic tests case for boolean operators on the Contact class """ def setUp(self): - self.firstContact = contact.Contact('firstContactID', '127.0.0.1', 1000, None, 1) - self.secondContact = contact.Contact('2ndContactID', '192.168.0.1', 1000, None, 32) - self.secondContactCopy = contact.Contact('2ndContactID', '192.168.0.1', 1000, None, 32) - self.firstContactDifferentValues = contact.Contact( - 'firstContactID', '192.168.1.20', 1000, None, 50) + self.contact_manager = ContactManager() + self.node_ids = [generate_id(), generate_id(), generate_id()] + 
self.firstContact = self.contact_manager.make_contact(self.node_ids[1], '127.0.0.1', 1000, None, 1) + self.secondContact = self.contact_manager.make_contact(self.node_ids[0], '192.168.0.1', 1000, None, 32) + self.secondContactCopy = self.contact_manager.make_contact(self.node_ids[0], '192.168.0.1', 1000, None, 32) + self.firstContactDifferentValues = self.contact_manager.make_contact(self.node_ids[1], '192.168.1.20', + 1000, None, 50) + + def testNoDuplicateContactObjects(self): + self.assertTrue(self.secondContact is self.secondContactCopy) + self.assertTrue(self.firstContact is not self.firstContactDifferentValues) def testBoolean(self): """ Test "equals" and "not equals" comparisons """ @@ -24,15 +32,6 @@ def testBoolean(self): self.secondContact, self.secondContactCopy, 'Different copies of the same Contact instance should be equal') - def testStringComparisons(self): - """ Test comparisons of Contact objects with str types """ - self.failUnlessEqual( - 'firstContactID', self.firstContact, - 'The node ID string must be equal to the contact object') - self.failIfEqual( - 'some random string', self.firstContact, - "The tested string should not be equal to the contact object (not equal to it's ID)") - def testIllogicalComparisons(self): """ Test comparisons with non-Contact and non-str types """ msg = '"{}" operator: Contact object should not be equal to {} type' diff --git a/lbrynet/tests/unit/dht/test_datastore.py b/lbrynet/tests/unit/dht/test_datastore.py index 9d50e40702..a431f4aac1 100644 --- a/lbrynet/tests/unit/dht/test_datastore.py +++ b/lbrynet/tests/unit/dht/test_datastore.py @@ -4,19 +4,19 @@ # the GNU Lesser General Public License Version 3, or any later version. # See the COPYING file included in this archive -import unittest +from twisted.trial import unittest import time +import hashlib -import lbrynet.dht.datastore -import lbrynet.dht.constants +from lbrynet.dht.datastore import DictDataStore +from lbrynet.dht import constants -import hashlib class DictDataStoreTest(unittest.TestCase): """ Basic tests case for the reference DataStore API and implementation """ def setUp(self): - self.ds = lbrynet.dht.datastore.DictDataStore() - h = hashlib.sha1() + self.ds = DictDataStore() + h = hashlib.sha384() h.update('g') hashKey = h.digest() h2 = hashlib.sha1() @@ -78,7 +78,7 @@ def testExpires(self): h2 = hashlib.sha1() h2.update('test2') key2 = h2.digest() - td = lbrynet.dht.constants.dataExpireTimeout - 100 + td = constants.dataExpireTimeout - 100 td2 = td + td self.ds.addPeerToBlob(h1, 'val1', now - td, now - td, '1') self.ds.addPeerToBlob(h1, 'val2', now - td2, now - td2, '2') @@ -128,16 +128,3 @@ def testExpires(self): # # # Read back the meta-data # for key, value in self.cases: - - - - -def suite(): - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(DictDataStoreTest)) - return suite - - -if __name__ == '__main__': - # If this module is executed from the commandline, run all its tests - unittest.TextTestRunner().run(suite()) diff --git a/lbrynet/tests/unit/dht/test_encoding.py b/lbrynet/tests/unit/dht/test_encoding.py index 159b401da6..042a664f34 100644 --- a/lbrynet/tests/unit/dht/test_encoding.py +++ b/lbrynet/tests/unit/dht/test_encoding.py @@ -4,10 +4,10 @@ # the GNU Lesser General Public License Version 3, or any later version. 
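For context on the cases this test keeps: bencode is the DHT's wire encoding, so every value a node sends is an integer, string, list, or dict serialized to a canonical byte string, and dict keys are emitted in sorted order. A quick round-trip through the same encoder the tests and mock_transport.py import, using pairs taken directly from the case table in this file:

    from lbrynet.dht.encoding import Bencode

    encoding = Bencode()
    assert encoding.encode(42) == 'i42e'
    assert encoding.encode('spam') == '4:spam'
    assert encoding.encode(['spam', 42]) == 'l4:spami42ee'
    # keys come out sorted, which is why 'bar' precedes 'foo' here
    assert encoding.encode({'foo': 42, 'bar': 'spam'}) == 'd3:bar4:spam3:fooi42ee'
    assert encoding.decode('l4:spami42ee') == ['spam', 42]
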
# See the COPYING file included in this archive -import unittest - +from twisted.trial import unittest import lbrynet.dht.encoding + class BencodeTest(unittest.TestCase): """ Basic tests case for the Bencode implementation """ def setUp(self): @@ -16,7 +16,7 @@ def setUp(self): self.cases = ((42, 'i42e'), ('spam', '4:spam'), (['spam', 42], 'l4:spami42ee'), - ({'foo':42, 'bar':'spam'}, 'd3:bar4:spam3:fooi42ee'), + ({'foo': 42, 'bar': 'spam'}, 'd3:bar4:spam3:fooi42ee'), # ...and now the "real life" tests ([['abc', '127.0.0.1', 1919], ['def', '127.0.0.1', 1921]], 'll3:abc9:127.0.0.1i1919eel3:def9:127.0.0.1i1921eee')) @@ -45,12 +45,3 @@ def testDecoder(self): for encodedValue in self.badDecoderCases: self.failUnlessRaises( lbrynet.dht.encoding.DecodeError, self.encoding.decode, encodedValue) - -def suite(): - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(BencodeTest)) - return suite - -if __name__ == '__main__': - # If this module is executed from the commandline, run all its tests - unittest.TextTestRunner().run(suite()) diff --git a/lbrynet/tests/unit/dht/test_kbucket.py b/lbrynet/tests/unit/dht/test_kbucket.py index ebfbb54787..2896076b8c 100644 --- a/lbrynet/tests/unit/dht/test_kbucket.py +++ b/lbrynet/tests/unit/dht/test_kbucket.py @@ -4,23 +4,41 @@ # the GNU Lesser General Public License Version 3, or any later version. # See the COPYING file included in this archive -import unittest - +from twisted.trial import unittest +import struct +from lbrynet.core.utils import generate_id from lbrynet.dht import kbucket -import lbrynet.dht.contact as contact +from lbrynet.dht.contact import ContactManager from lbrynet.dht import constants + +def address_generator(address=(10, 42, 42, 1)): + def increment(addr): + value = struct.unpack("I", "".join([chr(x) for x in list(addr)[::-1]]))[0] + 1 + new_addr = [] + for i in range(4): + new_addr.append(value % 256) + value >>= 8 + return tuple(new_addr[::-1]) + + while True: + yield "{}.{}.{}.{}".format(*address) + address = increment(address) + + class KBucketTest(unittest.TestCase): """ Test case for the KBucket class """ def setUp(self): - self.kbucket = kbucket.KBucket(0, 2**160) + self.address_generator = address_generator() + self.contact_manager = ContactManager() + self.kbucket = kbucket.KBucket(0, 2**constants.key_bits, generate_id()) def testAddContact(self): """ Tests if the bucket handles contact additions/updates correctly """ # Test if contacts can be added to empty list # Add k contacts to bucket for i in range(constants.k): - tmpContact = contact.Contact('tempContactID%d' % i, str(i), i, i) + tmpContact = self.contact_manager.make_contact(generate_id(), next(self.address_generator), 4444, 0, None) self.kbucket.addContact(tmpContact) self.failUnlessEqual( self.kbucket._contacts[i], @@ -28,8 +46,7 @@ def testAddContact(self): "Contact in position %d not the same as the newly-added contact" % i) # Test if contact is not added to full list - i += 1 - tmpContact = contact.Contact('tempContactID%d' % i, str(i), i, i) + tmpContact = self.contact_manager.make_contact(generate_id(), next(self.address_generator), 4444, 0, None) self.failUnlessRaises(kbucket.BucketFull, self.kbucket.addContact, tmpContact) # Test if an existing contact is updated correctly if added again @@ -48,14 +65,17 @@ def testGetContacts(self): # Add k-2 contacts + node_ids = [] if constants.k >= 2: for i in range(constants.k-2): - tmpContact = contact.Contact(i, i, i, i) + node_ids.append(generate_id()) + tmpContact = 
self.contact_manager.make_contact(node_ids[-1], next(self.address_generator), 4444, 0, None) self.kbucket.addContact(tmpContact) else: # add k contacts for i in range(constants.k): - tmpContact = contact.Contact(i, i, i, i) + node_ids.append(generate_id()) + tmpContact = self.contact_manager.make_contact(node_ids[-1], next(self.address_generator), 4444, 0, None) self.kbucket.addContact(tmpContact) # try to get too many contacts @@ -65,8 +85,8 @@ def testGetContacts(self): 'Returned list should not have more than k entries!') # verify returned contacts in list - for i in range(constants.k-2): - self.failIf(self.kbucket._contacts[i].id != i, + for node_id, i in zip(node_ids, range(constants.k-2)): + self.failIf(self.kbucket._contacts[i].id != node_id, "Contact in position %s not same as added contact" % (str(i))) # try to get too many contacts @@ -89,25 +109,15 @@ def testGetContacts(self): def testRemoveContact(self): # try remove contact from empty list - rmContact = contact.Contact('TestContactID1', '127.0.0.1', 1, 1) + rmContact = self.contact_manager.make_contact(generate_id(), next(self.address_generator), 4444, 0, None) self.failUnlessRaises(ValueError, self.kbucket.removeContact, rmContact) # Add couple contacts for i in range(constants.k-2): - tmpContact = contact.Contact('tmpTestContactID%d' % i, str(i), i, i) + tmpContact = self.contact_manager.make_contact(generate_id(), next(self.address_generator), 4444, 0, None) self.kbucket.addContact(tmpContact) # try remove contact from empty list self.kbucket.addContact(rmContact) result = self.kbucket.removeContact(rmContact) self.failIf(rmContact in self.kbucket._contacts, "Could not remove contact from bucket") - - -def suite(): - suite = unittest.TestSuite() - suite.addTest(unittest.makeSuite(KBucketTest)) - return suite - -if __name__ == '__main__': - # If this module is executed from the commandline, run all its tests - unittest.TextTestRunner().run(suite()) diff --git a/lbrynet/tests/unit/dht/test_messages.py b/lbrynet/tests/unit/dht/test_messages.py index 36c2295b9b..6319901c65 100644 --- a/lbrynet/tests/unit/dht/test_messages.py +++ b/lbrynet/tests/unit/dht/test_messages.py @@ -4,7 +4,7 @@ # the GNU Lesser General Public License Version 3, or any later version. 
# See the COPYING file included in this archive -import unittest +from twisted.trial import unittest from lbrynet.dht.msgtypes import RequestMessage, ResponseMessage, ErrorMessage from lbrynet.dht.msgformat import MessageTranslator, DefaultFormat diff --git a/lbrynet/tests/unit/dht/test_node.py b/lbrynet/tests/unit/dht/test_node.py index ab73ba3e81..c612d75938 100644 --- a/lbrynet/tests/unit/dht/test_node.py +++ b/lbrynet/tests/unit/dht/test_node.py @@ -5,20 +5,21 @@ # See the COPYING file included in this archive import hashlib -import unittest +from twisted.trial import unittest import struct from twisted.internet import protocol, defer, selectreactor from lbrynet.dht.msgtypes import ResponseMessage -import lbrynet.dht.node -import lbrynet.dht.constants -import lbrynet.dht.datastore +from lbrynet.dht.node import Node +from lbrynet.dht import constants +from lbrynet.dht.datastore import DictDataStore +from lbrynet.dht.routingtable import TreeRoutingTable class NodeIDTest(unittest.TestCase): """ Test case for the Node class's ID """ def setUp(self): - self.node = lbrynet.dht.node.Node() + self.node = Node() def testAutoCreatedID(self): """ Tests if a new node has a valid node ID """ @@ -49,12 +50,10 @@ def testKeyLength(self): class NodeDataTest(unittest.TestCase): """ Test case for the Node class's data-related functions """ def setUp(self): - import lbrynet.dht.contact h = hashlib.sha384() h.update('test') - self.node = lbrynet.dht.node.Node() - self.contact = lbrynet.dht.contact.Contact(h.digest(), '127.0.0.1', 12345, - self.node._protocol) + self.node = Node() + self.contact = self.node.contact_manager.make_contact(h.digest(), '127.0.0.1', 12345, self.node._protocol) self.token = self.node.make_token(self.contact.compact_ip()) self.cases = [] for i in xrange(5): @@ -65,13 +64,8 @@ def setUp(self): @defer.inlineCallbacks def testStore(self): """ Tests if the node can store (and privately retrieve) some data """ - for key, value in self.cases: - request = { - 'port': value, - 'lbryid': self.contact.id, - 'token': self.token - } - yield self.node.store(key, request, self.contact.id, _rpcNodeContact=self.contact) + for key, port in self.cases: + yield self.node.store(self.contact, key, self.token, port, self.contact.id) for key, value in self.cases: expected_result = self.contact.compact_ip() + str(struct.pack('>H', value)) + \ self.contact.id @@ -85,189 +79,185 @@ def testStore(self): class NodeContactTest(unittest.TestCase): """ Test case for the Node class's contact management-related functions """ def setUp(self): - self.node = lbrynet.dht.node.Node() + self.node = Node() + @defer.inlineCallbacks def testAddContact(self): """ Tests if a contact can be added and retrieved correctly """ - import lbrynet.dht.contact # Create the contact h = hashlib.sha384() h.update('node1') contactID = h.digest() - contact = lbrynet.dht.contact.Contact(contactID, '127.0.0.1', 91824, self.node._protocol) + contact = self.node.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.node._protocol) # Now add it... 
- self.node.addContact(contact) + yield self.node.addContact(contact) # ...and request the closest nodes to it using FIND_NODE - closestNodes = self.node._routingTable.findCloseNodes(contactID, lbrynet.dht.constants.k) + closestNodes = self.node._routingTable.findCloseNodes(contactID, constants.k) self.failUnlessEqual(len(closestNodes), 1, 'Wrong amount of contacts returned; ' 'expected 1, got %d' % len(closestNodes)) self.failUnless(contact in closestNodes, 'Added contact not found by issueing ' '_findCloseNodes()') + @defer.inlineCallbacks def testAddSelfAsContact(self): """ Tests the node's behaviour when attempting to add itself as a contact """ - import lbrynet.dht.contact # Create a contact with the same ID as the local node's ID - contact = lbrynet.dht.contact.Contact(self.node.node_id, '127.0.0.1', 91824, None) + contact = self.node.contact_manager.make_contact(self.node.node_id, '127.0.0.1', 91824, None) # Now try to add it - self.node.addContact(contact) + yield self.node.addContact(contact) # ...and request the closest nodes to it using FIND_NODE closestNodes = self.node._routingTable.findCloseNodes(self.node.node_id, - lbrynet.dht.constants.k) + constants.k) self.failIf(contact in closestNodes, 'Node added itself as a contact') -class FakeRPCProtocol(protocol.DatagramProtocol): - def __init__(self): - self.reactor = selectreactor.SelectReactor() - self.testResponse = None - self.network = None - - def createNetwork(self, contactNetwork): - """ - set up a list of contacts together with their closest contacts - @param contactNetwork: a sequence of tuples, each containing a contact together with its - closest contacts: C{(, )} - """ - self.network = contactNetwork - - def sendRPC(self, contact, method, args, rawResponse=False): - """ Fake RPC protocol; allows entangled.kademlia.contact.Contact objects to "send" RPCs""" - - h = hashlib.sha384() - h.update('rpcId') - rpc_id = h.digest()[:20] - - if method == "findNode": - # get the specific contacts closest contacts - closestContacts = [] - closestContactsList = [] - for contactTuple in self.network: - if contact == contactTuple[0]: - # get the list of closest contacts for this contact - closestContactsList = contactTuple[1] - # Pack the closest contacts into a ResponseMessage - for closeContact in closestContactsList: - closestContacts.append((closeContact.id, closeContact.address, closeContact.port)) - - message = ResponseMessage(rpc_id, contact.id, closestContacts) - df = defer.Deferred() - df.callback((message, (contact.address, contact.port))) - return df - elif method == "findValue": - for contactTuple in self.network: - if contact == contactTuple[0]: - # Get the data stored by this remote contact - dataDict = contactTuple[2] - dataKey = dataDict.keys()[0] - data = dataDict.get(dataKey) - # Check if this contact has the requested value - if dataKey == args[0]: - # Return the data value - response = dataDict - print "data found at contact: " + contact.id - else: - # Return the closest contact to the requested data key - print "data not found at contact: " + contact.id - closeContacts = contactTuple[1] - closestContacts = [] - for closeContact in closeContacts: - closestContacts.append((closeContact.id, closeContact.address, - closeContact.port)) - response = closestContacts - - # Create the response message - message = ResponseMessage(rpc_id, contact.id, response) - df = defer.Deferred() - df.callback((message, (contact.address, contact.port))) - return df - - def _send(self, data, rpcID, address): - """ fake sending data """ - 
- -class NodeLookupTest(unittest.TestCase): - """ Test case for the Node class's iterativeFind node lookup algorithm """ - - def setUp(self): - # create a fake protocol to imitate communication with other nodes - self._protocol = FakeRPCProtocol() - # Note: The reactor is never started for this test. All deferred calls run sequentially, - # since there is no asynchronous network communication - # create the node to be tested in isolation - h = hashlib.sha384() - h.update('node1') - node_id = str(h.digest()) - self.node = lbrynet.dht.node.Node(node_id=node_id, udpPort=4000, networkProtocol=self._protocol) - self.updPort = 81173 - self.contactsAmount = 80 - # Reinitialise the routing table - self.node._routingTable = lbrynet.dht.routingtable.OptimizedTreeRoutingTable( - self.node.node_id) - - # create 160 bit node ID's for test purposes - self.testNodeIDs = [] - idNum = int(self.node.node_id.encode('hex'), 16) - for i in range(self.contactsAmount): - # create the testNodeIDs in ascending order, away from the actual node ID, - # with regards to the distance metric - self.testNodeIDs.append(str("%X" % (idNum + i + 1)).decode('hex')) - - # generate contacts - self.contacts = [] - for i in range(self.contactsAmount): - contact = lbrynet.dht.contact.Contact(self.testNodeIDs[i], "127.0.0.1", - self.updPort + i + 1, self._protocol) - self.contacts.append(contact) - - # create the network of contacts in format: (contact, closest contacts) - contactNetwork = ((self.contacts[0], self.contacts[8:15]), - (self.contacts[1], self.contacts[16:23]), - (self.contacts[2], self.contacts[24:31]), - (self.contacts[3], self.contacts[32:39]), - (self.contacts[4], self.contacts[40:47]), - (self.contacts[5], self.contacts[48:55]), - (self.contacts[6], self.contacts[56:63]), - (self.contacts[7], self.contacts[64:71]), - (self.contacts[8], self.contacts[72:79]), - (self.contacts[40], self.contacts[41:48]), - (self.contacts[41], self.contacts[41:48]), - (self.contacts[42], self.contacts[41:48]), - (self.contacts[43], self.contacts[41:48]), - (self.contacts[44], self.contacts[41:48]), - (self.contacts[45], self.contacts[41:48]), - (self.contacts[46], self.contacts[41:48]), - (self.contacts[47], self.contacts[41:48]), - (self.contacts[48], self.contacts[41:48]), - (self.contacts[50], self.contacts[0:7]), - (self.contacts[51], self.contacts[8:15]), - (self.contacts[52], self.contacts[16:23])) - - contacts_with_datastores = [] - - for contact_tuple in contactNetwork: - contacts_with_datastores.append((contact_tuple[0], contact_tuple[1], - lbrynet.dht.datastore.DictDataStore())) - self._protocol.createNetwork(contacts_with_datastores) - - @defer.inlineCallbacks - def testNodeBootStrap(self): - """ Test bootstrap with the closest possible contacts """ - - activeContacts = yield self.node._iterativeFind(self.node.node_id, self.contacts[0:8]) - # Set the expected result - expectedResult = set() - for item in self.contacts[0:6]: - expectedResult.add(item.id) - # Get the result from the deferred - - # Check the length of the active contacts - self.failUnlessEqual(activeContacts.__len__(), expectedResult.__len__(), - "More active contacts should exist, there should be %d " - "contacts but there are %d" % (len(expectedResult), - len(activeContacts))) - - # Check that the received active contacts are the same as the input contacts - self.failUnlessEqual({contact.id for contact in activeContacts}, expectedResult, - "Active should only contain the closest possible contacts" - " which were used as input for the boostrap") +# class 
FakeRPCProtocol(protocol.DatagramProtocol): +# def __init__(self): +# self.reactor = selectreactor.SelectReactor() +# self.testResponse = None +# self.network = None +# +# def createNetwork(self, contactNetwork): +# """ +# set up a list of contacts together with their closest contacts +# @param contactNetwork: a sequence of tuples, each containing a contact together with its +# closest contacts: C{(, )} +# """ +# self.network = contactNetwork +# +# def sendRPC(self, contact, method, args, rawResponse=False): +# """ Fake RPC protocol; allows entangled.kademlia.contact.Contact objects to "send" RPCs""" +# +# h = hashlib.sha384() +# h.update('rpcId') +# rpc_id = h.digest()[:20] +# +# if method == "findNode": +# # get the specific contacts closest contacts +# closestContacts = [] +# closestContactsList = [] +# for contactTuple in self.network: +# if contact == contactTuple[0]: +# # get the list of closest contacts for this contact +# closestContactsList = contactTuple[1] +# # Pack the closest contacts into a ResponseMessage +# for closeContact in closestContactsList: +# closestContacts.append((closeContact.id, closeContact.address, closeContact.port)) +# +# message = ResponseMessage(rpc_id, contact.id, closestContacts) +# df = defer.Deferred() +# df.callback((message, (contact.address, contact.port))) +# return df +# elif method == "findValue": +# for contactTuple in self.network: +# if contact == contactTuple[0]: +# # Get the data stored by this remote contact +# dataDict = contactTuple[2] +# dataKey = dataDict.keys()[0] +# data = dataDict.get(dataKey) +# # Check if this contact has the requested value +# if dataKey == args[0]: +# # Return the data value +# response = dataDict +# print "data found at contact: " + contact.id +# else: +# # Return the closest contact to the requested data key +# print "data not found at contact: " + contact.id +# closeContacts = contactTuple[1] +# closestContacts = [] +# for closeContact in closeContacts: +# closestContacts.append((closeContact.id, closeContact.address, +# closeContact.port)) +# response = closestContacts +# +# # Create the response message +# message = ResponseMessage(rpc_id, contact.id, response) +# df = defer.Deferred() +# df.callback((message, (contact.address, contact.port))) +# return df +# +# def _send(self, data, rpcID, address): +# """ fake sending data """ +# +# +# class NodeLookupTest(unittest.TestCase): +# """ Test case for the Node class's iterativeFind node lookup algorithm """ +# +# def setUp(self): +# # create a fake protocol to imitate communication with other nodes +# self._protocol = FakeRPCProtocol() +# # Note: The reactor is never started for this test. 
All deferred calls run sequentially, +# # since there is no asynchronous network communication +# # create the node to be tested in isolation +# h = hashlib.sha384() +# h.update('node1') +# node_id = str(h.digest()) +# self.node = Node(node_id, 4000, None, None, self._protocol) +# self.updPort = 81173 +# self.contactsAmount = 80 +# # Reinitialise the routing table +# self.node._routingTable = TreeRoutingTable(self.node.node_id) +# +# # create 160 bit node ID's for test purposes +# self.testNodeIDs = [] +# idNum = int(self.node.node_id.encode('hex'), 16) +# for i in range(self.contactsAmount): +# # create the testNodeIDs in ascending order, away from the actual node ID, +# # with regards to the distance metric +# self.testNodeIDs.append(str("%X" % (idNum + i + 1)).decode('hex')) +# +# # generate contacts +# self.contacts = [] +# for i in range(self.contactsAmount): +# contact = self.node.contact_manager.make_contact(self.testNodeIDs[i], "127.0.0.1", +# self.updPort + i + 1, self._protocol) +# self.contacts.append(contact) +# +# # create the network of contacts in format: (contact, closest contacts) +# contactNetwork = ((self.contacts[0], self.contacts[8:15]), +# (self.contacts[1], self.contacts[16:23]), +# (self.contacts[2], self.contacts[24:31]), +# (self.contacts[3], self.contacts[32:39]), +# (self.contacts[4], self.contacts[40:47]), +# (self.contacts[5], self.contacts[48:55]), +# (self.contacts[6], self.contacts[56:63]), +# (self.contacts[7], self.contacts[64:71]), +# (self.contacts[8], self.contacts[72:79]), +# (self.contacts[40], self.contacts[41:48]), +# (self.contacts[41], self.contacts[41:48]), +# (self.contacts[42], self.contacts[41:48]), +# (self.contacts[43], self.contacts[41:48]), +# (self.contacts[44], self.contacts[41:48]), +# (self.contacts[45], self.contacts[41:48]), +# (self.contacts[46], self.contacts[41:48]), +# (self.contacts[47], self.contacts[41:48]), +# (self.contacts[48], self.contacts[41:48]), +# (self.contacts[50], self.contacts[0:7]), +# (self.contacts[51], self.contacts[8:15]), +# (self.contacts[52], self.contacts[16:23])) +# +# contacts_with_datastores = [] +# +# for contact_tuple in contactNetwork: +# contacts_with_datastores.append((contact_tuple[0], contact_tuple[1], +# DictDataStore())) +# self._protocol.createNetwork(contacts_with_datastores) +# +# # @defer.inlineCallbacks +# # def testNodeBootStrap(self): +# # """ Test bootstrap with the closest possible contacts """ +# # # Set the expected result +# # expectedResult = {item.id for item in self.contacts[0:8]} +# # +# # activeContacts = yield self.node._iterativeFind(self.node.node_id, self.contacts[0:8]) +# # +# # # Check the length of the active contacts +# # self.failUnlessEqual(activeContacts.__len__(), expectedResult.__len__(), +# # "More active contacts should exist, there should be %d " +# # "contacts but there are %d" % (len(expectedResult), +# # len(activeContacts))) +# # +# # # Check that the received active contacts are the same as the input contacts +# # self.failUnlessEqual({contact.id for contact in activeContacts}, expectedResult, +# # "Active should only contain the closest possible contacts" +# # " which were used as input for the boostrap") diff --git a/lbrynet/tests/unit/dht/test_protocol.py b/lbrynet/tests/unit/dht/test_protocol.py index af636b6314..02b6b5adb4 100644 --- a/lbrynet/tests/unit/dht/test_protocol.py +++ b/lbrynet/tests/unit/dht/test_protocol.py @@ -1,200 +1,167 @@ -import time -import unittest -from twisted.internet.task import Clock -from twisted.internet import defer 
-import lbrynet.dht.protocol -import lbrynet.dht.contact -import lbrynet.dht.constants -import lbrynet.dht.msgtypes -from lbrynet.dht.error import TimeoutError -from lbrynet.dht.node import Node, rpcmethod -from lbrynet.tests.mocks import listenUDP, resolve -from lbrynet.core.call_later_manager import CallLaterManager - -import logging - -log = logging.getLogger() - - -class KademliaProtocolTest(unittest.TestCase): - """ Test case for the Protocol class """ - - udpPort = 9182 - - def setUp(self): - self._reactor = Clock() - CallLaterManager.setup(self._reactor.callLater) - self.node = Node(node_id='1' * 48, udpPort=self.udpPort, externalIP="127.0.0.1", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - - def tearDown(self): - CallLaterManager.stop() - del self._reactor - - @defer.inlineCallbacks - def testReactor(self): - """ Tests if the reactor can start/stop the protocol correctly """ - - d = defer.Deferred() - self._reactor.callLater(1, d.callback, True) - self._reactor.advance(1) - result = yield d - self.assertTrue(result) - - def testRPCTimeout(self): - """ Tests if a RPC message sent to a dead remote node times out correctly """ - dead_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - dead_node.start_listening() - dead_node.stop() - self._reactor.pump([1 for _ in range(10)]) - dead_contact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(dead_contact) - - @rpcmethod - def fake_ping(*args, **kwargs): - time.sleep(lbrynet.dht.constants.rpcTimeout + 1) - return 'pong' - - real_ping = self.node.ping - real_timeout = lbrynet.dht.constants.rpcTimeout - real_attempts = lbrynet.dht.constants.rpcAttempts - lbrynet.dht.constants.rpcAttempts = 1 - lbrynet.dht.constants.rpcTimeout = 1 - - self.node.ping = fake_ping - # Make sure the contact was added - self.failIf(dead_contact not in self.node.contacts, - 'Contact not added to fake node (error in test code)') - self.node.start_listening() - - # Run the PING RPC (which should raise a timeout error) - df = self.node._protocol.sendRPC(dead_contact, 'ping', {}) - - def check_timeout(err): - self.assertEqual(err.type, TimeoutError) - - df.addErrback(check_timeout) - - def reset_values(): - self.node.ping = real_ping - lbrynet.dht.constants.rpcTimeout = real_timeout - lbrynet.dht.constants.rpcAttempts = real_attempts - - # See if the contact was removed due to the timeout - def check_removed_contact(): - self.failIf(dead_contact in self.node.contacts, - 'Contact was not removed after RPC timeout; check exception types.') - - df.addCallback(lambda _: reset_values()) - - # Stop the reactor if a result arrives (timeout or not) - df.addCallback(lambda _: check_removed_contact()) - self._reactor.pump([1 for _ in range(20)]) - - def testRPCRequest(self): - """ Tests if a valid RPC request is executed and responded to correctly """ - - remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_node.start_listening() - remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(remoteContact) - - self.error = None - - def handleError(f): - self.error = 'An RPC error occurred: %s' % f.getErrorMessage() - - def handleResult(result): - expectedResult = 
'pong' - if result != expectedResult: - self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' \ - % (expectedResult, result) - - # Publish the "local" node on the network - self.node.start_listening() - # Simulate the RPC - df = remoteContact.ping() - df.addCallback(handleResult) - df.addErrback(handleError) - - for _ in range(10): - self._reactor.advance(1) - - self.failIf(self.error, self.error) - # The list of sent RPC messages should be empty at this stage - self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, - 'The protocol is still waiting for a RPC result, ' - 'but the transaction is already done!') - - def testRPCAccess(self): - """ Tests invalid RPC requests - Verifies that a RPC request for an existing but unpublished - method is denied, and that the associated (remote) exception gets - raised locally """ - remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_node.start_listening() - remote_contact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(remote_contact) - - self.error = None - - def handleError(f): - try: - f.raiseException() - except AttributeError, e: - # This is the expected outcome since the remote node did not publish the method - self.error = None - except Exception, e: - self.error = 'The remote method failed, but the wrong exception was raised; ' \ - 'expected AttributeError, got %s' % type(e) - - def handleResult(result): - self.error = 'The remote method executed successfully, returning: "%s"; ' \ - 'this RPC should not have been allowed.' % result - - self.node.start_listening() - self._reactor.pump([1 for _ in range(10)]) - # Simulate the RPC - df = remote_contact.not_a_rpc_function() - df.addCallback(handleResult) - df.addErrback(handleError) - self._reactor.pump([1 for _ in range(10)]) - self.failIf(self.error, self.error) - # The list of sent RPC messages should be empty at this stage - self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, - 'The protocol is still waiting for a RPC result, ' - 'but the transaction is already done!') - - def testRPCRequestArgs(self): - """ Tests if an RPC requiring arguments is executed correctly """ - remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_node.start_listening() - remote_contact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(remote_contact) - self.error = None - - def handleError(f): - self.error = 'An RPC error occurred: %s' % f.getErrorMessage() - - def handleResult(result): - expectedResult = 'pong' - if result != expectedResult: - self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' % \ - (expectedResult, result) - - # Publish the "local" node on the network - self.node.start_listening() - # Simulate the RPC - df = remote_contact.ping() - df.addCallback(handleResult) - df.addErrback(handleError) - self._reactor.pump([1 for _ in range(10)]) - self.failIf(self.error, self.error) - # The list of sent RPC messages should be empty at this stage - self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, - 'The protocol is still waiting for a RPC result, ' - 'but the transaction is already done!') +# import time +# import unittest +# import twisted.internet.selectreactor +# +# 
import lbrynet.dht.protocol +# import lbrynet.dht.contact +# import lbrynet.dht.constants +# import lbrynet.dht.msgtypes +# from lbrynet.dht.error import TimeoutError +# from lbrynet.dht.node import Node, rpcmethod +# +# +# class KademliaProtocolTest(unittest.TestCase): +# """ Test case for the Protocol class """ +# +# def setUp(self): +# del lbrynet.dht.protocol.reactor +# lbrynet.dht.protocol.reactor = twisted.internet.selectreactor.SelectReactor() +# self.node = Node(node_id='1' * 48, udpPort=9182, externalIP="127.0.0.1") +# self.protocol = lbrynet.dht.protocol.KademliaProtocol(self.node) +# +# def testReactor(self): +# """ Tests if the reactor can start/stop the protocol correctly """ +# lbrynet.dht.protocol.reactor.listenUDP(0, self.protocol) +# lbrynet.dht.protocol.reactor.callLater(0, lbrynet.dht.protocol.reactor.stop) +# lbrynet.dht.protocol.reactor.run() +# +# def testRPCTimeout(self): +# """ Tests if a RPC message sent to a dead remote node times out correctly """ +# +# @rpcmethod +# def fake_ping(*args, **kwargs): +# time.sleep(lbrynet.dht.constants.rpcTimeout + 1) +# return 'pong' +# +# real_ping = self.node.ping +# real_timeout = lbrynet.dht.constants.rpcTimeout +# real_attempts = lbrynet.dht.constants.rpcAttempts +# lbrynet.dht.constants.rpcAttempts = 1 +# lbrynet.dht.constants.rpcTimeout = 1 +# self.node.ping = fake_ping +# deadContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) +# self.node.addContact(deadContact) +# # Make sure the contact was added +# self.failIf(deadContact not in self.node.contacts, +# 'Contact not added to fake node (error in test code)') +# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) +# +# # Run the PING RPC (which should raise a timeout error) +# df = self.protocol.sendRPC(deadContact, 'ping', {}) +# +# def check_timeout(err): +# self.assertEqual(type(err), TimeoutError) +# +# df.addErrback(check_timeout) +# +# def reset_values(): +# self.node.ping = real_ping +# lbrynet.dht.constants.rpcTimeout = real_timeout +# lbrynet.dht.constants.rpcAttempts = real_attempts +# +# # See if the contact was removed due to the timeout +# def check_removed_contact(): +# self.failIf(deadContact in self.node.contacts, +# 'Contact was not removed after RPC timeout; check exception types.') +# +# df.addCallback(lambda _: reset_values()) +# +# # Stop the reactor if a result arrives (timeout or not) +# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) +# df.addCallback(lambda _: check_removed_contact()) +# lbrynet.dht.protocol.reactor.run() +# +# def testRPCRequest(self): +# """ Tests if a valid RPC request is executed and responded to correctly """ +# remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) +# self.node.addContact(remoteContact) +# self.error = None +# +# def handleError(f): +# self.error = 'An RPC error occurred: %s' % f.getErrorMessage() +# +# def handleResult(result): +# expectedResult = 'pong' +# if result != expectedResult: +# self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' \ +# % (expectedResult, result) +# +# # Publish the "local" node on the network +# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) +# # Simulate the RPC +# df = remoteContact.ping() +# df.addCallback(handleResult) +# df.addErrback(handleError) +# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) +# lbrynet.dht.protocol.reactor.run() +# self.failIf(self.error, self.error) +# # The list of sent RPC messages should be empty at this stage +# 
self.failUnlessEqual(len(self.protocol._sentMessages), 0, +# 'The protocol is still waiting for a RPC result, ' +# 'but the transaction is already done!') +# +# def testRPCAccess(self): +# """ Tests invalid RPC requests +# Verifies that a RPC request for an existing but unpublished +# method is denied, and that the associated (remote) exception gets +# raised locally """ +# remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) +# self.node.addContact(remoteContact) +# self.error = None +# +# def handleError(f): +# try: +# f.raiseException() +# except AttributeError, e: +# # This is the expected outcome since the remote node did not publish the method +# self.error = None +# except Exception, e: +# self.error = 'The remote method failed, but the wrong exception was raised; ' \ +# 'expected AttributeError, got %s' % type(e) +# +# def handleResult(result): +# self.error = 'The remote method executed successfully, returning: "%s"; ' \ +# 'this RPC should not have been allowed.' % result +# +# # Publish the "local" node on the network +# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) +# # Simulate the RPC +# df = remoteContact.not_a_rpc_function() +# df.addCallback(handleResult) +# df.addErrback(handleError) +# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) +# lbrynet.dht.protocol.reactor.run() +# self.failIf(self.error, self.error) +# # The list of sent RPC messages should be empty at this stage +# self.failUnlessEqual(len(self.protocol._sentMessages), 0, +# 'The protocol is still waiting for a RPC result, ' +# 'but the transaction is already done!') +# +# def testRPCRequestArgs(self): +# """ Tests if an RPC requiring arguments is executed correctly """ +# remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) +# self.node.addContact(remoteContact) +# self.error = None +# +# def handleError(f): +# self.error = 'An RPC error occurred: %s' % f.getErrorMessage() +# +# def handleResult(result): +# expectedResult = 'pong' +# if result != expectedResult: +# self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' % \ +# (expectedResult, result) +# +# # Publish the "local" node on the network +# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) +# # Simulate the RPC +# df = remoteContact.ping() +# df.addCallback(handleResult) +# df.addErrback(handleError) +# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) +# lbrynet.dht.protocol.reactor.run() +# self.failIf(self.error, self.error) +# # The list of sent RPC messages should be empty at this stage +# self.failUnlessEqual(len(self.protocol._sentMessages), 0, +# 'The protocol is still waiting for a RPC result, ' +# 'but the transaction is already done!') diff --git a/lbrynet/tests/unit/dht/test_routingtable.py b/lbrynet/tests/unit/dht/test_routingtable.py index 8c09075096..1c6e480981 100644 --- a/lbrynet/tests/unit/dht/test_routingtable.py +++ b/lbrynet/tests/unit/dht/test_routingtable.py @@ -1,29 +1,16 @@ import hashlib -import unittest - -import lbrynet.dht.constants -import lbrynet.dht.routingtable -import lbrynet.dht.contact -import lbrynet.dht.node -import lbrynet.dht.distance +from twisted.trial import unittest +from twisted.internet import defer +from lbrynet.dht import constants +from lbrynet.dht.routingtable import TreeRoutingTable +from lbrynet.dht.contact import ContactManager +from lbrynet.dht.distance import Distance class FakeRPCProtocol(object): """ Fake RPC protocol; allows lbrynet.dht.contact.Contact objects to "send" 
RPCs """ def sendRPC(self, *args, **kwargs): - return FakeDeferred() - - -class FakeDeferred(object): - """ Fake Twisted Deferred object; allows the routing table to add callbacks that do nothing """ - def addCallback(self, *args, **kwargs): - return - - def addErrback(self, *args, **kwargs): - return - - def addCallbacks(self, *args, **kwargs): - return + return defer.succeed(None) class TreeRoutingTableTest(unittest.TestCase): @@ -31,9 +18,10 @@ class TreeRoutingTableTest(unittest.TestCase): def setUp(self): h = hashlib.sha384() h.update('node1') + self.contact_manager = ContactManager() self.nodeID = h.digest() self.protocol = FakeRPCProtocol() - self.routingTable = lbrynet.dht.routingtable.TreeRoutingTable(self.nodeID) + self.routingTable = TreeRoutingTable(self.nodeID) def testDistance(self): """ Test to see if distance method returns correct result""" @@ -42,86 +30,91 @@ def testDistance(self): basicTestList = [('123456789', '123456789', 0L), ('12345', '98765', 34527773184L)] for test in basicTestList: - result = lbrynet.dht.distance.Distance(test[0])(test[1]) + result = Distance(test[0])(test[1]) self.failIf(result != test[2], 'Result of _distance() should be %s but %s returned' % (test[2], result)) baseIp = '146.64.19.111' ipTestList = ['146.64.29.222', '192.68.19.333'] - distanceOne = lbrynet.dht.distance.Distance(baseIp)(ipTestList[0]) - distanceTwo = lbrynet.dht.distance.Distance(baseIp)(ipTestList[1]) + distanceOne = Distance(baseIp)(ipTestList[0]) + distanceTwo = Distance(baseIp)(ipTestList[1]) self.failIf(distanceOne > distanceTwo, '%s should be closer to the base ip %s than %s' % (ipTestList[0], baseIp, ipTestList[1])) + @defer.inlineCallbacks def testAddContact(self): """ Tests if a contact can be added and retrieved correctly """ # Create the contact h = hashlib.sha384() h.update('node2') contactID = h.digest() - contact = lbrynet.dht.contact.Contact(contactID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.protocol) # Now add it... - self.routingTable.addContact(contact) + yield self.routingTable.addContact(contact) # ...and request the closest nodes to it (will retrieve it) - closestNodes = self.routingTable.findCloseNodes(contactID, lbrynet.dht.constants.k) + closestNodes = self.routingTable.findCloseNodes(contactID, constants.k) self.failUnlessEqual(len(closestNodes), 1, 'Wrong amount of contacts returned; expected 1,' ' got %d' % len(closestNodes)) self.failUnless(contact in closestNodes, 'Added contact not found by issueing ' '_findCloseNodes()') + @defer.inlineCallbacks def testGetContact(self): """ Tests if a specific existing contact can be retrieved correctly """ h = hashlib.sha384() h.update('node2') contactID = h.digest() - contact = lbrynet.dht.contact.Contact(contactID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.protocol) # Now add it... 
- self.routingTable.addContact(contact) + yield self.routingTable.addContact(contact) # ...and get it again sameContact = self.routingTable.getContact(contactID) self.failUnlessEqual(contact, sameContact, 'getContact() should return the same contact') + @defer.inlineCallbacks def testAddParentNodeAsContact(self): """ Tests the routing table's behaviour when attempting to add its parent node as a contact """ # Create a contact with the same ID as the local node's ID - contact = lbrynet.dht.contact.Contact(self.nodeID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(self.nodeID, '127.0.0.1', 91824, self.protocol) # Now try to add it - self.routingTable.addContact(contact) + yield self.routingTable.addContact(contact) # ...and request the closest nodes to it using FIND_NODE - closestNodes = self.routingTable.findCloseNodes(self.nodeID, lbrynet.dht.constants.k) + closestNodes = self.routingTable.findCloseNodes(self.nodeID, constants.k) self.failIf(contact in closestNodes, 'Node added itself as a contact') + @defer.inlineCallbacks def testRemoveContact(self): """ Tests contact removal """ # Create the contact h = hashlib.sha384() h.update('node2') contactID = h.digest() - contact = lbrynet.dht.contact.Contact(contactID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.protocol) # Now add it... - self.routingTable.addContact(contact) + yield self.routingTable.addContact(contact) # Verify addition self.failUnlessEqual(len(self.routingTable._buckets[0]), 1, 'Contact not added properly') # Now remove it - self.routingTable.removeContact(contact.id) + self.routingTable.removeContact(contact) self.failUnlessEqual(len(self.routingTable._buckets[0]), 0, 'Contact not removed properly') + @defer.inlineCallbacks def testSplitBucket(self): """ Tests if the the routing table correctly dynamically splits k-buckets """ self.failUnlessEqual(self.routingTable._buckets[0].rangeMax, 2**384, 'Initial k-bucket range should be 0 <= range < 2**384') # Add k contacts - for i in range(lbrynet.dht.constants.k): + for i in range(constants.k): h = hashlib.sha384() h.update('remote node %d' % i) nodeID = h.digest() - contact = lbrynet.dht.contact.Contact(nodeID, '127.0.0.1', 91824, self.protocol) - self.routingTable.addContact(contact) + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + yield self.routingTable.addContact(contact) self.failUnlessEqual(len(self.routingTable._buckets), 1, 'Only k nodes have been added; the first k-bucket should now ' 'be full, but should not yet be split') @@ -129,8 +122,8 @@ def testSplitBucket(self): h = hashlib.sha384() h.update('yet another remote node') nodeID = h.digest() - contact = lbrynet.dht.contact.Contact(nodeID, '127.0.0.1', 91824, self.protocol) - self.routingTable.addContact(contact) + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + yield self.routingTable.addContact(contact) self.failUnlessEqual(len(self.routingTable._buckets), 2, 'k+1 nodes have been added; the first k-bucket should have been ' 'split into two new buckets') @@ -144,99 +137,113 @@ def testSplitBucket(self): 'K-bucket was split, but the min/max ranges were ' 'not divided properly') - def testFullBucketNoSplit(self): + @defer.inlineCallbacks + def testFullSplit(self): """ Test that a bucket is not split if it full, but does not cover the range containing the parent node's ID """ + self.routingTable._parentNodeID = 49 * 'a' # more than 
384 bits; this will not be in the range of _any_ k-bucket + + node_ids = [ + "d4a27096d81e3c4efacce9f940e887c956f736f859c8037b556efec6fdda5c388ae92bae96b9eb204b24da2f376c4282", + "553c0bfe119c35247c8cb8124091acb5c05394d5be7b019f6b1a5e18036af7a6148711ad6d47a0f955047bf9eac868aa", + "671a179c251c90863f46e7ef54264cbbad743fe3127871064d8f051ce4124fcbd893339e11358f621655e37bd6a74097", + "f896bafeb7ffb14b92986e3b08ee06807fdd5be34ab43f4f52559a5bbf0f12dedcd8556801f97c334b3ac9be7a0f7a93", + "33a7deb380eb4707211184798b66840c22c396e8cde00b75b64f9ead09bad1141b56d35a93bd511adb28c6708eecc39d", + "5e1e8ca575b536ae5ec52f7766ada904a64ebaad805909b1067ec3c984bf99909c9fcdd37e04ea5c5c043ea8830100ce", + "ee18857d0c1f7fc413424f3ffead4871f2499646d4c2ac16f35f0c8864318ca21596915f18f85a3a25f8ceaa56c844aa", + "68039f78fbf130873e7cce2f71f39d217dcb7f3fe562d64a85de4e21ee980b4a800f51bf6851d2bbf10e6590fe0d46b2" + ] + # Add k contacts - for i in range(lbrynet.dht.constants.k): + for i in range(constants.k): h = hashlib.sha384() h.update('remote node %d' % i) nodeID = h.digest() - contact = lbrynet.dht.contact.Contact(nodeID, '127.0.0.1', 91824, self.protocol) - self.routingTable.addContact(contact) - self.failUnlessEqual(len(self.routingTable._buckets), 1, 'Only k nodes have been added; ' - 'the first k-bucket should now be ' - 'full, and there should not be ' - 'more than 1 bucket') - self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), lbrynet.dht.constants.k, - 'Bucket should have k contacts; expected %d got %d' % - (lbrynet.dht.constants.k, - len(self.routingTable._buckets[0]._contacts))) - # Now add 1 more contact + self.assertEquals(nodeID, node_ids[i].decode('hex')) + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + yield self.routingTable.addContact(contact) + self.failUnlessEqual(len(self.routingTable._buckets), 1) + self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), constants.k) + + # try adding a contact who is further from us than the k'th known contact h = hashlib.sha384() - h.update('yet another remote node') + h.update('yet another remote node!') nodeID = h.digest() - contact = lbrynet.dht.contact.Contact(nodeID, '127.0.0.1', 91824, self.protocol) - self.routingTable.addContact(contact) - self.failUnlessEqual(len(self.routingTable._buckets), 1, - 'There should not be more than 1 bucket, since the bucket ' - 'should not have been split (parent node ID not in range)') - self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), - lbrynet.dht.constants.k, 'Bucket should have k contacts; ' - 'expected %d got %d' % - (lbrynet.dht.constants.k, - len(self.routingTable._buckets[0]._contacts))) - self.failIf(contact in self.routingTable._buckets[0]._contacts, - 'New contact should have been discarded (since RPC is faked in this test)') - - -class KeyErrorFixedTest(unittest.TestCase): - """ Basic tests case for boolean operators on the Contact class """ + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + yield self.routingTable.addContact(contact) + self.failUnlessEqual(len(self.routingTable._buckets), 1) + self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), constants.k) + self.failIf(contact in self.routingTable._buckets[0]._contacts) - def setUp(self): - own_id = (2 ** lbrynet.dht.constants.key_bits) - 1 - # carefully chosen own_id. here's the logic - # we want a bunch of buckets (k+1, to be exact), and we want to make sure own_id - # is not in bucket 0. 
so we put own_id at the end so we can keep splitting by adding to the - # end - - self.table = lbrynet.dht.routingtable.OptimizedTreeRoutingTable(own_id) - - def fill_bucket(self, bucket_min): - bucket_size = lbrynet.dht.constants.k - for i in range(bucket_min, bucket_min + bucket_size): - self.table.addContact(lbrynet.dht.contact.Contact(long(i), '127.0.0.1', 9999, None)) - - def overflow_bucket(self, bucket_min): - bucket_size = lbrynet.dht.constants.k - self.fill_bucket(bucket_min) - self.table.addContact( - lbrynet.dht.contact.Contact(long(bucket_min + bucket_size + 1), - '127.0.0.1', 9999, None)) - - def testKeyError(self): - - # find middle, so we know where bucket will split - bucket_middle = self.table._buckets[0].rangeMax / 2 - - # fill last bucket - self.fill_bucket(self.table._buckets[0].rangeMax - lbrynet.dht.constants.k - 1) - # -1 in previous line because own_id is in last bucket - - # fill/overflow 7 more buckets - bucket_start = 0 - for i in range(0, lbrynet.dht.constants.k): - self.overflow_bucket(bucket_start) - bucket_start += bucket_middle / (2 ** i) - - # replacement cache now has k-1 entries. - # adding one more contact to bucket 0 used to cause a KeyError, but it should work - self.table.addContact( - lbrynet.dht.contact.Contact(long(lbrynet.dht.constants.k + 2), '127.0.0.1', 9999, None)) - - # import math - # print "" - # for i, bucket in enumerate(self.table._buckets): - # print "Bucket " + str(i) + " (2 ** " + str( - # math.log(bucket.rangeMin, 2) if bucket.rangeMin > 0 else 0) + " <= x < 2 ** "+str( - # math.log(bucket.rangeMax, 2)) + ")" - # for c in bucket.getContacts(): - # print " contact " + str(c.id) - # for key, bucket in self.table._replacementCache.iteritems(): - # print "Replacement Cache for Bucket " + str(key) - # for c in bucket: - # print " contact " + str(c.id) + # try adding a contact who is closer to us than the k'th known contact + h = hashlib.sha384() + h.update('yet another remote node') + nodeID = h.digest() + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + yield self.routingTable.addContact(contact) + self.failUnlessEqual(len(self.routingTable._buckets), 2) + self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), 5) + self.failUnlessEqual(len(self.routingTable._buckets[1]._contacts), 4) + self.failIf(contact not in self.routingTable._buckets[1]._contacts) + + +# class KeyErrorFixedTest(unittest.TestCase): +# """ Basic tests case for boolean operators on the Contact class """ +# +# def setUp(self): +# own_id = (2 ** constants.key_bits) - 1 +# # carefully chosen own_id. here's the logic +# # we want a bunch of buckets (k+1, to be exact), and we want to make sure own_id +# # is not in bucket 0. 
so we put own_id at the end so we can keep splitting by adding to the +# # end +# +# self.table = lbrynet.dht.routingtable.OptimizedTreeRoutingTable(own_id) +# +# def fill_bucket(self, bucket_min): +# bucket_size = lbrynet.dht.constants.k +# for i in range(bucket_min, bucket_min + bucket_size): +# self.table.addContact(lbrynet.dht.contact.Contact(long(i), '127.0.0.1', 9999, None)) +# +# def overflow_bucket(self, bucket_min): +# bucket_size = lbrynet.dht.constants.k +# self.fill_bucket(bucket_min) +# self.table.addContact( +# lbrynet.dht.contact.Contact(long(bucket_min + bucket_size + 1), +# '127.0.0.1', 9999, None)) +# +# def testKeyError(self): +# +# # find middle, so we know where bucket will split +# bucket_middle = self.table._buckets[0].rangeMax / 2 +# +# # fill last bucket +# self.fill_bucket(self.table._buckets[0].rangeMax - lbrynet.dht.constants.k - 1) +# # -1 in previous line because own_id is in last bucket +# +# # fill/overflow 7 more buckets +# bucket_start = 0 +# for i in range(0, lbrynet.dht.constants.k): +# self.overflow_bucket(bucket_start) +# bucket_start += bucket_middle / (2 ** i) +# +# # replacement cache now has k-1 entries. +# # adding one more contact to bucket 0 used to cause a KeyError, but it should work +# self.table.addContact( +# lbrynet.dht.contact.Contact(long(lbrynet.dht.constants.k + 2), '127.0.0.1', 9999, None)) +# +# # import math +# # print "" +# # for i, bucket in enumerate(self.table._buckets): +# # print "Bucket " + str(i) + " (2 ** " + str( +# # math.log(bucket.rangeMin, 2) if bucket.rangeMin > 0 else 0) + " <= x < 2 ** "+str( +# # math.log(bucket.rangeMax, 2)) + ")" +# # for c in bucket.getContacts(): +# # print " contact " + str(c.id) +# # for key, bucket in self.table._replacementCache.iteritems(): +# # print "Replacement Cache for Bucket " + str(key) +# # for c in bucket: +# # print " contact " + str(c.id) diff --git a/lbrynet/tests/util.py b/lbrynet/tests/util.py index e6ad2005cc..68b445c8ec 100644 --- a/lbrynet/tests/util.py +++ b/lbrynet/tests/util.py @@ -5,20 +5,11 @@ import tempfile import shutil import mock -import logging -from lbrynet.dht.encoding import Bencode -from lbrynet.dht.error import DecodeError -from lbrynet.dht.msgformat import DefaultFormat -from lbrynet.dht.msgtypes import ResponseMessage, RequestMessage, ErrorMessage -_encode = Bencode() -_datagram_formatter = DefaultFormat() DEFAULT_TIMESTAMP = datetime.datetime(2016, 1, 1) DEFAULT_ISO_TIME = time.mktime(DEFAULT_TIMESTAMP.timetuple()) -log = logging.getLogger("lbrynet.tests.util") - def mk_db_and_blob_dir(): db_dir = tempfile.mkdtemp() @@ -49,28 +40,5 @@ def resetTime(test_case, timestamp=DEFAULT_TIMESTAMP): patcher.start().return_value = timestamp test_case.addCleanup(patcher.stop) - def is_android(): return 'ANDROID_ARGUMENT' in os.environ # detect Android using the Kivy way - - -def debug_kademlia_packet(data, source, destination, node): - if log.level != logging.DEBUG: - return - try: - packet = _datagram_formatter.fromPrimitive(_encode.decode(data)) - if isinstance(packet, RequestMessage): - log.debug("request %s --> %s %s (node time %s)", source[0], destination[0], packet.request, - node.clock.seconds()) - elif isinstance(packet, ResponseMessage): - if isinstance(packet.response, (str, unicode)): - log.debug("response %s <-- %s %s (node time %s)", destination[0], source[0], packet.response, - node.clock.seconds()) - else: - log.debug("response %s <-- %s %i contacts (node time %s)", destination[0], source[0], - len(packet.response), node.clock.seconds()) - elif 
isinstance(packet, ErrorMessage):
-        log.error("error %s <-- %s %s (node time %s)", destination[0], source[0], packet.exceptionType,
-                  node.clock.seconds())
-    except DecodeError:
-        log.exception("decode error %s --> %s (node time %s)", source[0], destination[0], node.clock.seconds())

From d250e4d91a174097b8fcc107981b01f7d085a1a8 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 23 May 2018 18:31:17 -0400
Subject: [PATCH 31/79] add iterative find test

---
 .../functional/dht/test_iterative_find.py    | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 lbrynet/tests/functional/dht/test_iterative_find.py

diff --git a/lbrynet/tests/functional/dht/test_iterative_find.py b/lbrynet/tests/functional/dht/test_iterative_find.py
new file mode 100644
index 0000000000..f38caf6049
--- /dev/null
+++ b/lbrynet/tests/functional/dht/test_iterative_find.py
@@ -0,0 +1,27 @@
+from lbrynet.dht import constants
+from lbrynet.dht.distance import Distance
+from dht_test_environment import TestKademliaBase
+import logging
+
+log = logging.getLogger()
+
+
+class TestFindNode(TestKademliaBase):
+    """
+    This tests the local routing table lookup for a node, every node should return the sorted k contacts closest
+    to the querying node (even if the key being looked up is known)
+    """
+    network_size = 35
+
+    def test_find_node(self):
+        last_node_id = self.nodes[-1].node_id.encode('hex')
+        to_last_node = Distance(last_node_id.decode('hex'))
+        for n in self.nodes:
+            find_close_nodes_result = n._routingTable.findCloseNodes(last_node_id.decode('hex'), constants.k)
+            self.assertTrue(len(find_close_nodes_result) == constants.k)
+            found_ids = [c.id.encode('hex') for c in find_close_nodes_result]
+            self.assertListEqual(found_ids, sorted(found_ids, key=lambda x: to_last_node(x.decode('hex'))))
+            if last_node_id in [c.id.encode('hex') for c in n.contacts]:
+                self.assertTrue(found_ids[0] == last_node_id)
+            else:
+                self.assertTrue(last_node_id not in found_ids)
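A note on the ordering this test asserts: Kademlia ranks contacts by the XOR of their node ID with the
target key, compared as an unsigned integer, so findCloseNodes must hand back its k contacts already
sorted by that metric. A minimal sketch of the metric in Python 2, mirroring how the Distance class is
used above rather than copying its source:

    # XOR distance, per the Kademlia paper: d(a, b) = a xor b
    class Distance(object):
        def __init__(self, key):
            self.key = long(key.encode('hex'), 16)

        def __call__(self, other):
            # smaller result means a closer contact
            return self.key ^ long(other.encode('hex'), 16)

    # the k closest known contacts to `target` would then simply be
    #     sorted(contact_ids, key=Distance(target))[:constants.k]

This is also why the test can check found_ids[0] == last_node_id whenever the target itself is a known
contact: a node's distance to itself is zero, the minimum of the metric.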
From d3f4155a2b12ce10ee0403827fb9d354d570134c Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 23 May 2018 18:31:31 -0400
Subject: [PATCH 32/79] add unit tests for contact_is_good

---
 lbrynet/tests/unit/dht/test_contact.py | 116 +++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/lbrynet/tests/unit/dht/test_contact.py b/lbrynet/tests/unit/dht/test_contact.py
index b150e2fbf4..c9ebdc7461 100644
--- a/lbrynet/tests/unit/dht/test_contact.py
+++ b/lbrynet/tests/unit/dht/test_contact.py
@@ -46,3 +46,119 @@ def testIllogicalComparisons(self):
     def testCompactIP(self):
         self.assertEqual(self.firstContact.compact_ip(), '\x7f\x00\x00\x01')
         self.assertEqual(self.secondContact.compact_ip(), '\xc0\xa8\x00\x01')
+
+
+class TestContactLastReplied(unittest.TestCase):
+    def setUp(self):
+        self.clock = task.Clock()
+        self.contact_manager = ContactManager(self.clock.seconds)
+        self.contact = self.contact_manager.make_contact(generate_id(), "127.0.0.1", 4444, None)
+        self.clock.advance(3600)
+        self.assertTrue(self.contact.contact_is_good is None)
+
+    def test_stale_replied_to_us(self):
+        self.contact.update_last_replied()
+        self.assertTrue(self.contact.contact_is_good is True)
+
+    def test_stale_requested_from_us(self):
+        self.contact.update_last_requested()
+        self.assertTrue(self.contact.contact_is_good is None)
+
+    def test_stale_then_fail(self):
+        self.contact.update_last_failed()
+        self.assertTrue(self.contact.contact_is_good is None)
+        self.clock.advance(1)
+        self.contact.update_last_failed()
+        self.assertTrue(self.contact.contact_is_good is False)
+
+    def test_good_turned_stale(self):
+        self.contact.update_last_replied()
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance((constants.refreshTimeout / 4) - 1)
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(1)
+        self.assertTrue(self.contact.contact_is_good is None)
+
+    def test_good_then_fail(self):
+        self.contact.update_last_replied()
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(1)
+        self.contact.update_last_failed()
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(59)
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.contact.update_last_failed()
+        self.assertTrue(self.contact.contact_is_good is False)
+        for _ in range(7200):
+            self.clock.advance(60)
+            self.assertTrue(self.contact.contact_is_good is False)
+
+    def test_good_then_fail_then_good(self):
+        # it replies
+        self.contact.update_last_replied()
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(1)
+
+        # it fails twice in a row
+        self.contact.update_last_failed()
+        self.clock.advance(1)
+        self.contact.update_last_failed()
+        self.assertTrue(self.contact.contact_is_good is False)
+        self.clock.advance(1)
+
+        # it replies
+        self.contact.update_last_replied()
+        self.clock.advance(1)
+        self.assertTrue(self.contact.contact_is_good is True)
+
+        # it goes stale
+        self.clock.advance((constants.refreshTimeout / 4) - 2)
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(1)
+        self.assertTrue(self.contact.contact_is_good is None)
+
+
+class TestContactLastRequested(unittest.TestCase):
+    def setUp(self):
+        self.clock = task.Clock()
+        self.contact_manager = ContactManager(self.clock.seconds)
+        self.contact = self.contact_manager.make_contact(generate_id(), "127.0.0.1", 4444, None)
+        self.clock.advance(1)
+        self.contact.update_last_replied()
+        self.clock.advance(3600)
+        self.assertTrue(self.contact.contact_is_good is None)
+
+    def test_previous_replied_then_requested(self):
+        # it requests
+        self.contact.update_last_requested()
+        self.assertTrue(self.contact.contact_is_good is True)
+
+        # it goes stale
+        self.clock.advance((constants.refreshTimeout / 4) - 1)
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(1)
+        self.assertTrue(self.contact.contact_is_good is None)
+
+    def test_previous_replied_then_requested_then_failed(self):
+        # it requests
+        self.contact.update_last_requested()
+        self.assertTrue(self.contact.contact_is_good is True)
+        self.clock.advance(1)
+
+        # it fails twice in a row
+        self.contact.update_last_failed()
+        self.clock.advance(1)
+        self.contact.update_last_failed()
+        self.assertTrue(self.contact.contact_is_good is False)
+        self.clock.advance(1)
+
+        # it requests
+        self.contact.update_last_requested()
+        self.clock.advance(1)
+        self.assertTrue(self.contact.contact_is_good is False)
+
+        # it goes stale
+        self.clock.advance((constants.refreshTimeout / 4) - 2)
+        self.assertTrue(self.contact.contact_is_good is False)
+        self.clock.advance(1)
+        self.assertTrue(self.contact.contact_is_good is False)
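Taken together, the cases above pin down the liveness rules for contact_is_good: two consecutive failed
RPCs since the last reply mark a contact bad; a reply within refreshTimeout / 4 marks it good; an
incoming request within that window also counts, but only for a contact that has replied at least once;
everything else (never contacted, or gone stale) is unknown. A rough restatement of those rules as one
function, where the timestamp arguments are assumptions made for illustration rather than the actual
Contact attributes:

    REFRESH_TIMEOUT = 3600  # assumed to match constants.refreshTimeout

    def contact_is_good(now, last_replied=None, last_requested=None, second_failure=None):
        """None = unknown, True = good, False = bad (sketch of the tested rules)."""
        # second_failure: time of the second of two back-to-back failed RPCs, if any
        if second_failure and second_failure > (last_replied or 0):
            return False                # failed twice and has not replied since
        if last_replied and last_replied > now - REFRESH_TIMEOUT / 4:
            return True                 # replied to us recently
        if last_replied and last_requested and last_requested > now - REFRESH_TIMEOUT / 4:
            return True                 # has replied before and used us recently
        return None                     # stale or never heard from

Driving the tests with task.Clock keeps every one of these thresholds deterministic; passing
ContactManager(self.clock.seconds) swaps the wall clock out for the simulated one.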
b/lbrynet/tests/functional/dht/test_contact_expiration.py new file mode 100644 index 0000000000..f7f969b8f0 --- /dev/null +++ b/lbrynet/tests/functional/dht/test_contact_expiration.py @@ -0,0 +1,43 @@ +import logging +from twisted.internet import defer +from dht_test_environment import TestKademliaBase + +log = logging.getLogger() + + +class TestPeerExpiration(TestKademliaBase): + network_size = 40 + + @defer.inlineCallbacks + def test_expire_stale_peers(self): + removed_addresses = set() + removed_nodes = [] + self.show_info() + + for _ in range(5): + n = self.nodes[0] + removed_nodes.append(n) + removed_addresses.add(n.externalIP) + self.nodes.remove(n) + yield self.run_reactor(1, [n.stop()]) + + offline_addresses = self.get_routable_addresses().difference(self.get_online_addresses()) + self.assertSetEqual(offline_addresses, removed_addresses) + get_nodes_with_stale_contacts = lambda: filter(lambda node: any(contact.address in offline_addresses + for contact in node.contacts), self.nodes + self._seeds) + self.assertRaises(AssertionError, self.verify_all_nodes_are_routable) + self.assertTrue(len(get_nodes_with_stale_contacts()) > 1) + for _ in range(90): + log.info("Time is %f, nodes with stale contacts: %i/%i", self.clock.seconds(), + len(get_nodes_with_stale_contacts()), len(self.nodes + self._seeds)) + self.pump_clock(60) + self.assertTrue(len(get_nodes_with_stale_contacts()) == 0) + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() + + restarted_node = removed_nodes[0] + yield self.run_reactor(1, [restarted_node.start([(seed_name, 4444) + for seed_name in sorted(self.seed_dns.keys())])]) + + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() \ No newline at end of file From 760417ff3a21b4e1215fae6df7d54a5a2947592d Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 19:33:16 -0400 Subject: [PATCH 34/79] pylint --- lbrynet/core/Session.py | 1 - lbrynet/dht/contact.py | 2 +- lbrynet/dht/protocol.py | 4 ++-- lbrynet/dht/routingtable.py | 4 ++-- lbrynet/tests/functional/dht/test_contact_expiration.py | 2 +- lbrynet/tests/unit/dht/test_kbucket.py | 6 ++++-- lbrynet/tests/unit/dht/test_node.py | 5 +---- 7 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lbrynet/core/Session.py b/lbrynet/core/Session.py index d05c492393..c2aefafc60 100644 --- a/lbrynet/core/Session.py +++ b/lbrynet/core/Session.py @@ -7,7 +7,6 @@ from lbrynet.core.RateLimiter import RateLimiter from lbrynet.core.utils import generate_id from lbrynet.core.PaymentRateManager import BasePaymentRateManager, OnlyFreePaymentsManager -from lbrynet.core.BlobAvailability import BlobAvailabilityTracker log = logging.getLogger(__name__) diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py index 2ee26c6789..9cf6eb67a7 100644 --- a/lbrynet/dht/contact.py +++ b/lbrynet/dht/contact.py @@ -13,7 +13,7 @@ def __init__(self, contactManager, id, ipAddress, udpPort, networkProtocol, firs self._id = id if id is not None: if not len(id) == constants.key_bits / 8: - raise ValueError("invalid node id: %s", id.encode('hex')) + raise ValueError("invalid node id: %s" % id.encode('hex')) self.address = ipAddress self.port = udpPort self._networkProtocol = networkProtocol diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 49325770c2..536315c444 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -137,7 +137,7 @@ def sendRPC(self, contact, method, args, rawResponse=False): log.debug("%s:%i SEND CALL %s(%s) TO %s:%i", 
self._node.externalIP, self._node.port, method, args[0].encode('hex'), contact.address, contact.port) else: - log.debug("%s:%i SEND CALL %s TO %s:%i", self._node.externalIP, self._node.port, method, + log.debug("%s:%i SEND CALL %s TO %s:%i", self._node.externalIP, self._node.port, method, contact.address, contact.port) df = defer.Deferred() @@ -395,7 +395,7 @@ def handleResult(result): log.debug("%s:%i RECV CALL %s(%s) %s:%i", self._node.externalIP, self._node.port, method, args[0].encode('hex'), senderContact.address, senderContact.port) else: - log.debug("%s:%i RECV CALL %s %s:%i", self._node.externalIP, self._node.port, method, + log.debug("%s:%i RECV CALL %s %s:%i", self._node.externalIP, self._node.port, method, senderContact.address, senderContact.port) try: if method != 'ping': diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 540871bb5e..70713a9270 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -124,8 +124,8 @@ def replaceContact(failure, deadContact): """ failure.trap(TimeoutError) log.debug("Replacing dead contact in bucket %i: %s:%i (%s) with %s:%i (%s)", bucketIndex, - deadContact.address, deadContact.port, deadContact.log_id(), contact.address, contact.port, - contact.log_id()) + deadContact.address, deadContact.port, deadContact.log_id(), contact.address, + contact.port, contact.log_id()) try: self._buckets[bucketIndex].removeContact(deadContact) except ValueError: diff --git a/lbrynet/tests/functional/dht/test_contact_expiration.py b/lbrynet/tests/functional/dht/test_contact_expiration.py index f7f969b8f0..57296b6475 100644 --- a/lbrynet/tests/functional/dht/test_contact_expiration.py +++ b/lbrynet/tests/functional/dht/test_contact_expiration.py @@ -40,4 +40,4 @@ def test_expire_stale_peers(self): for seed_name in sorted(self.seed_dns.keys())])]) self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() \ No newline at end of file + self.verify_all_nodes_are_pingable() diff --git a/lbrynet/tests/unit/dht/test_kbucket.py b/lbrynet/tests/unit/dht/test_kbucket.py index 2896076b8c..100f63562c 100644 --- a/lbrynet/tests/unit/dht/test_kbucket.py +++ b/lbrynet/tests/unit/dht/test_kbucket.py @@ -69,13 +69,15 @@ def testGetContacts(self): if constants.k >= 2: for i in range(constants.k-2): node_ids.append(generate_id()) - tmpContact = self.contact_manager.make_contact(node_ids[-1], next(self.address_generator), 4444, 0, None) + tmpContact = self.contact_manager.make_contact(node_ids[-1], next(self.address_generator), 4444, 0, + None) self.kbucket.addContact(tmpContact) else: # add k contacts for i in range(constants.k): node_ids.append(generate_id()) - tmpContact = self.contact_manager.make_contact(node_ids[-1], next(self.address_generator), 4444, 0, None) + tmpContact = self.contact_manager.make_contact(node_ids[-1], next(self.address_generator), 4444, 0, + None) self.kbucket.addContact(tmpContact) # try to get too many contacts diff --git a/lbrynet/tests/unit/dht/test_node.py b/lbrynet/tests/unit/dht/test_node.py index c612d75938..3310523e95 100644 --- a/lbrynet/tests/unit/dht/test_node.py +++ b/lbrynet/tests/unit/dht/test_node.py @@ -8,12 +8,9 @@ from twisted.trial import unittest import struct -from twisted.internet import protocol, defer, selectreactor -from lbrynet.dht.msgtypes import ResponseMessage +from twisted.internet import defer from lbrynet.dht.node import Node from lbrynet.dht import constants -from lbrynet.dht.datastore import DictDataStore -from lbrynet.dht.routingtable import 
TreeRoutingTable class NodeIDTest(unittest.TestCase): From 3dfc6bd2cc69110ac2a80f625a047d0d9feb5bdf Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 20:41:01 -0400 Subject: [PATCH 35/79] update CallLaterManager to be an object --- lbrynet/core/call_later_manager.py | 65 +++++++++++++----------------- lbrynet/dht/node.py | 7 ++-- lbrynet/dht/protocol.py | 3 +- 3 files changed, 32 insertions(+), 43 deletions(-) diff --git a/lbrynet/core/call_later_manager.py b/lbrynet/core/call_later_manager.py index de73953229..d82b456eea 100644 --- a/lbrynet/core/call_later_manager.py +++ b/lbrynet/core/call_later_manager.py @@ -9,22 +9,25 @@ class CallLaterManager(object): - _callLater = None - _pendingCallLaters = [] - _delay = MIN_DELAY - - @classmethod - def get_min_delay(cls): - cls._pendingCallLaters = [cl for cl in cls._pendingCallLaters if cl.active()] - queue_size = len(cls._pendingCallLaters) + def __init__(self, callLater): + """ + :param callLater: (IReactorTime.callLater) + """ + + self._callLater = callLater + self._pendingCallLaters = [] + self._delay = MIN_DELAY + + def get_min_delay(self): + self._pendingCallLaters = [cl for cl in self._pendingCallLaters if cl.active()] + queue_size = len(self._pendingCallLaters) if queue_size > QUEUE_SIZE_THRESHOLD: - cls._delay = min((cls._delay + DELAY_INCREMENT), MAX_DELAY) + self._delay = min((self._delay + DELAY_INCREMENT), MAX_DELAY) else: - cls._delay = max((cls._delay - 2.0 * DELAY_INCREMENT), MIN_DELAY) - return cls._delay + self._delay = max((self._delay - 2.0 * DELAY_INCREMENT), MIN_DELAY) + return self._delay - @classmethod - def _cancel(cls, call_later): + def _cancel(self, call_later): """ :param call_later: DelayedCall :return: (callable) canceller function @@ -38,27 +41,25 @@ def cancel(reason=None): if call_later.active(): call_later.cancel() - if call_later in cls._pendingCallLaters: - cls._pendingCallLaters.remove(call_later) + if call_later in self._pendingCallLaters: + self._pendingCallLaters.remove(call_later) return reason return cancel - @classmethod - def stop(cls): + def stop(self): """ Cancel any callLaters that are still running """ from twisted.internet import defer - while cls._pendingCallLaters: - canceller = cls._cancel(cls._pendingCallLaters[0]) + while self._pendingCallLaters: + canceller = self._cancel(self._pendingCallLaters[0]) try: canceller() except (defer.CancelledError, defer.AlreadyCalledError, ValueError): pass - @classmethod - def call_later(cls, when, what, *args, **kwargs): + def call_later(self, when, what, *args, **kwargs): """ Schedule a call later and get a canceller callback function @@ -70,21 +71,11 @@ def call_later(cls, when, what, *args, **kwargs): :return: (tuple) twisted.internet.base.DelayedCall object, canceller function """ - call_later = cls._callLater(when, what, *args, **kwargs) - canceller = cls._cancel(call_later) - cls._pendingCallLaters.append(call_later) + call_later = self._callLater(when, what, *args, **kwargs) + canceller = self._cancel(call_later) + self._pendingCallLaters.append(call_later) return call_later, canceller - @classmethod - def call_soon(cls, what, *args, **kwargs): - delay = cls.get_min_delay() - return cls.call_later(delay, what, *args, **kwargs) - - @classmethod - def setup(cls, callLater): - """ - Setup the callLater function to use, supports the real reactor as well as task.Clock - - :param callLater: (IReactorTime.callLater) - """ - cls._callLater = callLater + def call_soon(self, what, *args, **kwargs): + delay = self.get_min_delay() + return 
self.call_later(delay, what, *args, **kwargs) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 4286e69ac3..1a6544ab1d 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -53,10 +53,9 @@ def __init__(self, clock=None, callLater=None, resolve=None, listenUDP=None): self.contact_manager = ContactManager(self.clock.seconds) self.reactor_listenUDP = listenUDP self.reactor_resolve = resolve - - CallLaterManager.setup(callLater) - self.reactor_callLater = CallLaterManager.call_later - self.reactor_callSoon = CallLaterManager.call_soon + self.call_later_manager = CallLaterManager(callLater) + self.reactor_callLater = self.call_later_manager.call_later + self.reactor_callSoon = self.call_later_manager.call_soon self._listeningPort = None # object implementing Twisted # IListeningPort This will contain a deferred created when diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 536315c444..9bd4a6b46a 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -4,7 +4,6 @@ from collections import deque from twisted.internet import protocol, defer -from lbrynet.core.call_later_manager import CallLaterManager from error import BUILTIN_EXCEPTIONS, UnknownRemoteException, TimeoutError, TransportNotConnected import constants @@ -461,5 +460,5 @@ def stopProtocol(self): """ log.info('Stopping DHT') self._ping_queue.stop() - CallLaterManager.stop() + self._node.call_later_manager.stop() log.info('DHT stopped') From a952d2d2c883a3c8ea30d4db3f45421ad3fab98d Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 20:41:41 -0400 Subject: [PATCH 36/79] reset _listeningPort and _listening Deferred on teardown --- lbrynet/dht/node.py | 1 + lbrynet/dht/protocol.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 1a6544ab1d..363a9cd52a 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -159,6 +159,7 @@ def stop(self): yield self.safe_stop_looping_call(self._change_token_lc) if self._listeningPort is not None: yield self._listeningPort.stopListening() + self._listeningPort = None def start_listening(self): if not self._listeningPort: diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 9bd4a6b46a..3653603390 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -171,6 +171,8 @@ def _update_contact(result): # refresh the contact in the routing table def startProtocol(self): log.info("DHT listening on UDP %s:%i", self._node.externalIP, self._node.port) + if self._listening.called: + self._listening = defer.Deferred() self._listening.callback(True) return self._ping_queue.start() From 877da7850597b7f5ed6398b1f1c8c30492896faf Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 20:42:51 -0400 Subject: [PATCH 37/79] ping contacts right away during refresh instead of using PingQueue -fixes contact expiration test --- lbrynet/dht/node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 363a9cd52a..eeaed7edd2 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -656,7 +656,7 @@ def _refreshNode(self): def _refreshContacts(self): return defer.DeferredList( - [self._protocol._ping_queue.enqueue_maybe_ping(contact) for contact in self.contacts] + [contact.ping() for contact in self.contacts], consumeErrors=True ) @defer.inlineCallbacks From b5f3ed554271c140ac56ceeee24db9f875b3a536 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 20:44:28 -0400 Subject: [PATCH 38/79] update 
contact expiration test and add re-join after expiration test --- .../functional/dht/test_contact_expiration.py | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/lbrynet/tests/functional/dht/test_contact_expiration.py b/lbrynet/tests/functional/dht/test_contact_expiration.py index 57296b6475..f67b457bc4 100644 --- a/lbrynet/tests/functional/dht/test_contact_expiration.py +++ b/lbrynet/tests/functional/dht/test_contact_expiration.py @@ -14,6 +14,7 @@ def test_expire_stale_peers(self): removed_nodes = [] self.show_info() + # stop 5 nodes for _ in range(5): n = self.nodes[0] removed_nodes.append(n) @@ -23,11 +24,15 @@ def test_expire_stale_peers(self): offline_addresses = self.get_routable_addresses().difference(self.get_online_addresses()) self.assertSetEqual(offline_addresses, removed_addresses) + get_nodes_with_stale_contacts = lambda: filter(lambda node: any(contact.address in offline_addresses for contact in node.contacts), self.nodes + self._seeds) + self.assertRaises(AssertionError, self.verify_all_nodes_are_routable) self.assertTrue(len(get_nodes_with_stale_contacts()) > 1) - for _ in range(90): + + # run the network for an hour, which should expire the removed nodes + for _ in range(60): log.info("Time is %f, nodes with stale contacts: %i/%i", self.clock.seconds(), len(get_nodes_with_stale_contacts()), len(self.nodes + self._seeds)) self.pump_clock(60) @@ -35,9 +40,26 @@ def test_expire_stale_peers(self): self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() - restarted_node = removed_nodes[0] - yield self.run_reactor(1, [restarted_node.start([(seed_name, 4444) - for seed_name in sorted(self.seed_dns.keys())])]) +class TestReJoinExpiredPeer(TestKademliaBase): + network_size = 40 + + @defer.inlineCallbacks + def test_re_join_expired_peer(self): + + removed_node = self.nodes[0] + self.nodes.remove(removed_node) + yield self.run_reactor(1, [removed_node.stop()]) + + # run the network for an hour, which should expire the removed node + for _ in range(60): + self.pump_clock(60) + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() + self.nodes.append(removed_node) + yield self.run_reactor( + 31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] + ) + self.pump_clock(901) self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() From 470ebe2de3332bae9f91b5cc9a6da5d812991331 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 23 May 2018 20:44:58 -0400 Subject: [PATCH 39/79] remove unnecessary CallLaterManager from test_contact_rpc --- lbrynet/tests/functional/dht/test_contact_rpc.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lbrynet/tests/functional/dht/test_contact_rpc.py b/lbrynet/tests/functional/dht/test_contact_rpc.py index 14641a011e..a7dd431997 100644 --- a/lbrynet/tests/functional/dht/test_contact_rpc.py +++ b/lbrynet/tests/functional/dht/test_contact_rpc.py @@ -9,10 +9,8 @@ import lbrynet.dht.msgtypes from lbrynet.dht.error import TimeoutError from lbrynet.dht.node import Node, rpcmethod -from lbrynet.core.call_later_manager import CallLaterManager from mock_transport import listenUDP, resolve - log = logging.getLogger() @@ -23,12 +21,10 @@ class KademliaProtocolTest(unittest.TestCase): def setUp(self): self._reactor = Clock() - CallLaterManager.setup(self._reactor.callLater) self.node = Node(node_id='1' * 48, udpPort=self.udpPort, externalIP="127.0.0.1", listenUDP=listenUDP, resolve=resolve, clock=self._reactor, 
callLater=self._reactor.callLater) def tearDown(self): - CallLaterManager.stop() del self._reactor @defer.inlineCallbacks From d02ed29e505009e8d68345e41f9cc428cb1830e4 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Thu, 24 May 2018 10:23:22 -0400 Subject: [PATCH 40/79] add kademlia store and expiration test --- lbrynet/dht/datastore.py | 22 ++++----- lbrynet/dht/node.py | 20 +++++--- lbrynet/tests/functional/dht/test_store.py | 56 ++++++++++++++++++++++ 3 files changed, 80 insertions(+), 18 deletions(-) create mode 100644 lbrynet/tests/functional/dht/test_store.py diff --git a/lbrynet/dht/datastore.py b/lbrynet/dht/datastore.py index 57cdac9ae5..edfa543fc0 100644 --- a/lbrynet/dht/datastore.py +++ b/lbrynet/dht/datastore.py @@ -23,18 +23,16 @@ def keys(self): def removeExpiredPeers(self): now = int(self._getTime()) - - def notExpired(peer): - if (now - peer[2]) > constants.dataExpireTimeout: - return False - return True - for key in self._dict.keys(): - unexpired_peers = filter(notExpired, self._dict[key]) - self._dict[key] = unexpired_peers + unexpired_peers = filter(lambda peer: now - peer[2] < constants.dataExpireTimeout, self._dict[key]) + if not unexpired_peers: + del self._dict[key] + else: + self._dict[key] = unexpired_peers def hasPeersForBlob(self, key): - if key in self._dict and len(self._dict[key]) > 0: + if key in self._dict and len(filter(lambda peer: self._getTime() - peer[2] < constants.dataExpireTimeout, + self._dict[key])): return True return False @@ -46,8 +44,10 @@ def addPeerToBlob(self, key, value, lastPublished, originallyPublished, original self._dict[key] = [(value, lastPublished, originallyPublished, originalPublisherID)] def getPeersForBlob(self, key): - if key in self._dict: - return [val[0] for val in self._dict[key]] + return [] if key not in self._dict else [ + val[0] for val in filter(lambda peer: self._getTime() - peer[2] < constants.dataExpireTimeout, + self._dict[key]) + ] def removePeer(self, value): for key in self._dict: diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index eeaed7edd2..aaca0a372e 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -30,6 +30,14 @@ log = logging.getLogger(__name__) +def expand_peer(compact_peer_info): + host = ".".join([str(ord(d)) for d in compact_peer_info[:4]]) + port, = struct.unpack('>H', compact_peer_info[4:6]) + peer_node_id = compact_peer_info[6:] + return (peer_node_id, host, port) + + + def rpcmethod(func): """ Decorator to expose Node methods as remote procedure calls @@ -142,7 +150,7 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, self.old_token_secret = None self.externalIP = externalIP self.peerPort = peerPort - self._dataStore = dataStore or datastore.DictDataStore() + self._dataStore = dataStore or datastore.DictDataStore(self.clock.seconds) self.peer_manager = peer_manager or PeerManager() self.peer_finder = peer_finder or DHTPeerFinder(self, self.peer_manager) self._join_deferred = None @@ -428,11 +436,9 @@ def iterativeFindValue(self, key): if find_result: if key in find_result: for peer in find_result[key]: - host = ".".join([str(ord(d)) for d in peer[:4]]) - port, = struct.unpack('>H', peer[4:6]) - peer_node_id = peer[6:] - if (host, port, peer_node_id) not in expanded_peers: - expanded_peers.append((peer_node_id, host, port)) + expanded = expand_peer(peer) + if expanded not in expanded_peers: + expanded_peers.append(expanded) # TODO: get this working # if 'closestNodeNoValue' in find_result: # closest_node_without_value = find_result['closestNodeNoValue'] @@ 
-532,7 +538,7 @@ def store(self, rpc_contact, blob_hash, token, port, originalPublisherID=None, a raise TypeError('Invalid port') compact_address = compact_ip + compact_port + rpc_contact.id - now = int(time.time()) + now = int(self.clock.seconds()) originallyPublished = now - age self._dataStore.addPeerToBlob(blob_hash, compact_address, now, originallyPublished, originalPublisherID) return 'OK' diff --git a/lbrynet/tests/functional/dht/test_store.py b/lbrynet/tests/functional/dht/test_store.py new file mode 100644 index 0000000000..a33a28f778 --- /dev/null +++ b/lbrynet/tests/functional/dht/test_store.py @@ -0,0 +1,56 @@ +import struct +from twisted.internet import defer +from lbrynet.dht import constants +from lbrynet.core.utils import generate_id +from dht_test_environment import TestKademliaBase +import logging + +log = logging.getLogger() + + +class TestStore(TestKademliaBase): + network_size = 40 + + @defer.inlineCallbacks + def test_store_and_expire(self): + blob_hash = generate_id() + announcing_node = self.nodes[20] + # announce the blob + announce_d = announcing_node.announceHaveBlob(blob_hash) + self.pump_clock(5) + storing_node_ids = yield announce_d + all_nodes = set(self.nodes).union(set(self._seeds)) + + # verify the nodes we think stored it did actually store it + storing_nodes = [node for node in all_nodes if node.node_id.encode('hex') in storing_node_ids] + self.assertEquals(len(storing_nodes), len(storing_node_ids)) + self.assertEquals(len(storing_nodes), constants.k) + for node in storing_nodes: + self.assertTrue(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 1) + expanded_peers = [] + for peer in datastore_result: + host = ".".join([str(ord(d)) for d in peer[:4]]) + port, = struct.unpack('>H', peer[4:6]) + peer_node_id = peer[6:] + if (host, port, peer_node_id) not in expanded_peers: + expanded_peers.append((peer_node_id, host, port)) + self.assertEquals(expanded_peers[0], + (announcing_node.node_id, announcing_node.externalIP, announcing_node.peerPort)) + + # verify the announced blob expires in the storing nodes datastores + + self.clock.advance(constants.dataExpireTimeout) # skip the clock directly ahead + for node in storing_nodes: + self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 0) + self.assertTrue(blob_hash in node._dataStore._dict) # the looping call shouldn't have removed it yet + + self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) + for node in storing_nodes: + self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 0) + self.assertTrue(blob_hash not in node._dataStore._dict) # the looping call should have fired after From c521120b177b95ad8c24aecd184e1190261e503d Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Thu, 24 May 2018 12:56:39 -0400 Subject: [PATCH 41/79] update and fix hash announcer test --- lbrynet/dht/hashannouncer.py | 2 +- lbrynet/tests/mocks.py | 3 -- .../test_hash_announcer.py} | 48 ++++++++++--------- 3 files changed, 27 insertions(+), 26 deletions(-) rename lbrynet/tests/unit/{core/server/test_DHTHashAnnouncer.py => dht/test_hash_announcer.py} (59%) diff --git a/lbrynet/dht/hashannouncer.py b/lbrynet/dht/hashannouncer.py index 8b3f9db02e..7c89c1c688 100644 
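The store test above unpacks compact peer triples by hand; patch 40's `expand_peer` helper in node.py does the same job. The layout is 4 bytes of IPv4 address, a 2-byte big-endian port, then the raw node id. A round-trip sketch of that encoding (the `compact_peer` encoder here is hypothetical, written only to mirror `expand_peer`; Python 2 byte strings throughout):

    import struct

    def compact_peer(host, port, node_id):
        # 4-byte IPv4 + 2-byte big-endian port + raw node id, as passed to store()
        return "".join(chr(int(octet)) for octet in host.split(".")) + struct.pack(">H", port) + node_id

    def expand_peer(compact_peer_info):
        host = ".".join(str(ord(d)) for d in compact_peer_info[:4])
        port, = struct.unpack(">H", compact_peer_info[4:6])
        return (compact_peer_info[6:], host, port)

    assert expand_peer(compact_peer("127.0.0.1", 4444, "n" * 48)) == ("n" * 48, "127.0.0.1", 4444)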
--- a/lbrynet/dht/hashannouncer.py +++ b/lbrynet/dht/hashannouncer.py @@ -50,10 +50,10 @@ def _show_announce_progress(self, size, start): @defer.inlineCallbacks def immediate_announce(self, blob_hashes): self.hash_queue.extend(b for b in blob_hashes if b not in self.hash_queue) - log.info("Announcing %i blobs", len(self.hash_queue)) start = self.clock.seconds() progress_lc = task.LoopingCall(self._show_announce_progress, len(self.hash_queue), start) + progress_lc.clock = self.clock progress_lc.start(60, now=False) s = defer.DeferredSemaphore(self.concurrent_announcers) results = yield utils.DeferredDict({blob_hash: s.run(self.do_store, blob_hash) for blob_hash in blob_hashes}) diff --git a/lbrynet/tests/mocks.py b/lbrynet/tests/mocks.py index 5074e0531f..c8e131362b 100644 --- a/lbrynet/tests/mocks.py +++ b/lbrynet/tests/mocks.py @@ -250,9 +250,6 @@ def __init__(self, *args): def hash_queue_size(self): return 0 - def add_supplier(self, supplier): - pass - def immediate_announce(self, *args): pass diff --git a/lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py b/lbrynet/tests/unit/dht/test_hash_announcer.py similarity index 59% rename from lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py rename to lbrynet/tests/unit/dht/test_hash_announcer.py index 60021ffc9a..72f4b4cfc6 100644 --- a/lbrynet/tests/unit/core/server/test_DHTHashAnnouncer.py +++ b/lbrynet/tests/unit/dht/test_hash_announcer.py @@ -1,55 +1,59 @@ from twisted.trial import unittest from twisted.internet import defer, task - +from lbrynet import conf from lbrynet.core import utils +from lbrynet.dht.hashannouncer import DHTHashAnnouncer from lbrynet.tests.util import random_lbry_hash + class MocDHTNode(object): def __init__(self): self.blobs_announced = 0 + self.clock = task.Clock() + self.peerPort = 3333 def announceHaveBlob(self, blob): self.blobs_announced += 1 - return defer.succeed(True) + d = defer.Deferred() + self.clock.callLater(1, d.callback, ['fake']) + return d + -class MocSupplier(object): +class MocStorage(object): def __init__(self, blobs_to_announce): self.blobs_to_announce = blobs_to_announce self.announced = False - def hashes_to_announce(self): + + def get_blobs_to_announce(self): if not self.announced: self.announced = True return defer.succeed(self.blobs_to_announce) else: return defer.succeed([]) + def update_last_announced_blob(self, blob_hash, now): + return defer.succeed(None) + + class DHTHashAnnouncerTest(unittest.TestCase): def setUp(self): + conf.initialize_settings(False) self.num_blobs = 10 self.blobs_to_announce = [] for i in range(0, self.num_blobs): self.blobs_to_announce.append(random_lbry_hash()) - self.clock = task.Clock() self.dht_node = MocDHTNode() + self.clock = self.dht_node.clock utils.call_later = self.clock.callLater - from lbrynet.core.server.DHTHashAnnouncer import DHTHashAnnouncer - self.announcer = DHTHashAnnouncer(self.dht_node, peer_port=3333) - self.supplier = MocSupplier(self.blobs_to_announce) - self.announcer.add_supplier(self.supplier) - - def test_basic(self): - self.announcer._announce_available_hashes() - self.assertEqual(self.announcer.hash_queue_size(), self.announcer.CONCURRENT_ANNOUNCERS) + self.storage = MocStorage(self.blobs_to_announce) + self.announcer = DHTHashAnnouncer(self.dht_node, self.storage) + + @defer.inlineCallbacks + def test_immediate_announce(self): + announce_d = self.announcer.immediate_announce(self.blobs_to_announce) + self.assertEqual(self.announcer.hash_queue_size(), self.num_blobs) self.clock.advance(1) + yield announce_d 
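The `self.clock.advance(1)` above is what allows `announce_d` to fire: `MocDHTNode.announceHaveBlob` resolves its deferred through `self.clock.callLater(1, d.callback, ['fake'])`, so no announce completes until simulated time moves. The underlying Twisted pattern in isolation (stock `twisted.internet.task.Clock`, nothing lbrynet-specific assumed):

    from twisted.internet import defer, task

    clock = task.Clock()
    d = defer.Deferred()
    clock.callLater(1, d.callback, 'fake')
    fired = []
    d.addCallback(fired.append)
    assert fired == []   # the callback is queued, not run; no real time passes
    clock.advance(1)     # advancing simulated time fires it synchronously
    assert fired == ['fake']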
self.assertEqual(self.dht_node.blobs_announced, self.num_blobs) self.assertEqual(self.announcer.hash_queue_size(), 0) - - def test_immediate_announce(self): - # Test that immediate announce puts a hash at the front of the queue - self.announcer._announce_available_hashes() - blob_hash = random_lbry_hash() - self.announcer.immediate_announce([blob_hash]) - self.assertEqual(self.announcer.hash_queue_size(), self.announcer.CONCURRENT_ANNOUNCERS+1) - self.assertEqual(blob_hash, self.announcer.hash_queue[0][0]) - From 98e21cdba0b45cca57ff2bf3ddda3f1a8c6f1342 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Thu, 24 May 2018 12:57:20 -0400 Subject: [PATCH 42/79] test re-join dht --- .../functional/dht/test_contact_expiration.py | 24 ------- .../functional/dht/test_contact_rejoin.py | 70 +++++++++++++++++++ 2 files changed, 70 insertions(+), 24 deletions(-) create mode 100644 lbrynet/tests/functional/dht/test_contact_rejoin.py diff --git a/lbrynet/tests/functional/dht/test_contact_expiration.py b/lbrynet/tests/functional/dht/test_contact_expiration.py index f67b457bc4..44d20d98cd 100644 --- a/lbrynet/tests/functional/dht/test_contact_expiration.py +++ b/lbrynet/tests/functional/dht/test_contact_expiration.py @@ -39,27 +39,3 @@ def test_expire_stale_peers(self): self.assertTrue(len(get_nodes_with_stale_contacts()) == 0) self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() - - -class TestReJoinExpiredPeer(TestKademliaBase): - network_size = 40 - - @defer.inlineCallbacks - def test_re_join_expired_peer(self): - - removed_node = self.nodes[0] - self.nodes.remove(removed_node) - yield self.run_reactor(1, [removed_node.stop()]) - - # run the network for an hour, which should expire the removed node - for _ in range(60): - self.pump_clock(60) - self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() - self.nodes.append(removed_node) - yield self.run_reactor( - 31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] - ) - self.pump_clock(901) - self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() diff --git a/lbrynet/tests/functional/dht/test_contact_rejoin.py b/lbrynet/tests/functional/dht/test_contact_rejoin.py new file mode 100644 index 0000000000..72cb939fc9 --- /dev/null +++ b/lbrynet/tests/functional/dht/test_contact_rejoin.py @@ -0,0 +1,70 @@ +import logging +from twisted.internet import defer +from dht_test_environment import TestKademliaBase + +log = logging.getLogger() + + +class TestReJoin(TestKademliaBase): + network_size = 40 + + @defer.inlineCallbacks + def test_re_join(self): + + removed_node = self.nodes[0] + self.nodes.remove(removed_node) + yield self.run_reactor(1, [removed_node.stop()]) + + # run the network for an hour, which should expire the removed node + self.pump_clock(3600) + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() + self.nodes.append(removed_node) + yield self.run_reactor( + 31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] + ) + self.pump_clock(901) + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() + + @defer.inlineCallbacks + def test_re_join_with_new_ip(self): + + removed_node = self.nodes[0] + self.nodes.remove(removed_node) + yield self.run_reactor(1, [removed_node.stop()]) + + # run the network for an hour, which should expire the removed node + for _ in range(60): + self.pump_clock(60) + self.verify_all_nodes_are_routable() + self.verify_all_nodes_are_pingable() + 
removed_node.externalIP = "10.43.43.43"
+        self.nodes.append(removed_node)
+        yield self.run_reactor(
+            31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])]
+        )
+        self.pump_clock(901)
+        self.verify_all_nodes_are_routable()
+        self.verify_all_nodes_are_pingable()
+
+    @defer.inlineCallbacks
+    def test_re_join_with_new_node_id(self):
+
+        removed_node = self.nodes[0]
+        self.nodes.remove(removed_node)
+        yield self.run_reactor(1, [removed_node.stop()])
+
+        # run the network for an hour, which should expire the removed node
+        for _ in range(60):
+            self.pump_clock(60)
+        self.verify_all_nodes_are_routable()
+        self.verify_all_nodes_are_pingable()
+        removed_node.node_id = removed_node._generateID()
+        self.nodes.append(removed_node)
+        yield self.run_reactor(
+            31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])]
+        )
+        self.pump_clock(901)
+        self.verify_all_nodes_are_routable()
+        self.verify_all_nodes_are_pingable()

From 07f92014d7229c7665cba2f88fb320181261b326 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 24 May 2018 15:52:37 -0400
Subject: [PATCH 43/79] omit bad contacts from getPeersForBlob

-refresh stored contacts to detect when they go offline or come back
---
 lbrynet/dht/contact.py                     |  17 ++-
 lbrynet/dht/datastore.py                   |  44 ++++---
 lbrynet/dht/node.py                        |  34 ++++--
 lbrynet/tests/functional/dht/test_store.py | 130 +++++++++++++++++++++
 4 files changed, 198 insertions(+), 27 deletions(-)

diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py
index 9cf6eb67a7..b8dac520ff 100644
--- a/lbrynet/dht/contact.py
+++ b/lbrynet/dht/contact.py
@@ -1,6 +1,15 @@
+import ipaddress
 from lbrynet.dht import constants


+def is_valid_ipv4(address):
+    try:
+        ip = ipaddress.ip_address(address.decode())  # this needs to be unicode, thus the decode()
+        return ip.version == 4
+    except ValueError:  # ip_address() raises ValueError (AddressValueError is a subclass) on invalid input
+        return False
+
+
 class _Contact(object):
     """ Encapsulation for remote contact

     This class contains information on a single remote contact, and also provides a direct RPC API to the contact
     """

     def __init__(self, contactManager, id, ipAddress, udpPort, networkProtocol, firstComm):
-        self._contactManager = contactManager
-        self._id = id
         if id is not None:
             if not len(id) == constants.key_bits / 8:
                 raise ValueError("invalid node id: %s" % id.encode('hex'))
+        if not 0 <= udpPort <= 65535:  # 65535 is the highest valid UDP port
+            raise ValueError("invalid port")
+        if not is_valid_ipv4(ipAddress):
+            raise ValueError("invalid ip address")
+        self._contactManager = contactManager
+        self._id = id
         self.address = ipAddress
         self.port = udpPort
         self._networkProtocol = networkProtocol
diff --git a/lbrynet/dht/datastore.py b/lbrynet/dht/datastore.py
index edfa543fc0..f9e06923c0 100644
--- a/lbrynet/dht/datastore.py
+++ b/lbrynet/dht/datastore.py
@@ -21,36 +21,54 @@ def keys(self):
         """ Return a list of the keys in this data store """
         return self._dict.keys()

+    def filter_bad_and_expired_peers(self, key):
+        """
+        Returns only non-expired and unknown/good peers
+        """
+        return filter(
+            lambda peer:
+            self._getTime() - peer[3] < constants.dataExpireTimeout and peer[0].contact_is_good is not False,
+            self._dict[key]
+        )
+
+    def filter_expired_peers(self, key):
+        """
+        Returns only non-expired peers
+        """
+        return filter(lambda peer: self._getTime() - peer[2] < constants.dataExpireTimeout, self._dict[key])
+
     def removeExpiredPeers(self):
-        now = int(self._getTime())
         for key in self._dict.keys():
-            unexpired_peers = filter(lambda peer: now - peer[2] < constants.dataExpireTimeout, self._dict[key])
+            unexpired_peers = self.filter_expired_peers(key)
             if not unexpired_peers:
                 del self._dict[key]
             else:
                 self._dict[key] = unexpired_peers

     def hasPeersForBlob(self, key):
-        if key in self._dict and len(filter(lambda peer: self._getTime() - peer[2] < constants.dataExpireTimeout,
-                                            self._dict[key])):
+        if key in self._dict and len(self.filter_bad_and_expired_peers(key)):
             return True
         return False

-    def addPeerToBlob(self, key, value, lastPublished, originallyPublished, originalPublisherID):
+    def addPeerToBlob(self, contact, key, compact_address, lastPublished, originallyPublished, originalPublisherID):
         if key in self._dict:
-            if value not in map(lambda store_tuple: store_tuple[0], self._dict[key]):
-                self._dict[key].append((value, lastPublished, originallyPublished, originalPublisherID))
+            if compact_address not in map(lambda store_tuple: store_tuple[1], self._dict[key]):
+                self._dict[key].append((contact, compact_address, lastPublished, originallyPublished, originalPublisherID))
         else:
-            self._dict[key] = [(value, lastPublished, originallyPublished, originalPublisherID)]
+            self._dict[key] = [(contact, compact_address, lastPublished, originallyPublished, originalPublisherID)]

     def getPeersForBlob(self, key):
         return [] if key not in self._dict else [val[1] for val in self.filter_bad_and_expired_peers(key)]

     def removePeer(self, value):
         for key in self._dict:
-            self._dict[key] = [val for val in self._dict[key] if val[0] != value]
+            self._dict[key] = [val for val in self._dict[key] if val[1] != value]
             if not self._dict[key]:
                 del self._dict[key]
+
+    def getStoringContacts(self):
+        contacts = set()
+        for key in self._dict:
+            for values in self._dict[key]:
+                contacts.add(values[0])
+        return list(contacts)
diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py
index aaca0a372e..2601248e55 100644
--- a/lbrynet/dht/node.py
+++ b/lbrynet/dht/node.py
@@ -506,15 +506,20 @@ def ping(self):

     @rpcmethod
     def store(self, rpc_contact, blob_hash, token, port, originalPublisherID=None, age=0):
-        """ Store the received data in this node's local hash table
+        """ Store the received data in this node's local datastore

-        @param blob_hash: The hashtable key of the data
+        @param blob_hash: The hash of the data
         @type blob_hash: str
-        @param value: The actual data (the value associated with C{key})
-        @type value: str
-        @param originalPublisherID: The node ID of the node that is the
-                                    B{original} publisher of the data
+
+        @param token: The token we previously returned when this contact sent us a findValue
+        @type token: str
+
+        @param port: The TCP port the contact is listening on for requests for this blob (the peerPort)
+        @type port: int
+
+        @param originalPublisherID: The node ID of the node that is the publisher of the data
         @type originalPublisherID: str
+
         @param age: The relative age of the data (time in seconds since it was
                     originally published). Note that the original publish time
                     isn't actually given, to compensate for clock skew between
                     different nodes.
         @type age: int

         @rtype: str
-
-        @todo: Since the data (value) may be large, passing it around as a buffer
-               (which is the case currently) might not be a good idea... will have
-               to fix this (perhaps use a stream from the Protocol class?)
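+        @return: the string 'OK' once the peer has been stored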
""" + if originalPublisherID is None: originalPublisherID = rpc_contact.id compact_ip = rpc_contact.compact_ip() @@ -536,11 +538,11 @@ def store(self, rpc_contact, blob_hash, token, port, originalPublisherID=None, a compact_port = str(struct.pack('>H', port)) else: raise TypeError('Invalid port') - compact_address = compact_ip + compact_port + rpc_contact.id now = int(self.clock.seconds()) originallyPublished = now - age - self._dataStore.addPeerToBlob(blob_hash, compact_address, now, originallyPublished, originalPublisherID) + self._dataStore.addPeerToBlob(rpc_contact, blob_hash, compact_address, now, originallyPublished, + originalPublisherID) return 'OK' @rpcmethod @@ -658,6 +660,7 @@ def _refreshNode(self): replication/republishing as necessary """ yield self._refreshRoutingTable() self._dataStore.removeExpiredPeers() + yield self._refreshStoringPeers() defer.returnValue(None) def _refreshContacts(self): @@ -665,6 +668,13 @@ def _refreshContacts(self): [contact.ping() for contact in self.contacts], consumeErrors=True ) + def _refreshStoringPeers(self): + storing_contacts = self._dataStore.getStoringContacts() + return defer.DeferredList( + [self._protocol._ping_queue.enqueue_maybe_ping(contact) for contact in storing_contacts], + consumeErrors=True + ) + @defer.inlineCallbacks def _refreshRoutingTable(self): nodeIDs = self._routingTable.getRefreshList(0, True) diff --git a/lbrynet/tests/functional/dht/test_store.py b/lbrynet/tests/functional/dht/test_store.py index a33a28f778..911ea2563b 100644 --- a/lbrynet/tests/functional/dht/test_store.py +++ b/lbrynet/tests/functional/dht/test_store.py @@ -28,6 +28,10 @@ def test_store_and_expire(self): for node in storing_nodes: self.assertTrue(node._dataStore.hasPeersForBlob(blob_hash)) datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(map(lambda contact: (contact.id, contact.address, contact.port), + node._dataStore.getStoringContacts()), [(announcing_node.node_id, + announcing_node.externalIP, + announcing_node.port)]) self.assertEquals(len(datastore_result), 1) expanded_peers = [] for peer in datastore_result: @@ -47,10 +51,136 @@ def test_store_and_expire(self): datastore_result = node._dataStore.getPeersForBlob(blob_hash) self.assertEquals(len(datastore_result), 0) self.assertTrue(blob_hash in node._dataStore._dict) # the looping call shouldn't have removed it yet + self.assertEquals(len(node._dataStore.getStoringContacts()), 1) self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) for node in storing_nodes: self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) datastore_result = node._dataStore.getPeersForBlob(blob_hash) self.assertEquals(len(datastore_result), 0) + self.assertEquals(len(node._dataStore.getStoringContacts()), 0) + self.assertTrue(blob_hash not in node._dataStore._dict) # the looping call should have fired + + @defer.inlineCallbacks + def test_refresh_storing_peers(self): + blob_hash = generate_id() + announcing_node = self.nodes[20] + # announce the blob + announce_d = announcing_node.announceHaveBlob(blob_hash) + self.pump_clock(5) + storing_node_ids = yield announce_d + all_nodes = set(self.nodes).union(set(self._seeds)) + + # verify the nodes we think stored it did actually store it + storing_nodes = [node for node in all_nodes if node.node_id.encode('hex') in storing_node_ids] + self.assertEquals(len(storing_nodes), len(storing_node_ids)) + self.assertEquals(len(storing_nodes), constants.k) + for node in storing_nodes: + 
self.assertTrue(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(map(lambda contact: (contact.id, contact.address, contact.port), + node._dataStore.getStoringContacts()), [(announcing_node.node_id, + announcing_node.externalIP, + announcing_node.port)]) + self.assertEquals(len(datastore_result), 1) + expanded_peers = [] + for peer in datastore_result: + host = ".".join([str(ord(d)) for d in peer[:4]]) + port, = struct.unpack('>H', peer[4:6]) + peer_node_id = peer[6:] + if (host, port, peer_node_id) not in expanded_peers: + expanded_peers.append((peer_node_id, host, port)) + self.assertEquals(expanded_peers[0], + (announcing_node.node_id, announcing_node.externalIP, announcing_node.peerPort)) + + self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) + + # verify the announced blob expires in the storing nodes datastores + + self.clock.advance(constants.dataExpireTimeout) # skip the clock directly ahead + for node in storing_nodes: + self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 0) + self.assertTrue(blob_hash in node._dataStore._dict) # the looping call shouldn't have removed it yet + self.assertEquals(len(node._dataStore.getStoringContacts()), 1) + + self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) + for node in storing_nodes: + self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 0) + self.assertEquals(len(node._dataStore.getStoringContacts()), 0) self.assertTrue(blob_hash not in node._dataStore._dict) # the looping call should have fired after + + +class TestStoringNodeWentStale(TestKademliaBase): + network_size = 40 + + @defer.inlineCallbacks + def test_storing_node_went_stale_then_came_back(self): + blob_hash = generate_id() + announcing_node = self.nodes[20] + # announce the blob + announce_d = announcing_node.announceHaveBlob(blob_hash) + announce_time = self.clock.seconds() + self.pump_clock(5) + storing_node_ids = yield announce_d + all_nodes = set(self.nodes).union(set(self._seeds)) + + # verify the nodes we think stored it did actually store it + storing_nodes = [node for node in all_nodes if node.node_id.encode('hex') in storing_node_ids] + self.assertEquals(len(storing_nodes), len(storing_node_ids)) + self.assertEquals(len(storing_nodes), constants.k) + for node in storing_nodes: + self.assertTrue(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(map(lambda contact: (contact.id, contact.address, contact.port), + node._dataStore.getStoringContacts()), [(announcing_node.node_id, + announcing_node.externalIP, + announcing_node.port)]) + self.assertEquals(len(datastore_result), 1) + expanded_peers = [] + for peer in datastore_result: + host = ".".join([str(ord(d)) for d in peer[:4]]) + port, = struct.unpack('>H', peer[4:6]) + peer_node_id = peer[6:] + if (host, port, peer_node_id) not in expanded_peers: + expanded_peers.append((peer_node_id, host, port)) + self.assertEquals(expanded_peers[0], + (announcing_node.node_id, announcing_node.externalIP, announcing_node.peerPort)) + + self.nodes.remove(announcing_node) + yield self.run_reactor(1, [announcing_node.stop()]) + + # run the network for an hour, 
which should expire the removed node and the announced value
+        self.pump_clock(3600)
+        self.verify_all_nodes_are_routable()
+        self.verify_all_nodes_are_pingable()
+
+        for node in storing_nodes:  # make sure the contact isn't returned as a peer for the blob, but that
+                                    # we still have the entry in the datastore in case the node returns
+            self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash))
+            datastore_result = node._dataStore.getPeersForBlob(blob_hash)
+            self.assertEquals(len(datastore_result), 0)
+            self.assertEquals(len(node._dataStore.getStoringContacts()), 1)
+            self.assertTrue(blob_hash in node._dataStore._dict)
+
+        # bring the announcing node back online
+        self.nodes.append(announcing_node)
+        yield self.run_reactor(
+            31, [announcing_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])]
+        )
+        self.pump_clock(24*60+1)  # FIXME: this should work after 12 minutes + 1 second, yet it doesn't
+        self.verify_all_nodes_are_routable()
+        self.verify_all_nodes_are_pingable()
+
+        # now the announcing node should once again be returned as a peer for the blob
+        for node in storing_nodes:
+            self.assertTrue(node._dataStore.hasPeersForBlob(blob_hash))
+            datastore_result = node._dataStore.getPeersForBlob(blob_hash)
+            self.assertEquals(len(datastore_result), 1)
+            self.assertEquals(len(node._dataStore.getStoringContacts()), 1)
+            self.assertTrue(blob_hash in node._dataStore._dict)
+
+        # TODO: handle the case where the announcing node rejoins with a different address from what is stored

From 29d5750371419d6ee63c6ed6b9fc139b868e6749 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 24 May 2018 15:53:43 -0400
Subject: [PATCH 44/79] pylint

---
 lbrynet/dht/node.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py
index 2601248e55..09ff23efd3 100644
--- a/lbrynet/dht/node.py
+++ b/lbrynet/dht/node.py
@@ -9,7 +9,6 @@
 import binascii
 import hashlib
 import struct
-import time
 import logging

 from twisted.internet import defer, error, task

From ec1b6b2387c4b6238c0b2b9da562504d06a6938a Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Tue, 29 May 2018 10:58:24 -0400
Subject: [PATCH 45/79] comments, cleaner key_bits constant

---
 lbrynet/dht/constants.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lbrynet/dht/constants.py b/lbrynet/dht/constants.py
index 9ce21d96b1..7ea3f7258d 100644
--- a/lbrynet/dht/constants.py
+++ b/lbrynet/dht/constants.py
@@ -45,17 +45,13 @@

 ######## IMPLEMENTATION-SPECIFIC CONSTANTS ###########

-#: The interval in which the node should check its whether any buckets need refreshing,
-#: or whether any data needs to be republished (in seconds)
+#: The interval for the node to check whether any buckets need refreshing
 checkRefreshInterval = refreshTimeout / 5

 #: Max size of a single UDP datagram, in bytes. If a message is larger than this, it will
 #: be spread across several UDP packets.
 udpDatagramMaxSize = 8192 # 8 KB

-from lbrynet.core.cryptoutils import get_lbry_hash_obj
-
-h = get_lbry_hash_obj()
-key_bits = h.digest_size * 8 # 384 bits
+key_bits = 384

 rpc_id_length = 20

From 0d23c687970fa0f06f52cd1545330abf55e19ba7 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Tue, 29 May 2018 10:59:07 -0400
Subject: [PATCH 46/79] raise attribute error for non-rpc functions in Contact

---
 lbrynet/dht/contact.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py
index b8dac520ff..d7bbb443b9 100644
--- a/lbrynet/dht/contact.py
+++ b/lbrynet/dht/contact.py
@@ -139,6 +139,9 @@ def __getattr__(self, name):
             host Node's C{_protocol} object).
         """

+        if name not in ['ping', 'findValue', 'findNode', 'store']:
+            raise AttributeError("unknown command: %s" % name)
+
         def _sendRPC(*args, **kwargs):
             return self._networkProtocol.sendRPC(self, name, args, **kwargs)

From 73e813f9ec37585a6b43791f1281eb0cdf43469f Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Tue, 29 May 2018 10:59:46 -0400
Subject: [PATCH 47/79] verify key size in Distance

---
 lbrynet/dht/distance.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lbrynet/dht/distance.py b/lbrynet/dht/distance.py
index cda548db20..8945f8b30f 100644
--- a/lbrynet/dht/distance.py
+++ b/lbrynet/dht/distance.py
@@ -1,3 +1,6 @@
+from lbrynet.dht import constants
+
+
 class Distance(object):
     """Calculate the XOR result between two string variables.

@@ -6,6 +9,8 @@ class Distance(object):
     """

     def __init__(self, key):
+        if len(key) != constants.key_bits / 8:
+            raise ValueError("invalid key length: %i" % len(key))
         self.key = key
         self.val_key_one = long(key.encode('hex'), 16)

From fb3aac15fa865827f3cb6eb28b43059de2cf513b Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Tue, 29 May 2018 11:00:59 -0400
Subject: [PATCH 48/79] add optional delay argument to enqueue_maybe_ping, fix default value

---
 lbrynet/dht/protocol.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py
index 3653603390..829c02e9b4 100644
--- a/lbrynet/dht/protocol.py
+++ b/lbrynet/dht/protocol.py
@@ -28,12 +28,12 @@ def __init__(self, node):
         self._semaphore = defer.DeferredSemaphore(1)
         self._ping_semaphore = defer.DeferredSemaphore(constants.alpha)
         self._process_lc = node.get_looping_call(self._semaphore.run, self._process)
-        self._delay = 300

-    def _add_contact(self, contact):
+    def _add_contact(self, contact, delay=None):
         if contact in self._enqueued_contacts:
             return defer.succeed(None)
-        self._enqueued_contacts[contact] = self._get_time() + self._delay
+        delay = delay or constants.checkRefreshInterval
+        self._enqueued_contacts[contact] = self._get_time() + delay
         self._queue.append(contact)
         return defer.succeed(None)

@@ -49,11 +49,6 @@ def _process(self):
             self._queue.appendleft(contact)
             defer.returnValue(None)

-        def _ping(contact):
-            d = contact.ping()
-            d.addErrback(lambda err: err.trap(TimeoutError))
-            return d
-
         pinged = []
         checked = []
         while now > self._enqueued_contacts[contact]:
@@ -65,9 +60,17 @@ def _ping(contact):
             contact = self._queue.popleft()
             if not now > self._enqueued_contacts[contact]:
                 checked.append(contact)
-        # log.info("ping %i/%i peers", len(pinged), len(checked))

-        yield defer.DeferredList([self._ping_semaphore.run(_ping, contact) for contact in pinged])
+        @defer.inlineCallbacks
+        def _ping(contact):
+            try:
+                yield contact.ping()
+            except TimeoutError:
+                pass
+            except Exception as err:
+                log.warning("unexpected error:
%s", err) + + yield defer.DeferredList([_ping(contact) for contact in pinged]) for contact in checked: if contact in self._enqueued_contacts: @@ -81,8 +84,8 @@ def start(self): def stop(self): return self._node.safe_stop_looping_call(self._process_lc) - def enqueue_maybe_ping(self, contact): - return self._semaphore.run(self._add_contact, contact) + def enqueue_maybe_ping(self, contact, delay=None): + return self._semaphore.run(self._add_contact, contact, delay) class KademliaProtocol(protocol.DatagramProtocol): From 8efede6ad67b9889ab04c627b8a85ed9c446403a Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 11:01:58 -0400 Subject: [PATCH 49/79] maybe_ping bad and unknown contacts instead of only unknown --- lbrynet/dht/protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index 829c02e9b4..f7a39a8320 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -53,7 +53,7 @@ def _process(self): checked = [] while now > self._enqueued_contacts[contact]: checked.append(contact) - if contact.contact_is_good is None: + if not contact.contact_is_good: pinged.append(contact) if not len(self._queue): break From 921ee3c4c15f839cc78cd4477ba5545f17f5206e Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 11:02:44 -0400 Subject: [PATCH 50/79] use refreshTimeout in getRefreshList --- lbrynet/dht/routingtable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 70713a9270..6e5456bb84 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -244,7 +244,7 @@ def getRefreshList(self, startIndex=0, force=False): refreshIDs = [] now = int(self._getTime()) for bucket in self._buckets[startIndex:]: - if force or now - bucket.lastAccessed >= constants.checkRefreshInterval: + if force or now - bucket.lastAccessed >= constants.refreshTimeout: searchID = self._randomIDInBucketRange(bucketIndex) refreshIDs.append(searchID) bucketIndex += 1 From 945da5985ee86ba9e1f9ac96c1cf25f7d6d609b2 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 11:04:13 -0400 Subject: [PATCH 51/79] fix age used in datastore to determine if a value is expired --- lbrynet/dht/datastore.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/lbrynet/dht/datastore.py b/lbrynet/dht/datastore.py index f9e06923c0..012122335b 100644 --- a/lbrynet/dht/datastore.py +++ b/lbrynet/dht/datastore.py @@ -10,7 +10,7 @@ class DictDataStore(UserDict.DictMixin): def __init__(self, getTime=None): # Dictionary format: - # { : (, , ) } + # { : (, , , ) } self._dict = {} if not getTime: from twisted.internet import reactor @@ -35,7 +35,7 @@ def filter_expired_peers(self, key): """ Returns only non-expired peers """ - return filter(lambda peer: self._getTime() - peer[2] < constants.dataExpireTimeout, self._dict[key]) + return filter(lambda peer: self._getTime() - peer[3] < constants.dataExpireTimeout, self._dict[key]) def removeExpiredPeers(self): for key in self._dict.keys(): @@ -46,9 +46,7 @@ def removeExpiredPeers(self): self._dict[key] = unexpired_peers def hasPeersForBlob(self, key): - if key in self._dict and len(self.filter_bad_and_expired_peers(key)): - return True - return False + return True if key in self._dict and len(self.filter_bad_and_expired_peers(key)) else False def addPeerToBlob(self, contact, key, compact_address, lastPublished, originallyPublished, originalPublisherID): if key in 
self._dict: @@ -60,12 +58,6 @@ def addPeerToBlob(self, contact, key, compact_address, lastPublished, originally def getPeersForBlob(self, key): return [] if key not in self._dict else [val[1] for val in self.filter_bad_and_expired_peers(key)] - def removePeer(self, value): - for key in self._dict: - self._dict[key] = [val for val in self._dict[key] if val[1] != value] - if not self._dict[key]: - del self._dict[key] - def getStoringContacts(self): contacts = set() for key in self._dict: From 9582b7fcf5f5c1a98b53537ec2ab590cdec72c1f Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 11:05:33 -0400 Subject: [PATCH 52/79] use maybe_ping for refreshing stale buckets and storing peers -move store refresh to its own looping call --- lbrynet/dht/node.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 09ff23efd3..e4add4bdc6 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -36,7 +36,6 @@ def expand_peer(compact_peer_info): return (peer_node_id, host, port) - def rpcmethod(func): """ Decorator to expose Node methods as remote procedure calls @@ -132,6 +131,7 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None, self.port = udpPort self._change_token_lc = self.get_looping_call(self.change_token) self._refresh_node_lc = self.get_looping_call(self._refreshNode) + self._refresh_contacts_lc = self.get_looping_call(self._refreshContacts) # Create k-buckets (for storing contacts) if routingTableClass is None: @@ -164,6 +164,7 @@ def stop(self): # stop LoopingCalls: yield self.safe_stop_looping_call(self._refresh_node_lc) yield self.safe_stop_looping_call(self._change_token_lc) + yield self.safe_stop_looping_call(self._refresh_contacts_lc) if self._listeningPort is not None: yield self._listeningPort.stopListening() self._listeningPort = None @@ -283,6 +284,7 @@ def start(self, known_node_addresses=None): self.safe_start_looping_call(self._change_token_lc, constants.tokenSecretChangeInterval) # Start refreshing k-buckets periodically, if necessary self.safe_start_looping_call(self._refresh_node_lc, constants.checkRefreshInterval) + self.safe_start_looping_call(self._refresh_contacts_lc, 60) @property def contacts(self): @@ -664,20 +666,18 @@ def _refreshNode(self): def _refreshContacts(self): return defer.DeferredList( - [contact.ping() for contact in self.contacts], consumeErrors=True + [self._protocol._ping_queue.enqueue_maybe_ping(contact, delay=0) for contact in self.contacts] ) def _refreshStoringPeers(self): storing_contacts = self._dataStore.getStoringContacts() return defer.DeferredList( - [self._protocol._ping_queue.enqueue_maybe_ping(contact) for contact in storing_contacts], - consumeErrors=True + [self._protocol._ping_queue.enqueue_maybe_ping(contact, delay=0) for contact in storing_contacts] ) @defer.inlineCallbacks def _refreshRoutingTable(self): - nodeIDs = self._routingTable.getRefreshList(0, True) - yield self._refreshContacts() + nodeIDs = self._routingTable.getRefreshList(0, False) while nodeIDs: searchID = nodeIDs.pop() yield self.iterativeFindNode(searchID) From 51b42da1c5c1668144488e5e0db8b736d3b9489c Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:15:12 -0400 Subject: [PATCH 53/79] remove unused scripts --- scripts/create_network.py | 92 ---------------- scripts/dht_scripts.py | 129 ---------------------- scripts/dhttest.py | 163 ---------------------------- scripts/node_rpc_cli.py | 41 ------- scripts/rpc_node.py | 214 
------------------------------------- scripts/simple_dht_node.py | 47 -------- 6 files changed, 686 deletions(-) delete mode 100644 scripts/create_network.py delete mode 100644 scripts/dht_scripts.py delete mode 100644 scripts/dhttest.py delete mode 100644 scripts/node_rpc_cli.py delete mode 100644 scripts/rpc_node.py delete mode 100755 scripts/simple_dht_node.py diff --git a/scripts/create_network.py b/scripts/create_network.py deleted file mode 100644 index 52726c4e19..0000000000 --- a/scripts/create_network.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -# -# This library is free software, distributed under the terms of -# the GNU Lesser General Public License Version 3, or any later version. -# See the COPYING file included in this archive -# - -# Thanks to Paul Cannon for IP-address resolution functions (taken from aspn.activestate.com) - -import argparse -import os -import sys -import time -import signal - -amount = 0 - - -def destroyNetwork(nodes): - print 'Destroying Kademlia network' - i = 0 - for node in nodes: - i += 1 - hashAmount = i * 50 / amount - hashbar = '#' * hashAmount - output = '\r[%-50s] %d/%d' % (hashbar, i, amount) - sys.stdout.write(output) - time.sleep(0.15) - os.kill(node, signal.SIGTERM) - print - - -def main(): - parser = argparse.ArgumentParser(description="Launch a network of dht nodes") - - parser.add_argument("amount_of_nodes", - help="The number of nodes to create", - type=int) - parser.add_argument( - "--nic_ip_address", - help=("The network interface on which these nodes will listen for connections " - "from each other and from other nodes. If omitted, an attempt will be " - "made to automatically determine the system's IP address, but this may " - "result in the nodes being reachable only from this system")) - - args = parser.parse_args() - - global amount - amount = args.amount_of_nodes - if args.nic_ip_address: - ipAddress = args.nic_ip_address - else: - import socket - ipAddress = socket.gethostbyname(socket.gethostname()) - print 'Network interface IP address omitted; using %s' % ipAddress - - startPort = 4000 - port = startPort + 1 - nodes = [] - print 'Creating Kademlia network' - try: - node = os.spawnlp( - os.P_NOWAIT, 'lbrynet-launch-node', 'lbrynet-launch-node', str(startPort)) - nodes.append(node) - for i in range(amount - 1): - time.sleep(0.15) - hashAmount = i * 50 / amount - hashbar = '#' * hashAmount - output = '\r[%-50s] %d/%d' % (hashbar, i, amount) - sys.stdout.write(output) - node = os.spawnlp( - os.P_NOWAIT, 'lbrynet-launch-node', 'lbrynet-launch-node', str(port), - ipAddress, str(startPort)) - nodes.append(node) - port += 1 - except KeyboardInterrupt: - '\nNetwork creation cancelled.' 
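The enqueue_maybe_ping(contact, delay) change in patch 48, the `if not contact.contact_is_good` check in patch 49 and the refresh loops in patch 52 add up to a coalescing ping queue. A condensed sketch of that behaviour, with illustrative names and defaults only (the real code in lbrynet/dht/protocol.py uses a DeferredSemaphore and a LoopingCall and differs in detail):

    from twisted.internet import defer

    class PingQueueSketch(object):
        def __init__(self, clock, default_delay=300):
            self._clock = clock
            self._default_delay = default_delay
            self._enqueued = {}  # contact -> earliest time it may be pinged

        def enqueue_maybe_ping(self, contact, delay=None):
            delay = self._default_delay if delay is None else delay
            due = self._clock.seconds() + delay
            # keep the earliest due time if the contact is already queued
            if contact not in self._enqueued or due < self._enqueued[contact]:
                self._enqueued[contact] = due

        @defer.inlineCallbacks
        def process(self):
            now = self._clock.seconds()
            due = [c for c, t in self._enqueued.items() if now > t]
            # ping contacts that are bad or of unknown status; known-good
            # contacts are dropped from the queue without a ping (patch 49)
            yield defer.DeferredList(
                [c.ping() for c in due if not c.contact_is_good], consumeErrors=True)
            for c in due:
                del self._enqueued[c]

Passing delay=0, as the patch 52 refresh loops do, makes a contact eligible on the next processing tick instead of after the default back-off.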
- destroyNetwork(nodes) - sys.exit(1) - - print '\n\n---------------\nNetwork running\n---------------\n' - try: - while 1: - time.sleep(1) - except KeyboardInterrupt: - pass - finally: - destroyNetwork(nodes) - - -if __name__ == '__main__': - main() diff --git a/scripts/dht_scripts.py b/scripts/dht_scripts.py deleted file mode 100644 index 0aec28c57d..0000000000 --- a/scripts/dht_scripts.py +++ /dev/null @@ -1,129 +0,0 @@ -from lbrynet.core import log_support - -import logging.handlers -import sys -import traceback - -from lbrynet.dht.node import Node - -from twisted.internet import reactor, defer -from lbrynet.core.utils import generate_id - - -log = logging.getLogger(__name__) - - -def print_usage(): - print "Usage:\n%s UDP_PORT KNOWN_NODE_IP KNOWN_NODE_PORT HASH" - - -@defer.inlineCallbacks -def join_network(udp_port, known_nodes): - lbryid = generate_id() - - log.info('Creating node') - node = Node(udpPort=udp_port, node_id=lbryid) - - log.info('Joining network') - yield node.joinNetwork(known_nodes) - - defer.returnValue(node) - - -@defer.inlineCallbacks -def get_hosts(node, h): - log.info("Looking up %s", h) - hosts = yield node.getPeersForBlob(h.decode("hex")) - log.info("Hosts returned from the DHT: %s", hosts) - - -@defer.inlineCallbacks -def announce_hash(node, h): - results = yield node.announceHaveBlob(h, 34567) - for success, result in results: - if success: - log.info("Succeeded: %s", str(result)) - else: - log.info("Failed: %s", str(result.getErrorMessage())) - - -# def get_args(): -# if len(sys.argv) < 5: -# print_usage() -# sys.exit(1) -# udp_port = int(sys.argv[1]) -# known_nodes = [(sys.argv[2], int(sys.argv[3]))] -# h = binascii.unhexlify(sys.argv[4]) -# return udp_port, known_nodes, h - - -@defer.inlineCallbacks -def connect(port=None): - try: - if port is None: - raise Exception("need a port") - known_nodes = [('54.236.227.82', 4444)] # lbrynet1 - node = yield join_network(port, known_nodes) - log.info("joined") - reactor.callLater(3, find, node) - except Exception: - log.error("CAUGHT EXCEPTION") - traceback.print_exc() - log.info("Stopping reactor") - yield reactor.stop() - - -def getApproximateTotalDHTNodes(node): - from lbrynet.dht import constants - # get the deepest bucket and the number of contacts in that bucket and multiply it - # by the number of equivalently deep buckets in the whole DHT to get a really bad - # estimate! 
- bucket = node._routingTable._buckets[node._routingTable._kbucketIndex(node.node_id)] - num_in_bucket = len(bucket._contacts) - factor = (2 ** constants.key_bits) / (bucket.rangeMax - bucket.rangeMin) - return num_in_bucket * factor - - -def getApproximateTotalHashes(node): - # Divide the number of hashes we know about by k to get a really, really, really - # bad estimate of the average number of hashes per node, then multiply by the - # approximate number of nodes to get a horrendous estimate of the total number - # of hashes in the DHT - num_in_data_store = len(node._dataStore._dict) - if num_in_data_store == 0: - return 0 - return num_in_data_store * getApproximateTotalDHTNodes(node) / 8 - - -@defer.inlineCallbacks -def find(node): - try: - log.info("Approximate number of nodes in DHT: %s", str(getApproximateTotalDHTNodes(node))) - log.info("Approximate number of blobs in DHT: %s", str(getApproximateTotalHashes(node))) - - h = "578f5e82da7db97bfe0677826d452cc0c65406a8e986c9caa126af4ecdbf4913daad2f7f5d1fb0ffec17d0bf8f187f5a" - peersFake = yield node.getPeersForBlob(h.decode("hex")) - print peersFake - peers = yield node.getPeersForBlob(h.decode("hex")) - print peers - - # yield get_hosts(node, h) - except Exception: - log.error("CAUGHT EXCEPTION") - traceback.print_exc() - - log.info("Stopping reactor") - yield reactor.stop() - - - -def main(): - log_support.configure_console(level='DEBUG') - log_support.configure_twisted() - reactor.callLater(0, connect, port=10001) - log.info("Running reactor") - reactor.run() - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/scripts/dhttest.py b/scripts/dhttest.py deleted file mode 100644 index fe0a0af7fe..0000000000 --- a/scripts/dhttest.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python -# -# This is a basic single-node example of how to use the Entangled -# DHT. It creates a Node and (optionally) joins an existing DHT. It -# then does a Kademlia store and find, and then it deletes the stored -# value (non-Kademlia method). -# -# No tuple space functionality is demonstrated by this script. -# -# To test it properly, start a multi-node Kademlia DHT with the "create_network.py" -# script and point this node to that, e.g.: -# $python create_network.py 10 127.0.0.1 -# -# $python basic_example.py 5000 127.0.0.1 4000 -# -# This library is free software, distributed under the terms of -# the GNU Lesser General Public License Version 3, or any later version. -# See the COPYING file included in this archive -# - -# Thanks to Paul Cannon for IP-address resolution functions (taken from aspn.activestate.com) - - -import binascii -import random -import twisted.internet.reactor -from lbrynet.dht.node import Node -from lbrynet.core.cryptoutils import get_lbry_hash_obj - -# The Entangled DHT node; instantiated in the main() method -node = None - -# The key to use for this example when storing/retrieving data -h = get_lbry_hash_obj() -h.update("key") -KEY = h.digest() -# The value to store -VALUE = random.randint(10000, 20000) - -lbryid = KEY - - -def storeValue(key, value): - """ Stores the specified value in the DHT using the specified key """ - global node - print '\nStoring value; Key: %s, Value: %s' % (key, value) - # Store the value in the DHT. This method returns a Twisted - # Deferred result, which we then add callbacks to - deferredResult = node.announceHaveHash(key, value) - # Add our callback; this method is called when the operation completes... 
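For context on what patch 53 deletes: the estimator in dht_scripts.py extrapolates total network size from local routing-table density, multiplying the number of contacts in the bucket covering our own node ID by the number of equally sized ranges in the 2**384 keyspace. A toy equivalent (function name and arguments are mine):

    def approximate_total_nodes(num_in_bucket, range_min, range_max, key_bits=384):
        # e.g. a bucket spanning 1/256th of the keyspace that holds 8 contacts
        # suggests roughly 8 * 256 = 2048 nodes in the network
        buckets_like_this = (2 ** key_bits) // (range_max - range_min)
        return num_in_bucket * buckets_like_this

As the deleted comment itself admits, this is a really bad estimate: it assumes node IDs are uniformly dense across the whole keyspace.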
- deferredResult.addCallback(storeValueCallback) - # ...and for error handling, add an "error callback" as well. - # - # For this example script, I use a generic error handler; usually - # you would need something more specific - deferredResult.addErrback(genericErrorCallback) - - -def storeValueCallback(*args, **kwargs): - """ Callback function that is invoked when the storeValue() operation succeeds """ - print 'Value has been stored in the DHT' - # Now that the value has been stored, schedule that the value is read again after 2.5 seconds - print 'Scheduling retrieval in 2.5 seconds' - twisted.internet.reactor.callLater(2.5, getValue) - - -def genericErrorCallback(failure): - """ Callback function that is invoked if an error occurs during any of the DHT operations """ - print 'An error has occurred:', failure.getErrorMessage() - twisted.internet.reactor.callLater(0, stop) - - -def getValue(): - """ Retrieves the value of the specified key (KEY) from the DHT """ - global node, KEY - # Get the value for the specified key (immediately returns a Twisted deferred result) - print ('\nRetrieving value from DHT for key "%s"' % - binascii.unhexlify("f7d9dc4de674eaa2c5a022eb95bc0d33ec2e75c6")) - deferredResult = node.iterativeFindValue( - binascii.unhexlify("f7d9dc4de674eaa2c5a022eb95bc0d33ec2e75c6")) - # Add a callback to this result; this will be called as soon as the operation has completed - deferredResult.addCallback(getValueCallback) - # As before, add the generic error callback - deferredResult.addErrback(genericErrorCallback) - - -def getValueCallback(result): - """ Callback function that is invoked when the getValue() operation succeeds """ - # Check if the key was found (result is a dict of format {key: - # value}) or not (in which case a list of "closest" Kademlia - # contacts would be returned instead") - print "Got the value" - print result - - # Either way, schedule a "delete" operation for the key - print 'Scheduling shutdown in 2.5 seconds' - twisted.internet.reactor.callLater(2.5, stop) - - -def stop(): - """ Stops the Twisted reactor, and thus the script """ - print '\nStopping Kademlia node and terminating script' - twisted.internet.reactor.stop() - - -if __name__ == '__main__': - import sys - - if len(sys.argv) < 2: - print 'Usage:\n%s UDP_PORT [KNOWN_NODE_IP KNOWN_NODE_PORT]' % sys.argv[0] - print 'or:\n%s UDP_PORT [FILE_WITH_KNOWN_NODES]' % sys.argv[0] - print - print 'If a file is specified, it should containg one IP address and UDP port' - print 'per line, seperated by a space.' - sys.exit(1) - try: - int(sys.argv[1]) - except ValueError: - print '\nUDP_PORT must be an integer value.\n' - print 'Usage:\n%s UDP_PORT [KNOWN_NODE_IP KNOWN_NODE_PORT]' % sys.argv[0] - print 'or:\n%s UDP_PORT [FILE_WITH_KNOWN_NODES]' % sys.argv[0] - print - print 'If a file is specified, it should contain one IP address and UDP port' - print 'per line, seperated by a space.' - sys.exit(1) - - if len(sys.argv) == 4: - knownNodes = [(sys.argv[2], int(sys.argv[3]))] - elif len(sys.argv) == 3: - knownNodes = [] - f = open(sys.argv[2], 'r') - lines = f.readlines() - f.close() - for line in lines: - ipAddress, udpPort = line.split() - knownNodes.append((ipAddress, int(udpPort))) - else: - knownNodes = None - print '\nNOTE: You have not specified any remote DHT node(s) to connect to' - print 'It will thus not be aware of any existing DHT, but will still function as' - print ' a self-contained DHT (until another node contacts it).' 
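The announce-and-lookup round trip these demo scripts exercised is the same flow the functional tests later in this series cover. A minimal sketch against the Node API as it stands after these patches, assuming an already-started node and a 48-byte binary blob hash:

    from twisted.internet import defer

    @defer.inlineCallbacks
    def announce_then_find(node, blob_hash):
        # announce this node as a host for blob_hash, then query the DHT for
        # the blob's peers (cf. announceHaveBlob / getPeersForBlob as used in
        # lbrynet/tests/functional/dht/test_store.py)
        storing_node_ids = yield node.announceHaveBlob(blob_hash)
        peers = yield node.getPeersForBlob(blob_hash)
        defer.returnValue((storing_node_ids, peers))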
- print 'Run this script without any arguments for info.\n' - - # Set up SQLite-based data store (you could use an in-memory store instead, for example) - # - # Create the Entangled node. It extends the functionality of a - # basic Kademlia node (but is fully backwards-compatible with a - # Kademlia-only network) - # - # If you wish to have a pure Kademlia network, use the - # entangled.kademlia.node.Node class instead - print 'Creating Node' - node = Node(udpPort=int(sys.argv[1]), node_id=lbryid) - - # Schedule the node to join the Kademlia/Entangled DHT - node.joinNetwork(knownNodes) - # Schedule the "storeValue() call to be invoked after 2.5 seconds, - # using KEY and VALUE as arguments - twisted.internet.reactor.callLater(2.5, getValue) - # Start the Twisted reactor - this fires up all networking, and - # allows the scheduled join operation to take place - print 'Twisted reactor started (script will commence in 2.5 seconds)' - twisted.internet.reactor.run() diff --git a/scripts/node_rpc_cli.py b/scripts/node_rpc_cli.py deleted file mode 100644 index 43ffa6daff..0000000000 --- a/scripts/node_rpc_cli.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -CLI for sending rpc commands to a DHT node -""" - -import argparse - -from twisted.internet import reactor -from txjsonrpc.web.jsonrpc import Proxy - - -def print_value(value): - print value - - -def print_error(err): - print err.getErrorMessage() - - -def shut_down(): - reactor.stop() - - -def main(): - parser = argparse.ArgumentParser(description="Send an rpc command to a dht node") - parser.add_argument("rpc_command", - help="The rpc command to send to the dht node") - parser.add_argument("--node_host", - help="The host of the node to connect to", - default="127.0.0.1") - parser.add_argument("--node_port", - help="The port of the node to connect to", - default="8888") - - args = parser.parse_args() - connect_string = 'http://%s:%s' % (args.node_host, args.node_port) - proxy = Proxy(connect_string) - - d = proxy.callRemote(args.rpc_command) - d.addCallbacks(print_value, print_error) - d.addBoth(lambda _: shut_down()) - reactor.run() diff --git a/scripts/rpc_node.py b/scripts/rpc_node.py deleted file mode 100644 index 40d69b8e76..0000000000 --- a/scripts/rpc_node.py +++ /dev/null @@ -1,214 +0,0 @@ -import logging -import requests -import miniupnpc -import argparse -from copy import deepcopy -from twisted.internet import reactor, defer -from twisted.web import resource -from twisted.web.server import Site - -from lbrynet import conf -from lbrynet.core.log_support import configure_console -from lbrynet.dht.error import TimeoutError -conf.initialize_settings() - -log = logging.getLogger("dht tool") -configure_console() -log.setLevel(logging.INFO) - -from lbrynet.dht.node import Node -from lbrynet.dht.contact import Contact -from lbrynet.daemon.auth.server import AuthJSONRPCServer -from lbrynet.core.utils import generate_id - -def get_external_ip_and_setup_upnp(): - try: - u = miniupnpc.UPnP() - u.discoverdelay = 200 - u.discover() - u.selectigd() - - if u.getspecificportmapping(4444, "UDP"): - u.deleteportmapping(4444, "UDP") - log.info("Removed UPnP redirect for UDP 4444.") - u.addportmapping(4444, 'UDP', u.lanaddr, 4444, 'LBRY DHT port', '') - log.info("got external ip from upnp") - return u.externalipaddress() - except Exception: - log.exception("derp") - r = requests.get('https://api.ipify.org', {'format': 'json'}) - log.info("got external ip from ipify.org") - return r.json()['ip'] - - -class NodeRPC(AuthJSONRPCServer): - def __init__(self, lbryid, 
seeds, node_port, rpc_port): - AuthJSONRPCServer.__init__(self, False) - self.root = None - self.port = None - self.seeds = seeds - self.node_port = node_port - self.rpc_port = rpc_port - if lbryid: - lbryid = lbryid.decode('hex') - else: - lbryid = generate_id() - self.node_id = lbryid - self.external_ip = get_external_ip_and_setup_upnp() - self.node_port = node_port - - @defer.inlineCallbacks - def setup(self): - self.node = Node(node_id=self.node_id, udpPort=self.node_port, - externalIP=self.external_ip) - hosts = [] - for hostname, hostport in self.seeds: - host_ip = yield reactor.resolve(hostname) - hosts.append((host_ip, hostport)) - log.info("connecting to dht") - yield self.node.joinNetwork(tuple(hosts)) - log.info("connected to dht") - if not self.announced_startup: - self.announced_startup = True - self.start_api() - log.info("lbry id: %s (%i bytes)", self.node.node_id.encode('hex'), len(self.node.node_id)) - - def start_api(self): - root = resource.Resource() - root.putChild('', self) - self.port = reactor.listenTCP(self.rpc_port, Site(root), interface='localhost') - log.info("started jsonrpc server") - - @defer.inlineCallbacks - def jsonrpc_node_id_set(self, node_id): - old_id = self.node.node_id - self.node.stop() - del self.node - self.node_id = node_id.decode('hex') - yield self.setup() - msg = "changed dht id from %s to %s" % (old_id.encode('hex'), - self.node.node_id.encode('hex')) - defer.returnValue(msg) - - def jsonrpc_node_id_get(self): - return self._render_response(self.node.node_id.encode('hex')) - - @defer.inlineCallbacks - def jsonrpc_peer_find(self, node_id): - node_id = node_id.decode('hex') - contact = yield self.node.findContact(node_id) - result = None - if contact: - result = (contact.address, contact.port) - defer.returnValue(result) - - @defer.inlineCallbacks - def jsonrpc_peer_list_for_blob(self, blob_hash): - peers = yield self.node.getPeersForBlob(blob_hash.decode('hex')) - defer.returnValue(peers) - - @defer.inlineCallbacks - def jsonrpc_ping(self, node_id): - contact_host = yield self.jsonrpc_peer_find(node_id=node_id) - if not contact_host: - defer.returnValue("failed to find node") - contact_ip, contact_port = contact_host - contact = Contact(node_id.decode('hex'), contact_ip, contact_port, self.node._protocol) - try: - result = yield contact.ping() - except TimeoutError: - self.node.removeContact(contact.id) - self.node._dataStore.removePeer(contact.id) - result = {'error': 'timeout'} - defer.returnValue(result) - - def get_routing_table(self): - result = {} - data_store = deepcopy(self.node._dataStore._dict) - datastore_len = len(data_store) - hosts = {} - missing_contacts = [] - if datastore_len: - for k, v in data_store.iteritems(): - for value, lastPublished, originallyPublished, originalPublisherID in v: - try: - contact = self.node._routingTable.getContact(originalPublisherID) - except ValueError: - if originalPublisherID.encode('hex') not in missing_contacts: - missing_contacts.append(originalPublisherID.encode('hex')) - continue - if contact in hosts: - blobs = hosts[contact] - else: - blobs = [] - blobs.append(k.encode('hex')) - hosts[contact] = blobs - - contact_set = [] - blob_hashes = [] - result['buckets'] = {} - - for i in range(len(self.node._routingTable._buckets)): - for contact in self.node._routingTable._buckets[i]._contacts: - contacts = result['buckets'].get(i, []) - if contact in hosts: - blobs = hosts[contact] - del hosts[contact] - else: - blobs = [] - host = { - "address": contact.address, - "id": contact.id.encode("hex"), 
- "blobs": blobs, - } - for blob_hash in blobs: - if blob_hash not in blob_hashes: - blob_hashes.append(blob_hash) - contacts.append(host) - result['buckets'][i] = contacts - contact_set.append(contact.id.encode("hex")) - if hosts: - result['datastore extra'] = [ - { - "id": host.id.encode('hex'), - "blobs": hosts[host], - } - for host in hosts] - result['missing contacts'] = missing_contacts - result['contacts'] = contact_set - result['blob hashes'] = blob_hashes - result['node id'] = self.node_id.encode('hex') - return result - - def jsonrpc_routing_table_get(self): - return self._render_response(self.get_routing_table()) - - -def main(): - parser = argparse.ArgumentParser(description="Launch a dht node which responds to rpc commands") - parser.add_argument("--node_port", - help=("The UDP port on which the node will listen for connections " - "from other dht nodes"), - type=int, default=4444) - parser.add_argument("--rpc_port", - help="The TCP port on which the node will listen for rpc commands", - type=int, default=5280) - parser.add_argument("--bootstrap_host", - help="The IP of a DHT node to be used to bootstrap into the network", - default='lbrynet1.lbry.io') - parser.add_argument("--node_id", - help="The IP of a DHT node to be used to bootstrap into the network", - default=None) - parser.add_argument("--bootstrap_port", - help="The port of a DHT node to be used to bootstrap into the network", - default=4444, type=int) - - args = parser.parse_args() - seeds = [(args.bootstrap_host, args.bootstrap_port)] - server = NodeRPC(args.node_id, seeds, args.node_port, args.rpc_port) - reactor.addSystemEventTrigger('after', 'startup', server.setup) - reactor.run() - - -if __name__ == "__main__": - main() diff --git a/scripts/simple_dht_node.py b/scripts/simple_dht_node.py deleted file mode 100755 index cb8bc727e7..0000000000 --- a/scripts/simple_dht_node.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python - -from lbrynet.core import log_support -import logging.handlers -import sys -import time -from pprint import pprint - -from twisted.internet import defer, reactor -from lbrynet.dht.node import Node -import lbrynet.dht.constants -import lbrynet.dht.datastore -from lbrynet.tests.util import random_lbry_hash - -log = logging.getLogger(__name__) - - -@defer.inlineCallbacks -def run(): - nodeid = "9648996b4bef3ff41176668a0577f86aba7f1ea2996edd18f9c42430802c8085331345c5f0c44a7f352e2ba8ae59aaaa".decode("hex") - node = Node(node_id=nodeid, externalIP='127.0.0.1', udpPort=21999, peerPort=1234) - node.startNetwork() - yield node.joinNetwork([("127.0.0.1", 21001)]) - - print "" - print "" - print "" - print "" - print "" - print "" - - yield node.announceHaveBlob("2bb150cb996b4bef3ff41176648a0577f86abb7f1ea2996edd18f9c42430802c8085331345c5f0c44a7f352e2ba8ae59".decode("hex")) - - log.info("Shutting down...") - reactor.callLater(1, reactor.stop) - - -def main(): - log_support.configure_console(level='DEBUG') - log_support.configure_twisted() - reactor.callLater(0, run) - log.info("Running reactor") - reactor.run() - - -if __name__ == '__main__': - sys.exit(main()) From 545930cea4dae62db7c1107a52323f42dabfae56 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:16:19 -0400 Subject: [PATCH 54/79] dht test environment fixes --- lbrynet/tests/functional/dht/dht_test_environment.py | 11 ++++++----- lbrynet/tests/functional/dht/mock_transport.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lbrynet/tests/functional/dht/dht_test_environment.py 
b/lbrynet/tests/functional/dht/dht_test_environment.py index 57af2c68ad..debf061e04 100644 --- a/lbrynet/tests/functional/dht/dht_test_environment.py +++ b/lbrynet/tests/functional/dht/dht_test_environment.py @@ -1,6 +1,7 @@ import logging from twisted.trial import unittest from twisted.internet import defer, task +from lbrynet.dht import constants from lbrynet.dht.node import Node from mock_transport import resolve, listenUDP, MOCK_DHT_SEED_DNS, mock_node_generator @@ -45,8 +46,8 @@ def pump_clock(self, n, step=0.1, tick_callback=None): """ for _ in range(int(n * (1.0 / float(step)))): self.clock.advance(step) - if tick_callback and callable(tick_callback): - tick_callback(self.clock.seconds()) + if tick_callback and callable(tick_callback): + tick_callback(self.clock.seconds()) def run_reactor(self, seconds, deferreds, tick_callback=None): d = defer.DeferredList(deferreds) @@ -110,14 +111,14 @@ def setUp(self): seed_dl.append( seed.start(known_addresses) ) - yield self.run_reactor(901, seed_dl) + yield self.run_reactor(constants.checkRefreshInterval+1, seed_dl) while len(self.nodes + self._seeds) < self.network_size: network_dl = [] for i in range(min(10, self.network_size - len(self._seeds) - len(self.nodes))): network_dl.append(self.add_node()) - yield self.run_reactor(31, network_dl) + yield self.run_reactor(constants.checkRefreshInterval*2+1, network_dl) self.assertEqual(len(self.nodes + self._seeds), self.network_size) - self.pump_clock(1800) + self.pump_clock(3600) self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() diff --git a/lbrynet/tests/functional/dht/mock_transport.py b/lbrynet/tests/functional/dht/mock_transport.py index 3ce0bae763..c46ad30e27 100644 --- a/lbrynet/tests/functional/dht/mock_transport.py +++ b/lbrynet/tests/functional/dht/mock_transport.py @@ -1,7 +1,7 @@ import struct +import hashlib import logging from twisted.internet import defer, error -from lbrynet.core.utils import generate_id from lbrynet.dht.encoding import Bencode from lbrynet.dht.error import DecodeError from lbrynet.dht.msgformat import DefaultFormat @@ -121,7 +121,9 @@ def mock_node_generator(count=None, mock_node_ids=MOCK_DHT_NODES): if count and num >= count: break if num >= len(mock_node_ids): - node_id = generate_id().encode('hex') + h = hashlib.sha384() + h.update("node %i" % num) + node_id = h.hexdigest() else: node_id = mock_node_ids[num] yield (node_id, node_ip) From 659632b66cdd73e4c101e54fcbc7b92cbad54c01 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:22:30 -0400 Subject: [PATCH 55/79] fix and update tests --- lbrynet/core/Session.py | 13 +- lbrynet/dht/routingtable.py | 6 +- .../functional/dht/test_bootstrap_network.py | 24 +++ .../functional/dht/test_contact_expiration.py | 15 +- .../functional/dht/test_contact_rejoin.py | 66 ++----- lbrynet/tests/functional/dht/test_store.py | 101 ++++------- lbrynet/tests/unit/dht/test_contact.py | 14 +- lbrynet/tests/unit/dht/test_datastore.py | 130 -------------- lbrynet/tests/unit/dht/test_node.py | 4 +- lbrynet/tests/unit/dht/test_protocol.py | 167 ------------------ lbrynet/tests/unit/dht/test_routingtable.py | 85 ++++----- 11 files changed, 129 insertions(+), 496 deletions(-) delete mode 100644 lbrynet/tests/unit/dht/test_datastore.py delete mode 100644 lbrynet/tests/unit/dht/test_protocol.py diff --git a/lbrynet/core/Session.py b/lbrynet/core/Session.py index c2aefafc60..634fa99097 100644 --- a/lbrynet/core/Session.py +++ b/lbrynet/core/Session.py @@ -32,14 +32,11 @@ class Session(object): peers 
can connect to this peer. """ - def __init__(self, blob_data_payment_rate, db_dir=None, - node_id=None, peer_manager=None, dht_node_port=None, - known_dht_nodes=None, peer_finder=None, - hash_announcer=None, blob_dir=None, - blob_manager=None, peer_port=None, use_upnp=True, - rate_limiter=None, wallet=None, - dht_node_class=node.Node, blob_tracker_class=None, - payment_rate_manager_class=None, is_generous=True, external_ip=None, storage=None): + def __init__(self, blob_data_payment_rate, db_dir=None, node_id=None, peer_manager=None, dht_node_port=None, + known_dht_nodes=None, peer_finder=None, hash_announcer=None, blob_dir=None, blob_manager=None, + peer_port=None, use_upnp=True, rate_limiter=None, wallet=None, dht_node_class=node.Node, + blob_tracker_class=None, payment_rate_manager_class=None, is_generous=True, external_ip=None, + storage=None): """@param blob_data_payment_rate: The default payment rate for blob data @param db_dir: The directory in which levelDB files should be stored diff --git a/lbrynet/dht/routingtable.py b/lbrynet/dht/routingtable.py index 6e5456bb84..c58982d12d 100644 --- a/lbrynet/dht/routingtable.py +++ b/lbrynet/dht/routingtable.py @@ -63,10 +63,7 @@ def _shouldSplit(self, bucketIndex, toAdd): contacts = self.get_contacts() distance = Distance(self._parentNodeID) contacts.sort(key=lambda c: distance(c.id)) - if len(contacts) < constants.k: - kth_contact = contacts[-1] - else: - kth_contact = contacts[constants.k-1] + kth_contact = contacts[-1] if len(contacts) < constants.k else contacts[constants.k-1] return distance(toAdd) < distance(kth_contact.id) def addContact(self, contact): @@ -91,7 +88,6 @@ def addContact(self, contact): # Retry the insertion attempt return self.addContact(contact) else: - # We can't split the k-bucket # # The 13 page kademlia paper specifies that the least recently contacted node in the bucket diff --git a/lbrynet/tests/functional/dht/test_bootstrap_network.py b/lbrynet/tests/functional/dht/test_bootstrap_network.py index e31c87fe06..e9aeed1458 100644 --- a/lbrynet/tests/functional/dht/test_bootstrap_network.py +++ b/lbrynet/tests/functional/dht/test_bootstrap_network.py @@ -1,3 +1,4 @@ +from twisted.trial import unittest from dht_test_environment import TestKademliaBase @@ -8,3 +9,26 @@ class TestKademliaBootstrap(TestKademliaBase): def test_bootstrap_seed_nodes(self): pass + + +@unittest.SkipTest +class TestKademliaBootstrap40Nodes(TestKademliaBase): + network_size = 40 + + def test_bootstrap_network(self): + pass + + +class TestKademliaBootstrap80Nodes(TestKademliaBase): + network_size = 80 + + def test_bootstrap_network(self): + pass + + +@unittest.SkipTest +class TestKademliaBootstrap120Nodes(TestKademliaBase): + network_size = 120 + + def test_bootstrap_network(self): + pass diff --git a/lbrynet/tests/functional/dht/test_contact_expiration.py b/lbrynet/tests/functional/dht/test_contact_expiration.py index 44d20d98cd..965c0c31e5 100644 --- a/lbrynet/tests/functional/dht/test_contact_expiration.py +++ b/lbrynet/tests/functional/dht/test_contact_expiration.py @@ -1,5 +1,6 @@ import logging from twisted.internet import defer +from lbrynet.dht import constants from dht_test_environment import TestKademliaBase log = logging.getLogger() @@ -12,7 +13,6 @@ class TestPeerExpiration(TestKademliaBase): def test_expire_stale_peers(self): removed_addresses = set() removed_nodes = [] - self.show_info() # stop 5 nodes for _ in range(5): @@ -26,16 +26,15 @@ def test_expire_stale_peers(self): self.assertSetEqual(offline_addresses, 
removed_addresses) get_nodes_with_stale_contacts = lambda: filter(lambda node: any(contact.address in offline_addresses - for contact in node.contacts), self.nodes + self._seeds) + for contact in node.contacts), + self.nodes + self._seeds) self.assertRaises(AssertionError, self.verify_all_nodes_are_routable) self.assertTrue(len(get_nodes_with_stale_contacts()) > 1) - # run the network for an hour, which should expire the removed nodes - for _ in range(60): - log.info("Time is %f, nodes with stale contacts: %i/%i", self.clock.seconds(), - len(get_nodes_with_stale_contacts()), len(self.nodes + self._seeds)) - self.pump_clock(60) - self.assertTrue(len(get_nodes_with_stale_contacts()) == 0) + # run the network long enough for two failures to happen + self.pump_clock(constants.checkRefreshInterval * 3) + + self.assertEquals(len(get_nodes_with_stale_contacts()), 0) self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() diff --git a/lbrynet/tests/functional/dht/test_contact_rejoin.py b/lbrynet/tests/functional/dht/test_contact_rejoin.py index 72cb939fc9..1f770b442a 100644 --- a/lbrynet/tests/functional/dht/test_contact_rejoin.py +++ b/lbrynet/tests/functional/dht/test_contact_rejoin.py @@ -1,5 +1,6 @@ import logging from twisted.internet import defer +from lbrynet.dht import constants from dht_test_environment import TestKademliaBase log = logging.getLogger() @@ -9,62 +10,29 @@ class TestReJoin(TestKademliaBase): network_size = 40 @defer.inlineCallbacks - def test_re_join(self): - - removed_node = self.nodes[0] - self.nodes.remove(removed_node) - yield self.run_reactor(1, [removed_node.stop()]) - - # run the network for an hour, which should expire the removed node - self.pump_clock(3600) - self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() - self.nodes.append(removed_node) - yield self.run_reactor( - 31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] - ) - self.pump_clock(901) + def setUp(self): + yield super(TestReJoin, self).setUp() + self.removed_node = self.nodes[20] + self.nodes.remove(self.removed_node) + yield self.run_reactor(1, [self.removed_node.stop()]) + self.pump_clock(constants.checkRefreshInterval * 2) self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() @defer.inlineCallbacks - def test_re_join_with_new_ip(self): - - removed_node = self.nodes[0] - self.nodes.remove(removed_node) - yield self.run_reactor(1, [removed_node.stop()]) - - # run the network for an hour, which should expire the removed node - for _ in range(60): - self.pump_clock(60) - self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() - removed_node.externalIP = "10.43.43.43" - self.nodes.append(removed_node) + def test_re_join(self): + self.nodes.append(self.removed_node) yield self.run_reactor( - 31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] + 31, [self.removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] ) - self.pump_clock(901) + self.pump_clock(constants.checkRefreshInterval*2) self.verify_all_nodes_are_routable() self.verify_all_nodes_are_pingable() - @defer.inlineCallbacks - def test_re_join_with_new_node_id(self): - - removed_node = self.nodes[0] - self.nodes.remove(removed_node) - yield self.run_reactor(1, [removed_node.stop()]) + def test_re_join_with_new_ip(self): + self.removed_node.externalIP = "10.43.43.43" + return self.test_re_join() - # run the network for an hour, which should expire the removed 
node - for _ in range(60): - self.pump_clock(60) - self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() - removed_node.node_id = removed_node._generateID() - self.nodes.append(removed_node) - yield self.run_reactor( - 31, [removed_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] - ) - self.pump_clock(901) - self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() + def test_re_join_with_new_node_id(self): + self.removed_node.node_id = self.removed_node._generateID() + return self.test_re_join() diff --git a/lbrynet/tests/functional/dht/test_store.py b/lbrynet/tests/functional/dht/test_store.py index 911ea2563b..43d5fd6ae0 100644 --- a/lbrynet/tests/functional/dht/test_store.py +++ b/lbrynet/tests/functional/dht/test_store.py @@ -8,12 +8,12 @@ log = logging.getLogger() -class TestStore(TestKademliaBase): +class TestStoreExpiration(TestKademliaBase): network_size = 40 @defer.inlineCallbacks def test_store_and_expire(self): - blob_hash = generate_id() + blob_hash = generate_id(1) announcing_node = self.nodes[20] # announce the blob announce_d = announcing_node.announceHaveBlob(blob_hash) @@ -61,69 +61,12 @@ def test_store_and_expire(self): self.assertEquals(len(node._dataStore.getStoringContacts()), 0) self.assertTrue(blob_hash not in node._dataStore._dict) # the looping call should have fired - @defer.inlineCallbacks - def test_refresh_storing_peers(self): - blob_hash = generate_id() - announcing_node = self.nodes[20] - # announce the blob - announce_d = announcing_node.announceHaveBlob(blob_hash) - self.pump_clock(5) - storing_node_ids = yield announce_d - all_nodes = set(self.nodes).union(set(self._seeds)) - - # verify the nodes we think stored it did actually store it - storing_nodes = [node for node in all_nodes if node.node_id.encode('hex') in storing_node_ids] - self.assertEquals(len(storing_nodes), len(storing_node_ids)) - self.assertEquals(len(storing_nodes), constants.k) - for node in storing_nodes: - self.assertTrue(node._dataStore.hasPeersForBlob(blob_hash)) - datastore_result = node._dataStore.getPeersForBlob(blob_hash) - self.assertEquals(map(lambda contact: (contact.id, contact.address, contact.port), - node._dataStore.getStoringContacts()), [(announcing_node.node_id, - announcing_node.externalIP, - announcing_node.port)]) - self.assertEquals(len(datastore_result), 1) - expanded_peers = [] - for peer in datastore_result: - host = ".".join([str(ord(d)) for d in peer[:4]]) - port, = struct.unpack('>H', peer[4:6]) - peer_node_id = peer[6:] - if (host, port, peer_node_id) not in expanded_peers: - expanded_peers.append((peer_node_id, host, port)) - self.assertEquals(expanded_peers[0], - (announcing_node.node_id, announcing_node.externalIP, announcing_node.peerPort)) - - self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) - - # verify the announced blob expires in the storing nodes datastores - - self.clock.advance(constants.dataExpireTimeout) # skip the clock directly ahead - for node in storing_nodes: - self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) - datastore_result = node._dataStore.getPeersForBlob(blob_hash) - self.assertEquals(len(datastore_result), 0) - self.assertTrue(blob_hash in node._dataStore._dict) # the looping call shouldn't have removed it yet - self.assertEquals(len(node._dataStore.getStoringContacts()), 1) - - self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) - for node in 
storing_nodes: - self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) - datastore_result = node._dataStore.getPeersForBlob(blob_hash) - self.assertEquals(len(datastore_result), 0) - self.assertEquals(len(node._dataStore.getStoringContacts()), 0) - self.assertTrue(blob_hash not in node._dataStore._dict) # the looping call should have fired after - - -class TestStoringNodeWentStale(TestKademliaBase): - network_size = 40 - @defer.inlineCallbacks def test_storing_node_went_stale_then_came_back(self): - blob_hash = generate_id() + blob_hash = generate_id(1) announcing_node = self.nodes[20] # announce the blob announce_d = announcing_node.announceHaveBlob(blob_hash) - announce_time = self.clock.seconds() self.pump_clock(5) storing_node_ids = yield announce_d all_nodes = set(self.nodes).union(set(self._seeds)) @@ -150,30 +93,31 @@ def test_storing_node_went_stale_then_came_back(self): self.assertEquals(expanded_peers[0], (announcing_node.node_id, announcing_node.externalIP, announcing_node.peerPort)) - self.nodes.remove(announcing_node) - yield self.run_reactor(1, [announcing_node.stop()]) + self.pump_clock(constants.checkRefreshInterval*2) - # run the network for an hour, which should expire the removed node and the announced value - self.pump_clock(3600) + # stop the node + self.nodes.remove(announcing_node) + yield self.run_reactor(31, [announcing_node.stop()]) + # run the network for an hour, which should expire the removed node and turn the announced value stale + self.pump_clock(constants.checkRefreshInterval * 4, constants.checkRefreshInterval/2) self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() - for node in storing_nodes: # make sure the contact isn't returned as a peer for the blob, but that - # we still have the entry in the datastore in case the node returns + # make sure the contact isn't returned as a peer for the blob, but that we still have the entry in the + # datastore in case the node comes back + for node in storing_nodes: self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) datastore_result = node._dataStore.getPeersForBlob(blob_hash) self.assertEquals(len(datastore_result), 0) self.assertEquals(len(node._dataStore.getStoringContacts()), 1) self.assertTrue(blob_hash in node._dataStore._dict) - # bring the announcing node back online + # # bring the announcing node back online self.nodes.append(announcing_node) yield self.run_reactor( 31, [announcing_node.start([(seed_name, 4444) for seed_name in sorted(self.seed_dns.keys())])] ) - self.pump_clock(24*60+1) # FIXME: this should work after 12 minutes + 1 second, yet it doesnt + self.pump_clock(constants.checkRefreshInterval * 2) self.verify_all_nodes_are_routable() - self.verify_all_nodes_are_pingable() # now the announcing node should once again be returned as a peer for the blob for node in storing_nodes: @@ -183,4 +127,19 @@ def test_storing_node_went_stale_then_came_back(self): self.assertEquals(len(node._dataStore.getStoringContacts()), 1) self.assertTrue(blob_hash in node._dataStore._dict) - # TODO: handle the case where the announcing node re joins with a different address from what is stored + # verify the announced blob expires in the storing nodes datastores + self.clock.advance(constants.dataExpireTimeout) # skip the clock directly ahead + for node in storing_nodes: + self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 0) + self.assertTrue(blob_hash in 
node._dataStore._dict) # the looping call shouldn't have removed it yet + self.assertEquals(len(node._dataStore.getStoringContacts()), 1) + + self.pump_clock(constants.checkRefreshInterval + 1) # tick the clock forward (so the nodes refresh) + for node in storing_nodes: + self.assertFalse(node._dataStore.hasPeersForBlob(blob_hash)) + datastore_result = node._dataStore.getPeersForBlob(blob_hash) + self.assertEquals(len(datastore_result), 0) + self.assertEquals(len(node._dataStore.getStoringContacts()), 0) + self.assertTrue(blob_hash not in node._dataStore._dict) # the looping call should have fired diff --git a/lbrynet/tests/unit/dht/test_contact.py b/lbrynet/tests/unit/dht/test_contact.py index c9ebdc7461..9a6b3cf553 100644 --- a/lbrynet/tests/unit/dht/test_contact.py +++ b/lbrynet/tests/unit/dht/test_contact.py @@ -15,6 +15,14 @@ def setUp(self): self.secondContactCopy = self.contact_manager.make_contact(self.node_ids[0], '192.168.0.1', 1000, None, 32) self.firstContactDifferentValues = self.contact_manager.make_contact(self.node_ids[1], '192.168.1.20', 1000, None, 50) + self.assertRaises(ValueError, self.contact_manager.make_contact, self.node_ids[1], '192.168.1.20', + 100000, None) + self.assertRaises(ValueError, self.contact_manager.make_contact, self.node_ids[1], '192.168.1.20.1', + 1000, None) + self.assertRaises(ValueError, self.contact_manager.make_contact, self.node_ids[1], 'this is not an ip', + 1000, None) + self.assertRaises(ValueError, self.contact_manager.make_contact, "this is not a node id", '192.168.1.20.1', + 1000, None) def testNoDuplicateContactObjects(self): self.assertTrue(self.secondContact is self.secondContactCopy) @@ -74,7 +82,7 @@ def test_stale_then_fail(self): def test_good_turned_stale(self): self.contact.update_last_replied() self.assertTrue(self.contact.contact_is_good is True) - self.clock.advance((constants.refreshTimeout / 4) - 1) + self.clock.advance(constants.checkRefreshInterval - 1) self.assertTrue(self.contact.contact_is_good is True) self.clock.advance(1) self.assertTrue(self.contact.contact_is_good is None) @@ -112,7 +120,7 @@ def test_good_then_fail_then_good(self): self.assertTrue(self.contact.contact_is_good is True) # it goes stale - self.clock.advance((constants.refreshTimeout / 4) - 2) + self.clock.advance(constants.checkRefreshInterval - 2) self.assertTrue(self.contact.contact_is_good is True) self.clock.advance(1) self.assertTrue(self.contact.contact_is_good is None) @@ -134,7 +142,7 @@ def test_previous_replied_then_requested(self): self.assertTrue(self.contact.contact_is_good is True) # it goes stale - self.clock.advance((constants.refreshTimeout / 4) - 1) + self.clock.advance(constants.checkRefreshInterval - 1) self.assertTrue(self.contact.contact_is_good is True) self.clock.advance(1) self.assertTrue(self.contact.contact_is_good is None) diff --git a/lbrynet/tests/unit/dht/test_datastore.py b/lbrynet/tests/unit/dht/test_datastore.py deleted file mode 100644 index a431f4aac1..0000000000 --- a/lbrynet/tests/unit/dht/test_datastore.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python -# -# This library is free software, distributed under the terms of -# the GNU Lesser General Public License Version 3, or any later version. 
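The test_contact.py changes above move every staleness window from refreshTimeout / 4 to constants.checkRefreshInterval, matching the contact.py change in patch 57 below. Roughly, the rule the tests encode, sketched under my own simplifications (the real contact_is_good property tracks request and failure timestamps and handles more edge cases):

    def contact_is_good(last_replied, last_requested, failures, now, delay):
        # True = good, False = bad, None = unknown/stale;
        # delay is constants.checkRefreshInterval in the real code
        if len(failures) >= 2 and (not last_replied or last_replied < failures[-2]):
            return False  # failed twice since it last replied
        if last_replied and last_replied > now - delay:
            return True   # replied recently
        if last_replied and last_requested and last_requested > now - delay:
            return True   # replied at some point and was used recently
        return None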
-# See the COPYING file included in this archive - -from twisted.trial import unittest -import time -import hashlib - -from lbrynet.dht.datastore import DictDataStore -from lbrynet.dht import constants - - -class DictDataStoreTest(unittest.TestCase): - """ Basic tests case for the reference DataStore API and implementation """ - def setUp(self): - self.ds = DictDataStore() - h = hashlib.sha384() - h.update('g') - hashKey = h.digest() - h2 = hashlib.sha1() - h2.update('dried') - hashKey2 = h2.digest() - h3 = hashlib.sha1() - h3.update('Boozoo Bajou - 09 - S.I.P.mp3') - hashKey3 = h3.digest() - #self.cases = (('a', 'hello there\nthis is a test'), - # (hashKey3, '1 2 3 4 5 6 7 8 9 0')) - self.cases = ((hashKey, 'test1test1test1test1test1t'), - (hashKey, 'test2'), - (hashKey, 'test3test3test3test3test3test3test3test3'), - (hashKey2, 'test4'), - (hashKey3, 'test5'), - (hashKey3, 'test6')) - - def testReadWrite(self): - # Test write ability - for key, value in self.cases: - try: - now = int(time.time()) - self.ds.addPeerToBlob(key, value, now, now, 'node1') - except Exception: - import traceback - self.fail('Failed writing the following data: key: "%s" ' - 'data: "%s"\n The error was: %s:' % - (key, value, traceback.format_exc(5))) - - # Verify writing (test query ability) - for key, value in self.cases: - try: - self.failUnless(self.ds.hasPeersForBlob(key), - 'Key "%s" not found in DataStore! DataStore key dump: %s' % - (key, self.ds.keys())) - except Exception: - import traceback - self.fail( - 'Failed verifying that the following key exists: "%s"\n The error was: %s:' % - (key, traceback.format_exc(5))) - - # Read back the data - for key, value in self.cases: - self.failUnless(value in self.ds.getPeersForBlob(key), - 'DataStore returned invalid data! Expected "%s", got "%s"' % - (value, self.ds.getPeersForBlob(key))) - - def testNonExistentKeys(self): - for key, value in self.cases: - self.failIf(key in self.ds.keys(), 'DataStore reports it has non-existent key: "%s"' % - key) - - def testExpires(self): - now = int(time.time()) - - h1 = hashlib.sha1() - h1.update('test1') - key1 = h1.digest() - h2 = hashlib.sha1() - h2.update('test2') - key2 = h2.digest() - td = constants.dataExpireTimeout - 100 - td2 = td + td - self.ds.addPeerToBlob(h1, 'val1', now - td, now - td, '1') - self.ds.addPeerToBlob(h1, 'val2', now - td2, now - td2, '2') - self.ds.addPeerToBlob(h2, 'val3', now - td2, now - td2, '3') - self.ds.addPeerToBlob(h2, 'val4', now, now, '4') - self.ds.removeExpiredPeers() - self.failUnless( - 'val1' in self.ds.getPeersForBlob(h1), - 'DataStore deleted an unexpired value! Value %s, publish time %s, current time %s' % - ('val1', str(now - td), str(now))) - self.failIf( - 'val2' in self.ds.getPeersForBlob(h1), - 'DataStore failed to delete an expired value! ' - 'Value %s, publish time %s, current time %s' % - ('val2', str(now - td2), str(now))) - self.failIf( - 'val3' in self.ds.getPeersForBlob(h2), - 'DataStore failed to delete an expired value! ' - 'Value %s, publish time %s, current time %s' % - ('val3', str(now - td2), str(now))) - self.failUnless( - 'val4' in self.ds.getPeersForBlob(h2), - 'DataStore deleted an unexpired value! 
Value %s, publish time %s, current time %s' % - ('val4', str(now), str(now))) - -# # First write with fake values -# for key, value in self.cases: -# except Exception: -# -# # write this stuff a second time, with the real values -# for key, value in self.cases: -# except Exception: -# -# # Read back the data -# for key, value in self.cases: - -# # First some values -# for key, value in self.cases: -# except Exception: -# -# -# # Delete an item from the data - -# # First some values with metadata -# for key, value in self.cases: -# except Exception: -# -# # Read back the meta-data -# for key, value in self.cases: diff --git a/lbrynet/tests/unit/dht/test_node.py b/lbrynet/tests/unit/dht/test_node.py index 3310523e95..e04b07f9bb 100644 --- a/lbrynet/tests/unit/dht/test_node.py +++ b/lbrynet/tests/unit/dht/test_node.py @@ -85,7 +85,7 @@ def testAddContact(self): h = hashlib.sha384() h.update('node1') contactID = h.digest() - contact = self.node.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.node._protocol) + contact = self.node.contact_manager.make_contact(contactID, '127.0.0.1', 9182, self.node._protocol) # Now add it... yield self.node.addContact(contact) # ...and request the closest nodes to it using FIND_NODE @@ -99,7 +99,7 @@ def testAddContact(self): def testAddSelfAsContact(self): """ Tests the node's behaviour when attempting to add itself as a contact """ # Create a contact with the same ID as the local node's ID - contact = self.node.contact_manager.make_contact(self.node.node_id, '127.0.0.1', 91824, None) + contact = self.node.contact_manager.make_contact(self.node.node_id, '127.0.0.1', 9182, None) # Now try to add it yield self.node.addContact(contact) # ...and request the closest nodes to it using FIND_NODE diff --git a/lbrynet/tests/unit/dht/test_protocol.py b/lbrynet/tests/unit/dht/test_protocol.py deleted file mode 100644 index 02b6b5adb4..0000000000 --- a/lbrynet/tests/unit/dht/test_protocol.py +++ /dev/null @@ -1,167 +0,0 @@ -# import time -# import unittest -# import twisted.internet.selectreactor -# -# import lbrynet.dht.protocol -# import lbrynet.dht.contact -# import lbrynet.dht.constants -# import lbrynet.dht.msgtypes -# from lbrynet.dht.error import TimeoutError -# from lbrynet.dht.node import Node, rpcmethod -# -# -# class KademliaProtocolTest(unittest.TestCase): -# """ Test case for the Protocol class """ -# -# def setUp(self): -# del lbrynet.dht.protocol.reactor -# lbrynet.dht.protocol.reactor = twisted.internet.selectreactor.SelectReactor() -# self.node = Node(node_id='1' * 48, udpPort=9182, externalIP="127.0.0.1") -# self.protocol = lbrynet.dht.protocol.KademliaProtocol(self.node) -# -# def testReactor(self): -# """ Tests if the reactor can start/stop the protocol correctly """ -# lbrynet.dht.protocol.reactor.listenUDP(0, self.protocol) -# lbrynet.dht.protocol.reactor.callLater(0, lbrynet.dht.protocol.reactor.stop) -# lbrynet.dht.protocol.reactor.run() -# -# def testRPCTimeout(self): -# """ Tests if a RPC message sent to a dead remote node times out correctly """ -# -# @rpcmethod -# def fake_ping(*args, **kwargs): -# time.sleep(lbrynet.dht.constants.rpcTimeout + 1) -# return 'pong' -# -# real_ping = self.node.ping -# real_timeout = lbrynet.dht.constants.rpcTimeout -# real_attempts = lbrynet.dht.constants.rpcAttempts -# lbrynet.dht.constants.rpcAttempts = 1 -# lbrynet.dht.constants.rpcTimeout = 1 -# self.node.ping = fake_ping -# deadContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) -# 
self.node.addContact(deadContact) -# # Make sure the contact was added -# self.failIf(deadContact not in self.node.contacts, -# 'Contact not added to fake node (error in test code)') -# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) -# -# # Run the PING RPC (which should raise a timeout error) -# df = self.protocol.sendRPC(deadContact, 'ping', {}) -# -# def check_timeout(err): -# self.assertEqual(type(err), TimeoutError) -# -# df.addErrback(check_timeout) -# -# def reset_values(): -# self.node.ping = real_ping -# lbrynet.dht.constants.rpcTimeout = real_timeout -# lbrynet.dht.constants.rpcAttempts = real_attempts -# -# # See if the contact was removed due to the timeout -# def check_removed_contact(): -# self.failIf(deadContact in self.node.contacts, -# 'Contact was not removed after RPC timeout; check exception types.') -# -# df.addCallback(lambda _: reset_values()) -# -# # Stop the reactor if a result arrives (timeout or not) -# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) -# df.addCallback(lambda _: check_removed_contact()) -# lbrynet.dht.protocol.reactor.run() -# -# def testRPCRequest(self): -# """ Tests if a valid RPC request is executed and responded to correctly """ -# remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) -# self.node.addContact(remoteContact) -# self.error = None -# -# def handleError(f): -# self.error = 'An RPC error occurred: %s' % f.getErrorMessage() -# -# def handleResult(result): -# expectedResult = 'pong' -# if result != expectedResult: -# self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' \ -# % (expectedResult, result) -# -# # Publish the "local" node on the network -# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) -# # Simulate the RPC -# df = remoteContact.ping() -# df.addCallback(handleResult) -# df.addErrback(handleError) -# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) -# lbrynet.dht.protocol.reactor.run() -# self.failIf(self.error, self.error) -# # The list of sent RPC messages should be empty at this stage -# self.failUnlessEqual(len(self.protocol._sentMessages), 0, -# 'The protocol is still waiting for a RPC result, ' -# 'but the transaction is already done!') -# -# def testRPCAccess(self): -# """ Tests invalid RPC requests -# Verifies that a RPC request for an existing but unpublished -# method is denied, and that the associated (remote) exception gets -# raised locally """ -# remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) -# self.node.addContact(remoteContact) -# self.error = None -# -# def handleError(f): -# try: -# f.raiseException() -# except AttributeError, e: -# # This is the expected outcome since the remote node did not publish the method -# self.error = None -# except Exception, e: -# self.error = 'The remote method failed, but the wrong exception was raised; ' \ -# 'expected AttributeError, got %s' % type(e) -# -# def handleResult(result): -# self.error = 'The remote method executed successfully, returning: "%s"; ' \ -# 'this RPC should not have been allowed.' 
% result -# -# # Publish the "local" node on the network -# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) -# # Simulate the RPC -# df = remoteContact.not_a_rpc_function() -# df.addCallback(handleResult) -# df.addErrback(handleError) -# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) -# lbrynet.dht.protocol.reactor.run() -# self.failIf(self.error, self.error) -# # The list of sent RPC messages should be empty at this stage -# self.failUnlessEqual(len(self.protocol._sentMessages), 0, -# 'The protocol is still waiting for a RPC result, ' -# 'but the transaction is already done!') -# -# def testRPCRequestArgs(self): -# """ Tests if an RPC requiring arguments is executed correctly """ -# remoteContact = lbrynet.dht.contact.Contact('2' * 48, '127.0.0.1', 9182, self.protocol) -# self.node.addContact(remoteContact) -# self.error = None -# -# def handleError(f): -# self.error = 'An RPC error occurred: %s' % f.getErrorMessage() -# -# def handleResult(result): -# expectedResult = 'pong' -# if result != expectedResult: -# self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' % \ -# (expectedResult, result) -# -# # Publish the "local" node on the network -# lbrynet.dht.protocol.reactor.listenUDP(9182, self.protocol) -# # Simulate the RPC -# df = remoteContact.ping() -# df.addCallback(handleResult) -# df.addErrback(handleError) -# df.addBoth(lambda _: lbrynet.dht.protocol.reactor.stop()) -# lbrynet.dht.protocol.reactor.run() -# self.failIf(self.error, self.error) -# # The list of sent RPC messages should be empty at this stage -# self.failUnlessEqual(len(self.protocol._sentMessages), 0, -# 'The protocol is still waiting for a RPC result, ' -# 'but the transaction is already done!') diff --git a/lbrynet/tests/unit/dht/test_routingtable.py b/lbrynet/tests/unit/dht/test_routingtable.py index 1c6e480981..ebe1698f98 100644 --- a/lbrynet/tests/unit/dht/test_routingtable.py +++ b/lbrynet/tests/unit/dht/test_routingtable.py @@ -27,22 +27,13 @@ def testDistance(self): """ Test to see if distance method returns correct result""" # testList holds a couple 3-tuple (variable1, variable2, result) - basicTestList = [('123456789', '123456789', 0L), ('12345', '98765', 34527773184L)] + basicTestList = [(chr(170) * 48, chr(85) * 48, long((chr(255) * 48).encode('hex'), 16))] for test in basicTestList: result = Distance(test[0])(test[1]) self.failIf(result != test[2], 'Result of _distance() should be %s but %s returned' % (test[2], result)) - baseIp = '146.64.19.111' - ipTestList = ['146.64.29.222', '192.68.19.333'] - - distanceOne = Distance(baseIp)(ipTestList[0]) - distanceTwo = Distance(baseIp)(ipTestList[1]) - - self.failIf(distanceOne > distanceTwo, '%s should be closer to the base ip %s than %s' % - (ipTestList[0], baseIp, ipTestList[1])) - @defer.inlineCallbacks def testAddContact(self): """ Tests if a contact can be added and retrieved correctly """ @@ -50,7 +41,7 @@ def testAddContact(self): h = hashlib.sha384() h.update('node2') contactID = h.digest() - contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 9182, self.protocol) # Now add it... 
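The rewritten testDistance replaces the ill-typed string and IP cases with one whose result is known in closed form: 0xAA is 10101010 and 0x55 is 01010101, so every XORed byte is 0xFF and two such 48-byte IDs sit at the maximum possible distance, 2**384 - 1. A quick check in the test's own Python 2 idiom:

    a = chr(170) * 48    # 48 bytes of 0xAA
    b = chr(85) * 48     # 48 bytes of 0x55
    expected = long((chr(255) * 48).encode('hex'), 16)   # == 2 ** 384 - 1
    assert long(a.encode('hex'), 16) ^ long(b.encode('hex'), 16) == expected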
yield self.routingTable.addContact(contact) # ...and request the closest nodes to it (will retrieve it) @@ -66,7 +57,7 @@ def testGetContact(self): h = hashlib.sha384() h.update('node2') contactID = h.digest() - contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 9182, self.protocol) # Now add it... yield self.routingTable.addContact(contact) # ...and get it again @@ -80,7 +71,7 @@ def testAddParentNodeAsContact(self): """ # Create a contact with the same ID as the local node's ID - contact = self.contact_manager.make_contact(self.nodeID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(self.nodeID, '127.0.0.1', 9182, self.protocol) # Now try to add it yield self.routingTable.addContact(contact) # ...and request the closest nodes to it using FIND_NODE @@ -94,7 +85,7 @@ def testRemoveContact(self): h = hashlib.sha384() h.update('node2') contactID = h.digest() - contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(contactID, '127.0.0.1', 9182, self.protocol) # Now add it... yield self.routingTable.addContact(contact) # Verify addition @@ -113,7 +104,7 @@ def testSplitBucket(self): h = hashlib.sha384() h.update('remote node %d' % i) nodeID = h.digest() - contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 9182, self.protocol) yield self.routingTable.addContact(contact) self.failUnlessEqual(len(self.routingTable._buckets), 1, 'Only k nodes have been added; the first k-bucket should now ' @@ -122,7 +113,7 @@ def testSplitBucket(self): h = hashlib.sha384() h.update('yet another remote node') nodeID = h.digest() - contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 9182, self.protocol) yield self.routingTable.addContact(contact) self.failUnlessEqual(len(self.routingTable._buckets), 2, 'k+1 nodes have been added; the first k-bucket should have been ' @@ -140,55 +131,43 @@ def testSplitBucket(self): @defer.inlineCallbacks def testFullSplit(self): """ - Test that a bucket is not split if it full, but does not cover the range - containing the parent node's ID + Test that a bucket is not split if it is full, but the new contact is not closer than the kth closest contact """ - self.routingTable._parentNodeID = 49 * 'a' - # more than 384 bits; this will not be in the range of _any_ k-bucket + self.routingTable._parentNodeID = 48 * chr(255) node_ids = [ - "d4a27096d81e3c4efacce9f940e887c956f736f859c8037b556efec6fdda5c388ae92bae96b9eb204b24da2f376c4282", - "553c0bfe119c35247c8cb8124091acb5c05394d5be7b019f6b1a5e18036af7a6148711ad6d47a0f955047bf9eac868aa", - "671a179c251c90863f46e7ef54264cbbad743fe3127871064d8f051ce4124fcbd893339e11358f621655e37bd6a74097", - "f896bafeb7ffb14b92986e3b08ee06807fdd5be34ab43f4f52559a5bbf0f12dedcd8556801f97c334b3ac9be7a0f7a93", - "33a7deb380eb4707211184798b66840c22c396e8cde00b75b64f9ead09bad1141b56d35a93bd511adb28c6708eecc39d", - "5e1e8ca575b536ae5ec52f7766ada904a64ebaad805909b1067ec3c984bf99909c9fcdd37e04ea5c5c043ea8830100ce", - "ee18857d0c1f7fc413424f3ffead4871f2499646d4c2ac16f35f0c8864318ca21596915f18f85a3a25f8ceaa56c844aa", - "68039f78fbf130873e7cce2f71f39d217dcb7f3fe562d64a85de4e21ee980b4a800f51bf6851d2bbf10e6590fe0d46b2" + 
"100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "ff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + "010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" ] # Add k contacts - for i in range(constants.k): - h = hashlib.sha384() - h.update('remote node %d' % i) - nodeID = h.digest() - self.assertEquals(nodeID, node_ids[i].decode('hex')) - contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + for nodeID in node_ids: + # self.assertEquals(nodeID, node_ids[i].decode('hex')) + contact = self.contact_manager.make_contact(nodeID.decode('hex'), '127.0.0.1', 9182, self.protocol) yield self.routingTable.addContact(contact) - self.failUnlessEqual(len(self.routingTable._buckets), 1) - self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), constants.k) + self.failUnlessEqual(len(self.routingTable._buckets), 2) + self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), 8) + self.failUnlessEqual(len(self.routingTable._buckets[1]._contacts), 2) # try adding a contact who is further from us than the k'th known contact - h = hashlib.sha384() - h.update('yet another remote node!') - nodeID = h.digest() - contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) - yield self.routingTable.addContact(contact) - self.failUnlessEqual(len(self.routingTable._buckets), 1) - self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), constants.k) - self.failIf(contact in self.routingTable._buckets[0]._contacts) - - # try adding a contact who is closer to us than the k'th known contact - h = hashlib.sha384() - h.update('yet another remote node') - nodeID = h.digest() - contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 91824, self.protocol) + nodeID = '020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'.decode('hex') + contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 9182, self.protocol) + self.assertFalse(self.routingTable._shouldSplit(self.routingTable._kbucketIndex(contact.id), contact.id)) yield self.routingTable.addContact(contact) self.failUnlessEqual(len(self.routingTable._buckets), 2) - self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), 5) - self.failUnlessEqual(len(self.routingTable._buckets[1]._contacts), 4) - self.failIf(contact not in self.routingTable._buckets[1]._contacts) + self.failUnlessEqual(len(self.routingTable._buckets[0]._contacts), 8) + self.failUnlessEqual(len(self.routingTable._buckets[1]._contacts), 2) + self.failIf(contact in self.routingTable._buckets[0]._contacts) + self.failIf(contact in self.routingTable._buckets[1]._contacts) # class 
KeyErrorFixedTest(unittest.TestCase): From 30c4b160f18e8c05a43ffbb7ac91259d812125eb Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:23:23 -0400 Subject: [PATCH 56/79] use epoll reactor for seed node script --- scripts/seed_node.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/seed_node.py b/scripts/seed_node.py index 18e349dbe9..3f32681307 100644 --- a/scripts/seed_node.py +++ b/scripts/seed_node.py @@ -5,6 +5,8 @@ import hashlib from copy import deepcopy from urllib import urlopen +from twisted.internet.epollreactor import install as install_epoll +install_epoll() from twisted.internet import reactor, defer from twisted.web import resource from twisted.web.server import Site @@ -54,7 +56,7 @@ def format_contact(contact): class MultiSeedRPCServer(AuthJSONRPCServer): - def __init__(self, starting_node_port=4455, nodes=50, rpc_port=5280): + def __init__(self, starting_node_port, nodes, rpc_port): AuthJSONRPCServer.__init__(self, False) self.port = None self.rpc_port = rpc_port @@ -208,7 +210,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--rpc_port', default=5280) parser.add_argument('--starting_port', default=4455) - parser.add_argument('--nodes', default=50) + parser.add_argument('--nodes', default=32) args = parser.parse_args() MultiSeedRPCServer(int(args.starting_port), int(args.nodes), int(args.rpc_port)) reactor.run() From 0e80123615cd4bf54aac5e7f7b3e82a6b7d8a6d2 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:25:10 -0400 Subject: [PATCH 57/79] use 12 minutes instead of 15 as delay in contact_is_good --- lbrynet/dht/contact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py index d7bbb443b9..736dfa4768 100644 --- a/lbrynet/dht/contact.py +++ b/lbrynet/dht/contact.py @@ -68,7 +68,7 @@ def contact_is_good(self): """ failures = self.failures now = self.getTime() - delay = constants.refreshTimeout / 4 + delay = constants.checkRefreshInterval if failures: if self.lastReplied and len(failures) >= 2 and self.lastReplied < failures[-2]: From cce3c8c7b5197329b0ae62a5adfdeb1325de82b7 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:25:47 -0400 Subject: [PATCH 58/79] increase kademlia rpc timeout to 8 seconds --- lbrynet/dht/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/constants.py b/lbrynet/dht/constants.py index 7ea3f7258d..2697e0d64d 100644 --- a/lbrynet/dht/constants.py +++ b/lbrynet/dht/constants.py @@ -25,7 +25,7 @@ replacementCacheSize = 8 #: Timeout for network operations (in seconds) -rpcTimeout = 5 +rpcTimeout = 8 # number of rpc attempts to make before a timeout results in the node being removed as a contact rpcAttempts = 5 From 7d21cc582282d4796e2717b46bf38b226b68aa64 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 29 May 2018 16:50:23 -0400 Subject: [PATCH 59/79] pylint and more tests --- lbrynet/dht/datastore.py | 4 ++- lbrynet/dht/distance.py | 2 +- .../tests/functional/dht/test_contact_rpc.py | 32 +------------------ lbrynet/tests/unit/dht/test_routingtable.py | 3 +- 4 files changed, 7 insertions(+), 34 deletions(-) diff --git a/lbrynet/dht/datastore.py b/lbrynet/dht/datastore.py index 012122335b..72969a7722 100644 --- a/lbrynet/dht/datastore.py +++ b/lbrynet/dht/datastore.py @@ -51,7 +51,9 @@ def hasPeersForBlob(self, key): def addPeerToBlob(self, contact, key, compact_address, lastPublished, originallyPublished, originalPublisherID): if 
key in self._dict: if compact_address not in map(lambda store_tuple: store_tuple[1], self._dict[key]): - self._dict[key].append((contact, compact_address, lastPublished, originallyPublished, originalPublisherID)) + self._dict[key].append( + (contact, compact_address, lastPublished, originallyPublished, originalPublisherID) + ) else: self._dict[key] = [(contact, compact_address, lastPublished, originallyPublished, originalPublisherID)] diff --git a/lbrynet/dht/distance.py b/lbrynet/dht/distance.py index 8945f8b30f..2c93ae9c2b 100644 --- a/lbrynet/dht/distance.py +++ b/lbrynet/dht/distance.py @@ -10,7 +10,7 @@ class Distance(object): def __init__(self, key): if len(key) != constants.key_bits / 8: - raise ValueError("invalid key length: %i", len(key)) + raise ValueError("invalid key length: %i" % len(key)) self.key = key self.val_key_one = long(key.encode('hex'), 16) diff --git a/lbrynet/tests/functional/dht/test_contact_rpc.py b/lbrynet/tests/functional/dht/test_contact_rpc.py index a7dd431997..7f3141ecf5 100644 --- a/lbrynet/tests/functional/dht/test_contact_rpc.py +++ b/lbrynet/tests/functional/dht/test_contact_rpc.py @@ -131,38 +131,8 @@ def testRPCAccess(self): raised locally """ remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_node.start_listening() remote_contact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(remote_contact) - - self.error = None - - def handleError(f): - try: - f.raiseException() - except AttributeError, e: - # This is the expected outcome since the remote node did not publish the method - self.error = None - except Exception, e: - self.error = 'The remote method failed, but the wrong exception was raised; ' \ - 'expected AttributeError, got %s' % type(e) - - def handleResult(result): - self.error = 'The remote method executed successfully, returning: "%s"; ' \ - 'this RPC should not have been allowed.' 
% result - - self.node.start_listening() - self._reactor.pump([1 for _ in range(10)]) - # Simulate the RPC - df = remote_contact.not_a_rpc_function() - df.addCallback(handleResult) - df.addErrback(handleError) - self._reactor.pump([1 for _ in range(10)]) - self.failIf(self.error, self.error) - # The list of sent RPC messages should be empty at this stage - self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, - 'The protocol is still waiting for a RPC result, ' - 'but the transaction is already done!') + self.assertRaises(AttributeError, getattr, remote_contact, "not_a_rpc_function") def testRPCRequestArgs(self): """ Tests if an RPC requiring arguments is executed correctly """ diff --git a/lbrynet/tests/unit/dht/test_routingtable.py b/lbrynet/tests/unit/dht/test_routingtable.py index ebe1698f98..c9a6a0a5c0 100644 --- a/lbrynet/tests/unit/dht/test_routingtable.py +++ b/lbrynet/tests/unit/dht/test_routingtable.py @@ -159,7 +159,8 @@ def testFullSplit(self): self.failUnlessEqual(len(self.routingTable._buckets[1]._contacts), 2) # try adding a contact who is further from us than the k'th known contact - nodeID = '020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'.decode('hex') + nodeID = '020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000' + nodeID = nodeID.decode('hex') contact = self.contact_manager.make_contact(nodeID, '127.0.0.1', 9182, self.protocol) self.assertFalse(self.routingTable._shouldSplit(self.routingTable._kbucketIndex(contact.id), contact.id)) yield self.routingTable.addContact(contact) From 9a63db4ec66783a2326ec02cc055f912a5f0d6d2 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Thu, 31 May 2018 10:50:11 -0400 Subject: [PATCH 60/79] add protocol version to the dht and migrate old arg format for store --- lbrynet/dht/constants.py | 2 + lbrynet/dht/contact.py | 6 +- lbrynet/dht/iterativefind.py | 24 ++- lbrynet/dht/node.py | 17 +- lbrynet/dht/protocol.py | 82 +++++--- .../tests/functional/dht/test_contact_rpc.py | 177 ++++++++++++++---- lbrynet/tests/unit/dht/test_node.py | 2 +- 7 files changed, 227 insertions(+), 83 deletions(-) diff --git a/lbrynet/dht/constants.py b/lbrynet/dht/constants.py index 2697e0d64d..e06aae1cdc 100644 --- a/lbrynet/dht/constants.py +++ b/lbrynet/dht/constants.py @@ -55,3 +55,5 @@ key_bits = 384 rpc_id_length = 20 + +protocolVersion = 1 diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py index 736dfa4768..0121afdc34 100644 --- a/lbrynet/dht/contact.py +++ b/lbrynet/dht/contact.py @@ -34,6 +34,7 @@ def __init__(self, contactManager, id, ipAddress, udpPort, networkProtocol, firs self.getTime = self._contactManager._get_time self.lastReplied = None self.lastRequested = None + self.protocolVersion = constants.protocolVersion @property def lastInteracted(self): @@ -120,6 +121,9 @@ def update_last_failed(self): failures.append(self.getTime()) self._contactManager._rpc_failures[(self.address, self.port)] = failures + def update_protocol_version(self, version): + self.protocolVersion = version + def __str__(self): return '<%s.%s object; IP address: %s, UDP port: %d>' % ( self.__module__, self.__class__.__name__, self.address, self.port) @@ -143,7 +147,7 @@ def __getattr__(self, name): raise AttributeError("unknown command: %s" % name) def _sendRPC(*args, **kwargs): - return self._networkProtocol.sendRPC(self, name, args, **kwargs) + return self._networkProtocol.sendRPC(self, name, args) return _sendRPC diff --git a/lbrynet/dht/iterativefind.py 
b/lbrynet/dht/iterativefind.py index 40e77a0229..608fd54185 100644 --- a/lbrynet/dht/iterativefind.py +++ b/lbrynet/dht/iterativefind.py @@ -53,10 +53,10 @@ def is_find_node_request(self): def is_find_value_request(self): return self.rpc == "findValue" - def is_closer(self, responseMsg): + def is_closer(self, contact): if not self.closest_node: return True - return self.distance.is_closer(responseMsg.nodeID, self.closest_node.id) + return self.distance.is_closer(contact.id, self.closest_node.id) def getContactTriples(self, result): if self.is_find_value_request: @@ -73,16 +73,15 @@ def sortByDistance(self, contact_list): contact_list.sort(key=lambda c: self.distance(c.id)) @defer.inlineCallbacks - def extendShortlist(self, contact, responseTuple): + def extendShortlist(self, contact, result): # The "raw response" tuple contains the response message and the originating address info - responseMsg = responseTuple[0] - originAddress = responseTuple[1] # tuple: (ip address, udp port) + originAddress = (contact.address, contact.port) if self.finished_deferred.called: - defer.returnValue(responseMsg.nodeID) + defer.returnValue(contact.id) if self.node.contact_manager.is_ignored(originAddress): raise ValueError("contact is ignored") - if responseMsg.nodeID == self.node.node_id: - defer.returnValue(responseMsg.nodeID) + if contact.id == self.node.node_id: + defer.returnValue(contact.id) yield self._lock.acquire() @@ -92,7 +91,6 @@ def extendShortlist(self, contact, responseTuple): self.shortlist.append(contact) # Now grow extend the (unverified) shortlist with the returned contacts - result = responseMsg.response # TODO: some validation on the result (for guarding against attacks) # If we are looking for a value, first see if this result is the value # we are looking for before treating it as a list of contact triples @@ -107,7 +105,7 @@ def extendShortlist(self, contact, responseTuple): # - mark it as the closest "empty" node, if it is # TODO: store to this peer after finding the value as per the kademlia spec if 'closestNodeNoValue' in self.find_value_result: - if self.is_closer(responseMsg): + if self.is_closer(contact): self.find_value_result['closestNodeNoValue'] = contact else: self.find_value_result['closestNodeNoValue'] = contact @@ -130,14 +128,14 @@ def extendShortlist(self, contact, responseTuple): self.sortByDistance(self.active_contacts) self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))]) - defer.returnValue(responseMsg.nodeID) + defer.returnValue(contact.id) @defer.inlineCallbacks def probeContact(self, contact): fn = getattr(contact, self.rpc) try: - response_tuple = yield fn(self.key, rawResponse=True) - result = yield self.extendShortlist(contact, response_tuple) + response = yield fn(self.key) + result = yield self.extendShortlist(contact, response) defer.returnValue(result) except (TimeoutError, defer.CancelledError, ValueError, IndexError): defer.returnValue(contact.id) diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index e4add4bdc6..200d31ade4 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -315,7 +315,7 @@ def announceHaveBlob(self, blob_hash): self_contact = self.contact_manager.make_contact(self.node_id, self.externalIP, self.port, self._protocol) token = self.make_token(self_contact.compact_ip()) - yield self.store(self_contact, blob_hash, token, self.peerPort) + yield self.store(self_contact, blob_hash, token, self.peerPort, self.node_id, 0) elif self.externalIP is not None: pass else: @@ -327,15 +327,15 
@@ def announceHaveBlob(self, blob_hash): def announce_to_contact(contact): known_nodes[contact.id] = contact try: - responseMsg, originAddress = yield contact.findValue(blob_hash, rawResponse=True) - res = yield contact.store(blob_hash, responseMsg.response['token'], self.peerPort) + response = yield contact.findValue(blob_hash) + res = yield contact.store(blob_hash, response['token'], self.peerPort, self.node_id, 0) if res != "OK": raise ValueError(res) contacted.append(contact) - log.debug("Stored %s to %s (%s)", blob_hash.encode('hex'), contact.id.encode('hex'), originAddress[0]) + log.debug("Stored %s to %s (%s)", binascii.hexlify(blob_hash), contact.log_id(), contact.address) except protocol.TimeoutError: log.debug("Timeout while storing blob_hash %s at %s", - blob_hash.encode('hex')[:16], contact.log_id()) + binascii.hexlify(blob_hash), contact.log_id()) except ValueError as err: log.error("Unexpected response: %s" % err.message) except Exception as err: @@ -348,7 +348,7 @@ def announce_to_contact(contact): yield defer.DeferredList(dl) - log.debug("Stored %s to %i of %i attempted peers", blob_hash.encode('hex')[:16], + log.debug("Stored %s to %i of %i attempted peers", binascii.hexlify(blob_hash), len(contacted), len(contacts)) contacted_node_ids = [c.id.encode('hex') for c in contacted] @@ -506,7 +506,7 @@ def ping(self): return 'pong' @rpcmethod - def store(self, rpc_contact, blob_hash, token, port, originalPublisherID=None, age=0): + def store(self, rpc_contact, blob_hash, token, port, originalPublisherID, age): """ Store the received data in this node's local datastore @param blob_hash: The hash of the data @@ -589,6 +589,9 @@ def findValue(self, rpc_contact, key): 'token': self.make_token(rpc_contact.compact_ip()), } + if self._protocol._protocolVersion: + response['protocolVersion'] = self._protocol._protocolVersion + if self._dataStore.hasPeersForBlob(key): response[key] = self._dataStore.getPeersForBlob(key) else: diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index f7a39a8320..d8ccf69af9 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -102,8 +102,37 @@ def __init__(self, node): self._partialMessagesProgress = {} self._listening = defer.Deferred(None) self._ping_queue = PingQueue(self._node) - - def sendRPC(self, contact, method, args, rawResponse=False): + self._protocolVersion = constants.protocolVersion + + def _migrate_incoming_rpc_args(self, contact, method, *args): + if method == 'store' and contact.protocolVersion == 0: + if isinstance(args[1], dict): + blob_hash = args[0] + token = args[1].pop('token', None) + port = args[1].pop('port', -1) + originalPublisherID = args[1].pop('lbryid', None) + age = 0 + return (blob_hash, token, port, originalPublisherID, age), {} + return args, {} + + def _migrate_outgoing_rpc_args(self, contact, method, *args): + """ + This will reformat protocol version 0 arguments for the store function and will add the + protocol version keyword argument to calls to contacts who will accept it + """ + if contact.protocolVersion == 0: + if method == 'store': + blob_hash, token, port, originalPublisherID, age = args + args = (blob_hash, {'token': token, 'port': port, 'lbryid': originalPublisherID}, originalPublisherID, + False) + return args + return args + if args and isinstance(args[-1], dict): + args[-1]['protocolVersion'] = self._protocolVersion + return args + return args + ({'protocolVersion': self._protocolVersion},) + + def sendRPC(self, contact, method, args): """ Sends an RPC to the specified contact 
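The two `_migrate_*` helpers added above are the whole compatibility shim: a `store` bound for a pre-0.20.0 peer is rewritten from the new flat argument list back into the old dict-style form, while calls to newer peers get a `protocolVersion` marker appended. A condensed, standalone sketch of the outgoing rewrite (the function name and the inlined version constant are illustrative, not lbrynet API):

    def migrate_outgoing_store_args(peer_protocol_version, blob_hash, token, port, publisher_id, age):
        if peer_protocol_version == 0:
            # legacy peers expect (blob_hash, value_dict, originalPublisherID, self_store)
            return (blob_hash, {'token': token, 'port': port, 'lbryid': publisher_id},
                    publisher_id, False)
        # 0.20.0+ peers take the flat argument list plus a version marker
        return (blob_hash, token, port, publisher_id, age, {'protocolVersion': 1})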
@@ -114,14 +143,6 @@ def sendRPC(self, contact, method, args, rawResponse=False): @param args: A list of (non-keyword) arguments to pass to the remote method, in the correct order @type args: tuple - @param rawResponse: If this is set to C{True}, the caller of this RPC - will receive a tuple containing the actual response - message object and the originating address tuple as - a result; in other words, it will not be - interpreted by this class. Unless something special - needs to be done with the metadata associated with - the message, this should remain C{False}. - @type rawResponse: bool @return: This immediately returns a deferred object, which will return the result of the RPC call, or raise the relevant exception @@ -131,7 +152,8 @@ def sendRPC(self, contact, method, args, rawResponse=False): C{ErrorMessage}). @rtype: twisted.internet.defer.Deferred """ - msg = msgtypes.RequestMessage(self._node.node_id, method, args) + msg = msgtypes.RequestMessage(self._node.node_id, method, self._migrate_outgoing_rpc_args(contact, method, + *args)) msgPrimitive = self._translator.toPrimitive(msg) encodedMsg = self._encoder.encode(msgPrimitive) @@ -143,8 +165,6 @@ def sendRPC(self, contact, method, args, rawResponse=False): contact.address, contact.port) df = defer.Deferred() - if rawResponse: - df._rpcRawResponse = True def _remove_contact(failure): # remove the contact from the routing table and track the failure try: @@ -156,6 +176,11 @@ def _remove_contact(failure): # remove the contact from the routing table and t def _update_contact(result): # refresh the contact in the routing table contact.update_last_replied() + if method == 'findValue': + if 'protocolVersion' not in result: + contact.update_protocol_version(0) + else: + contact.update_protocol_version(result.pop('protocolVersion')) d = self._node.addContact(contact) d.addCallback(lambda _: result) return d @@ -284,14 +309,8 @@ def datagramReceived(self, datagram, address): elif not remoteContact.id: remoteContact.set_id(message.nodeID) - if hasattr(df, '_rpcRawResponse'): - # The RPC requested that the raw response message - # and originating address be returned; do not - # interpret it - df.callback((message, address)) - else: - # We got a result from the RPC - df.callback(message.response) + # We got a result from the RPC + df.callback(message.response) else: # If the original message isn't found, it must have timed out # TODO: we should probably do something with this... 
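With `rawResponse` gone, version detection piggybacks on `findValue` replies instead: every 0.20.0+ node includes a `protocolVersion` field in its response, so its absence marks the sender as a legacy node. The relevant branch of the `_update_contact` callback above reduces to roughly this (standalone sketch, illustrative name):

    def detect_peer_version(contact, find_value_result):
        # a reply without the marker can only come from a pre-0.20.0 node
        if 'protocolVersion' not in find_value_result:
            contact.update_protocol_version(0)
        else:
            # strip the marker so callers see a clean result dict
            contact.update_protocol_version(find_value_result.pop('protocolVersion'))
        return find_value_result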
@@ -395,20 +414,25 @@ def handleResult(result): func = getattr(self._node, method, None) if callable(func) and hasattr(func, "rpcmethod"): # Call the exposed Node method and return the result to the deferred callback chain - if args: - log.debug("%s:%i RECV CALL %s(%s) %s:%i", self._node.externalIP, self._node.port, method, - args[0].encode('hex'), senderContact.address, senderContact.port) - else: - log.debug("%s:%i RECV CALL %s %s:%i", self._node.externalIP, self._node.port, method, + # if args: + # log.debug("%s:%i RECV CALL %s(%s) %s:%i", self._node.externalIP, self._node.port, method, + # args[0].encode('hex'), senderContact.address, senderContact.port) + # else: + log.debug("%s:%i RECV CALL %s %s:%i", self._node.externalIP, self._node.port, method, senderContact.address, senderContact.port) + if args and isinstance(args[-1], dict) and 'protocolVersion' in args[-1]: # args don't need reformatting + senderContact.update_protocol_version(int(args[-1].pop('protocolVersion'))) + a, kw = tuple(args[:-1]), args[-1] + else: + senderContact.update_protocol_version(0) + a, kw = self._migrate_incoming_rpc_args(senderContact, method, *args) try: if method != 'ping': - result = func(senderContact, *args) + result = func(senderContact, *a) else: result = func() except Exception, e: - log.exception("error handling request for %s:%i %s", senderContact.address, - senderContact.port, method) + log.exception("error handling request for %s:%i %s", senderContact.address, senderContact.port, method) df.errback(e) else: df.callback(result) diff --git a/lbrynet/tests/functional/dht/test_contact_rpc.py b/lbrynet/tests/functional/dht/test_contact_rpc.py index 7f3141ecf5..f6233be258 100644 --- a/lbrynet/tests/functional/dht/test_contact_rpc.py +++ b/lbrynet/tests/functional/dht/test_contact_rpc.py @@ -1,12 +1,10 @@ import time -import unittest +from twisted.trial import unittest import logging from twisted.internet.task import Clock from twisted.internet import defer import lbrynet.dht.protocol import lbrynet.dht.contact -import lbrynet.dht.constants -import lbrynet.dht.msgtypes from lbrynet.dht.error import TimeoutError from lbrynet.dht.node import Node, rpcmethod from mock_transport import listenUDP, resolve @@ -23,8 +21,18 @@ def setUp(self): self._reactor = Clock() self.node = Node(node_id='1' * 48, udpPort=self.udpPort, externalIP="127.0.0.1", listenUDP=listenUDP, resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + self.remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, + resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) + self.remote_contact = self.node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) + self.us_from_them = self.remote_node.contact_manager.make_contact('1' * 48, '127.0.0.1', 9182, + self.remote_node._protocol) + self.node.start_listening() + self.remote_node.start_listening() + @defer.inlineCallbacks def tearDown(self): + yield self.node.stop() + yield self.remote_node.stop() del self._reactor @defer.inlineCallbacks @@ -37,15 +45,12 @@ def testReactor(self): result = yield d self.assertTrue(result) + @defer.inlineCallbacks def testRPCTimeout(self): """ Tests if a RPC message sent to a dead remote node times out correctly """ - dead_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - dead_node.start_listening() - dead_node.stop() + yield 
self.remote_node.stop() self._reactor.pump([1 for _ in range(10)]) - dead_contact = self.node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(dead_contact) + self.node.addContact(self.remote_contact) @rpcmethod def fake_ping(*args, **kwargs): @@ -60,12 +65,12 @@ def fake_ping(*args, **kwargs): self.node.ping = fake_ping # Make sure the contact was added - self.failIf(dead_contact not in self.node.contacts, + self.failIf(self.remote_contact not in self.node.contacts, 'Contact not added to fake node (error in test code)') self.node.start_listening() # Run the PING RPC (which should raise a timeout error) - df = self.node._protocol.sendRPC(dead_contact, 'ping', {}) + df = self.remote_contact.ping() def check_timeout(err): self.assertEqual(err.type, TimeoutError) @@ -79,7 +84,7 @@ def reset_values(): # See if the contact was removed due to the timeout def check_removed_contact(): - self.failIf(dead_contact in self.node.contacts, + self.failIf(self.remote_contact in self.node.contacts, 'Contact was not removed after RPC timeout; check exception types.') df.addCallback(lambda _: reset_values()) @@ -88,14 +93,11 @@ def check_removed_contact(): df.addCallback(lambda _: check_removed_contact()) self._reactor.pump([1 for _ in range(20)]) + @defer.inlineCallbacks def testRPCRequest(self): """ Tests if a valid RPC request is executed and responded to correctly """ - remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_node.start_listening() - remoteContact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(remoteContact) + yield self.node.addContact(self.remote_contact) self.error = None @@ -108,15 +110,13 @@ def handleResult(result): self.error = 'Result from RPC is incorrect; expected "%s", got "%s"' \ % (expectedResult, result) - # Publish the "local" node on the network - self.node.start_listening() # Simulate the RPC - df = remoteContact.ping() + df = self.remote_contact.ping() df.addCallback(handleResult) df.addErrback(handleError) - for _ in range(10): - self._reactor.advance(1) + self._reactor.advance(2) + yield df self.failIf(self.error, self.error) # The list of sent RPC messages should be empty at this stage @@ -129,18 +129,13 @@ def testRPCAccess(self): Verifies that a RPC request for an existing but unpublished method is denied, and that the associated (remote) exception gets raised locally """ - remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_contact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.assertRaises(AttributeError, getattr, remote_contact, "not_a_rpc_function") + + self.assertRaises(AttributeError, getattr, self.remote_contact, "not_a_rpc_function") def testRPCRequestArgs(self): """ Tests if an RPC requiring arguments is executed correctly """ - remote_node = Node(node_id='2' * 48, udpPort=self.udpPort, externalIP="127.0.0.2", listenUDP=listenUDP, - resolve=resolve, clock=self._reactor, callLater=self._reactor.callLater) - remote_node.start_listening() - remote_contact = remote_node.contact_manager.make_contact('2' * 48, '127.0.0.2', 9182, self.node._protocol) - self.node.addContact(remote_contact) + + 
self.node.addContact(self.remote_contact) self.error = None def handleError(f): @@ -155,7 +150,7 @@ def handleResult(result): # Publish the "local" node on the network self.node.start_listening() # Simulate the RPC - df = remote_contact.ping() + df = self.remote_contact.ping() df.addCallback(handleResult) df.addErrback(handleError) self._reactor.pump([1 for _ in range(10)]) @@ -164,3 +159,121 @@ def handleResult(result): self.failUnlessEqual(len(self.node._protocol._sentMessages), 0, 'The protocol is still waiting for a RPC result, ' 'but the transaction is already done!') + + @defer.inlineCallbacks + def testDetectProtocolVersion(self): + original_findvalue = self.remote_node.findValue + fake_blob = str("AB" * 48).decode('hex') + + @rpcmethod + def findValue(contact, key): + result = original_findvalue(contact, key) + result.pop('protocolVersion') + return result + + self.assertEquals(self.remote_contact.protocolVersion, 1) + + self.remote_node.findValue = findValue + d = self.remote_contact.findValue(fake_blob) + self._reactor.advance(3) + find_value_response = yield d + self.assertEquals(self.remote_contact.protocolVersion, 0) + self.assertTrue('protocolVersion' not in find_value_response) + + self.remote_node.findValue = original_findvalue + d = self.remote_contact.findValue(fake_blob) + self._reactor.advance(3) + find_value_response = yield d + self.assertEquals(self.remote_contact.protocolVersion, 1) + self.assertTrue('protocolVersion' not in find_value_response) + + self.remote_node.findValue = findValue + d = self.remote_contact.findValue(fake_blob) + self._reactor.advance(3) + find_value_response = yield d + self.assertEquals(self.remote_contact.protocolVersion, 0) + self.assertTrue('protocolVersion' not in find_value_response) + + @defer.inlineCallbacks + def testStoreToPre_0_20_0_Node(self): + + self.remote_node._protocol._protocolVersion = 0 + + def _dont_migrate(contact, method, *args): + return args, {} + + self.remote_node._protocol._migrate_incoming_rpc_args = _dont_migrate + + original_findvalue = self.remote_node.findValue + original_store = self.remote_node.store + + @rpcmethod + def findValue(contact, key): + result = original_findvalue(contact, key) + if 'protocolVersion' in result: + result.pop('protocolVersion') + return result + + @rpcmethod + def store(contact, key, value, originalPublisherID=None, self_store=False, **kwargs): + self.assertTrue(len(key) == 48) + self.assertSetEqual(set(value.keys()), {'token', 'lbryid', 'port'}) + self.assertFalse(self_store) + self.assertDictEqual(kwargs, {}) + return original_store( # pylint: disable=too-many-function-args + contact, key, value['token'], value['port'], originalPublisherID, 0 + ) + + self.assertEquals(self.remote_contact.protocolVersion, 1) + + self.remote_node.findValue = findValue + self.remote_node.store = store + + fake_blob = str("AB" * 48).decode('hex') + + d = self.remote_contact.findValue(fake_blob) + self._reactor.advance(3) + find_value_response = yield d + self.assertEquals(self.remote_contact.protocolVersion, 0) + self.assertTrue('protocolVersion' not in find_value_response) + token = find_value_response['token'] + d = self.remote_contact.store(fake_blob, token, 3333, self.node.node_id, 0) + self._reactor.advance(3) + response = yield d + self.assertEquals(response, "OK") + self.assertEquals(self.remote_contact.protocolVersion, 0) + self.assertTrue(self.remote_node._dataStore.hasPeersForBlob(fake_blob)) + self.assertEquals(len(self.remote_node._dataStore.getStoringContacts()), 1) + + 
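# Aside: none of these tests touch a real socket or wall clock; the Clock
# from setUp stands in for the reactor, and advance()/pump() fire any
# callLater-scheduled work synchronously. The pattern in miniature
# (standalone, with illustrative values):
#
#     from twisted.internet import defer, task
#
#     clock = task.Clock()
#     d = defer.Deferred()
#     clock.callLater(3, d.callback, 'pong')
#     clock.advance(3)   # timers fire deterministically, no sleeping
#     assert d.called    # now safe to `yield d` in an inlineCallbacks test
#
# which is why each test can advance the reactor and then immediately yield
# its deferred.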
@defer.inlineCallbacks + def testStoreFromPre_0_20_0_Node(self): + + self.remote_node._protocol._protocolVersion = 0 + + def _dont_migrate(contact, method, *args): + return args + + self.remote_node._protocol._migrate_outgoing_rpc_args = _dont_migrate + + us_from_them = self.remote_node.contact_manager.make_contact('1' * 48, '127.0.0.1', self.udpPort, + self.remote_node._protocol) + + fake_blob = str("AB" * 48).decode('hex') + + d = us_from_them.findValue(fake_blob) + self._reactor.advance(3) + find_value_response = yield d + self.assertEquals(self.remote_contact.protocolVersion, 0) + self.assertTrue('protocolVersion' not in find_value_response) + token = find_value_response['token'] + us_from_them.update_protocol_version(0) + d = self.remote_node._protocol.sendRPC( + us_from_them, "store", (fake_blob, {'lbryid': self.remote_node.node_id, 'token': token, 'port': 3333}) + ) + self._reactor.advance(3) + response = yield d + self.assertEquals(response, "OK") + self.assertEquals(self.remote_contact.protocolVersion, 0) + self.assertTrue(self.node._dataStore.hasPeersForBlob(fake_blob)) + self.assertEquals(len(self.node._dataStore.getStoringContacts()), 1) + self.assertIs(self.node._dataStore.getStoringContacts()[0], self.remote_contact) diff --git a/lbrynet/tests/unit/dht/test_node.py b/lbrynet/tests/unit/dht/test_node.py index e04b07f9bb..f5fe876abd 100644 --- a/lbrynet/tests/unit/dht/test_node.py +++ b/lbrynet/tests/unit/dht/test_node.py @@ -62,7 +62,7 @@ def setUp(self): def testStore(self): """ Tests if the node can store (and privately retrieve) some data """ for key, port in self.cases: - yield self.node.store(self.contact, key, self.token, port, self.contact.id) + yield self.node.store(self.contact, key, self.token, port, self.contact.id, 0) for key, value in self.cases: expected_result = self.contact.compact_ip() + str(struct.pack('>H', value)) + \ self.contact.id From 0386bfadf07d1913187e7328ee4555933f24b91a Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Thu, 31 May 2018 12:01:07 -0400 Subject: [PATCH 61/79] update seed script --- scripts/seed_node.py | 46 ++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/scripts/seed_node.py b/scripts/seed_node.py index 3f32681307..c94d55de05 100644 --- a/scripts/seed_node.py +++ b/scripts/seed_node.py @@ -48,13 +48,30 @@ def format_contact(contact): return { "node_id": contact.id.encode('hex'), "address": contact.address, - "port": contact.port, + "nodePort": contact.port, "lastReplied": contact.lastReplied, "lastRequested": contact.lastRequested, - "failedRPCs": contact.failedRPCs + "failedRPCs": contact.failedRPCs, + "lastFailed": None if not contact.failures else contact.failures[-1] } +def format_datastore(node): + datastore = deepcopy(node._dataStore._dict) + result = {} + for key, values in datastore.iteritems(): + contacts = [] + for (contact, value, last_published, originally_published, original_publisher_id) in values: + contact_dict = format_contact(contact) + contact_dict['peerPort'] = struct.unpack('>H', value[4:6])[0] + contact_dict['lastPublished'] = last_published + contact_dict['originallyPublished'] = originally_published + contact_dict['originalPublisherID'] = original_publisher_id.encode('hex') + contacts.append(contact_dict) + result[key.encode('hex')] = contacts + return result + + class MultiSeedRPCServer(AuthJSONRPCServer): def __init__(self, starting_node_port, nodes, rpc_port): AuthJSONRPCServer.__init__(self, False) @@ -103,27 +120,18 @@ def 
jsonrpc_get_node_ids(self): return defer.succeed([node.node_id.encode('hex') for node in self._nodes]) def jsonrpc_node_datastore(self, node_id): - def format_datastore(node): - datastore = deepcopy(node._dataStore._dict) - result = {} - for key, values in datastore.iteritems(): - contacts = [] - for (value, last_published, originally_published, original_publisher_id) in values: - host = ".".join([str(ord(d)) for d in value[:4]]) - port, = struct.unpack('>H', value[4:6]) - peer_node_id = value[6:] - contact_dict = format_contact(node.contact_manager.make_contact(peer_node_id, host, port)) - contact_dict['lastPublished'] = last_published - contact_dict['originallyPublished'] = originally_published - contact_dict['originalPublisherID'] = original_publisher_id - contacts.append(contact_dict) - result[key.encode('hex')] = contacts - return result - for node in self._nodes: if node.node_id == node_id.decode('hex'): return defer.succeed(format_datastore(node)) + def jsonrpc_get_nodes_who_stored(self, blob_hash): + storing_nodes = {} + for node in self._nodes: + datastore = format_datastore(node) + if blob_hash in datastore: + storing_nodes[node.node_id.encode('hex')] = datastore[blob_hash] + return defer.succeed(storing_nodes) + def jsonrpc_node_routing_table(self, node_id): def format_bucket(bucket): return { From 42eb172638a04f0dcd8b267e60bd5fd65436dea3 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 5 Jun 2018 15:08:49 -0400 Subject: [PATCH 62/79] refactor announceHaveBlob -add cached `token` to Contact objects to minimize findValue requests -remove self_store, always store to remote contacts even if we're the closest known node to the hash -move the store call and error handling from announceHaveBlob to a smaller function of its own --- lbrynet/dht/contact.py | 9 +++++ lbrynet/dht/node.py | 74 +++++++++++++++++------------------------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py index 0121afdc34..cd010d0c82 100644 --- a/lbrynet/dht/contact.py +++ b/lbrynet/dht/contact.py @@ -35,6 +35,15 @@ def __init__(self, contactManager, id, ipAddress, udpPort, networkProtocol, firs self.lastReplied = None self.lastRequested = None self.protocolVersion = constants.protocolVersion + self._token = (None, 0) # token, timestamp + + def update_token(self, token): + self._token = token, self.getTime() + + @property + def token(self): + # expire the token 1 minute early to be safe + return self._token[0] if self._token[1] + 240 > self.getTime() else None @property def lastInteracted(self): diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 200d31ade4..18c8f8f416 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -303,55 +303,41 @@ def hasContacts(self): def bucketsWithContacts(self): return self._routingTable.bucketsWithContacts() + @defer.inlineCallbacks + def storeToContact(self, blob_hash, contact): + try: + token = contact.token + if not token: + find_value_response = yield contact.findValue(blob_hash) + token = find_value_response['token'] + contact.update_token(token) + res = yield contact.store(blob_hash, token, self.peerPort, self.node_id, 0) + if res != "OK": + raise ValueError(res) + defer.returnValue(True) + log.debug("Stored %s to %s (%s)", binascii.hexlify(blob_hash), contact.log_id(), contact.address) + except protocol.TimeoutError: + log.debug("Timeout while storing blob_hash %s at %s", + binascii.hexlify(blob_hash), contact.log_id()) + except ValueError as err: + log.error("Unexpected response: %s" % 
err.message) + except Exception as err: + log.error("Unexpected error while storing blob_hash %s at %s: %s", + binascii.hexlify(blob_hash), contact, err) + defer.returnValue(False) + @defer.inlineCallbacks def announceHaveBlob(self, blob_hash): - known_nodes = {} contacts = yield self.iterativeFindNode(blob_hash) - # store locally if we're the closest node and there are less than k contacts to try storing to - if self.externalIP is not None and contacts and len(contacts) < constants.k: - is_closer = Distance(blob_hash).is_closer(self.node_id, contacts[-1].id) - if is_closer: - contacts.pop() - self_contact = self.contact_manager.make_contact(self.node_id, self.externalIP, - self.port, self._protocol) - token = self.make_token(self_contact.compact_ip()) - yield self.store(self_contact, blob_hash, token, self.peerPort, self.node_id, 0) - elif self.externalIP is not None: - pass - else: - raise Exception("Cannot determine external IP: %s" % self.externalIP) - - contacted = [] - - @defer.inlineCallbacks - def announce_to_contact(contact): - known_nodes[contact.id] = contact - try: - response = yield contact.findValue(blob_hash) - res = yield contact.store(blob_hash, response['token'], self.peerPort, self.node_id, 0) - if res != "OK": - raise ValueError(res) - contacted.append(contact) - log.debug("Stored %s to %s (%s)", binascii.hexlify(blob_hash), contact.log_id(), contact.address) - except protocol.TimeoutError: - log.debug("Timeout while storing blob_hash %s at %s", - binascii.hexlify(blob_hash), contact.log_id()) - except ValueError as err: - log.error("Unexpected response: %s" % err.message) - except Exception as err: - log.error("Unexpected error while storing blob_hash %s at %s: %s", - binascii.hexlify(blob_hash), contact, err) - - dl = [] - for c in contacts: - dl.append(announce_to_contact(c)) - - yield defer.DeferredList(dl) + if not self.externalIP: + raise Exception("Cannot determine external IP: %s" % self.externalIP) + stored_to = yield DeferredDict({contact: self.storeToContact(blob_hash, contact) for contact in contacts}) + contacted_node_ids = map( + lambda contact: contact.id.encode('hex'), filter(lambda contact: stored_to[contact], stored_to.keys()) + ) log.debug("Stored %s to %i of %i attempted peers", binascii.hexlify(blob_hash), - len(contacted), len(contacts)) - - contacted_node_ids = [c.id.encode('hex') for c in contacted] + len(contacted_node_ids), len(contacts)) defer.returnValue(contacted_node_ids) def change_token(self): From 537df6c8ad46553b922a228972dc8674b7ed146d Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Tue, 5 Jun 2018 15:09:19 -0400 Subject: [PATCH 63/79] log socket errors --- lbrynet/dht/protocol.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lbrynet/dht/protocol.py b/lbrynet/dht/protocol.py index d8ccf69af9..73b81bc2cf 100644 --- a/lbrynet/dht/protocol.py +++ b/lbrynet/dht/protocol.py @@ -375,7 +375,8 @@ def _write(self, txData, address): # this should probably try to retransmit when the network connection is back log.error("Network is unreachable") else: - log.error("DHT socket error: %s (%i)", err.message, err.errno) + log.error("DHT socket error sending %i bytes to %s:%i - %s (code %i)", + len(txData), address[0], address[1], err.message, err.errno) raise err else: raise TransportNotConnected() From b0a741b1f4e4b6179ac9f387c029b4d42f6d0452 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:11:27 -0400 Subject: [PATCH 64/79] fix hash announcer semaphore --- lbrynet/dht/hashannouncer.py | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/lbrynet/dht/hashannouncer.py b/lbrynet/dht/hashannouncer.py index 7c89c1c688..66ce3306d6 100644 --- a/lbrynet/dht/hashannouncer.py +++ b/lbrynet/dht/hashannouncer.py @@ -18,6 +18,7 @@ def __init__(self, dht_node, storage, concurrent_announcers=None): self.concurrent_announcers = concurrent_announcers or conf.settings['concurrent_announcers'] self._manage_lc = task.LoopingCall(self.manage) self._manage_lc.clock = self.clock + self.sem = defer.DeferredSemaphore(self.concurrent_announcers) def start(self): self._manage_lc.start(30) @@ -55,8 +56,9 @@ def immediate_announce(self, blob_hashes): progress_lc = task.LoopingCall(self._show_announce_progress, len(self.hash_queue), start) progress_lc.clock = self.clock progress_lc.start(60, now=False) - s = defer.DeferredSemaphore(self.concurrent_announcers) - results = yield utils.DeferredDict({blob_hash: s.run(self.do_store, blob_hash) for blob_hash in blob_hashes}) + results = yield utils.DeferredDict( + {blob_hash: self.sem.run(self.do_store, blob_hash) for blob_hash in blob_hashes} + ) now = self.clock.seconds() progress_lc.stop() From db06191c3310f01da9f017ea7bb3b474c0b846ee Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:18:13 -0400 Subject: [PATCH 65/79] reduce default concurrent announcers to 10 -lower rpc timeout to what it originally was --- lbrynet/conf.py | 2 +- lbrynet/dht/constants.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lbrynet/conf.py b/lbrynet/conf.py index 57dad28502..ddd4b4c7d8 100644 --- a/lbrynet/conf.py +++ b/lbrynet/conf.py @@ -40,7 +40,7 @@ KB = 2 ** 10 MB = 2 ** 20 -DEFAULT_CONCURRENT_ANNOUNCERS = 100 +DEFAULT_CONCURRENT_ANNOUNCERS = 10 DEFAULT_DHT_NODES = [ ('lbrynet1.lbry.io', 4444), diff --git a/lbrynet/dht/constants.py b/lbrynet/dht/constants.py index e06aae1cdc..bf48d005c7 100644 --- a/lbrynet/dht/constants.py +++ b/lbrynet/dht/constants.py @@ -25,7 +25,7 @@ replacementCacheSize = 8 #: Timeout for network operations (in seconds) -rpcTimeout = 8 +rpcTimeout = 5 # number of rpc attempts to make before a timeout results in the node being removed as a contact rpcAttempts = 5 From 4fbaaac3f3f0edf28ed9c60be884158ecc1454c1 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:18:29 -0400 Subject: [PATCH 66/79] default new contacts to protocol version 0 --- lbrynet/dht/contact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbrynet/dht/contact.py b/lbrynet/dht/contact.py index cd010d0c82..51eb10fe17 100644 --- a/lbrynet/dht/contact.py +++ b/lbrynet/dht/contact.py @@ -34,7 +34,7 @@ def __init__(self, contactManager, id, ipAddress, udpPort, networkProtocol, firs self.getTime = self._contactManager._get_time self.lastReplied = None self.lastRequested = None - self.protocolVersion = constants.protocolVersion + self.protocolVersion = 0 self._token = (None, 0) # token, timestamp def update_token(self, token): From adca5f59936fbea40ef17c0033e7b2187217a696 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:18:48 -0400 Subject: [PATCH 67/79] fix routing_table_get --- lbrynet/daemon/Daemon.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lbrynet/daemon/Daemon.py b/lbrynet/daemon/Daemon.py index 83ba743da5..6fa9fb148b 100644 --- a/lbrynet/daemon/Daemon.py +++ b/lbrynet/daemon/Daemon.py @@ -3150,18 +3150,13 @@ def jsonrpc_routing_table_get(self): """ result = {} - data_store = deepcopy(self.session.dht_node._dataStore._dict) + data_store = 
self.session.dht_node._dataStore._dict datastore_len = len(data_store) hosts = {} if datastore_len: for k, v in data_store.iteritems(): - for value, lastPublished, originallyPublished, originalPublisherID in v: - try: - contact = self.session.dht_node._routingTable.getContact( - originalPublisherID) - except (ValueError, IndexError): - continue + for contact, value, lastPublished, originallyPublished, originalPublisherID in v: if contact in hosts: blobs = hosts[contact] else: From e8b402f9980b60eaa13fa66ef0790c98d830028e Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:21:35 -0400 Subject: [PATCH 68/79] remove deferredLock from iterativeFind -fire the first iteration right away --- lbrynet/dht/iterativefind.py | 45 +++++++++++------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/lbrynet/dht/iterativefind.py b/lbrynet/dht/iterativefind.py index 608fd54185..957c69d5b2 100644 --- a/lbrynet/dht/iterativefind.py +++ b/lbrynet/dht/iterativefind.py @@ -43,7 +43,6 @@ def __init__(self, node, shortlist, key, rpc): self._iteration_count = 0 self.find_value_result = {} self.pending_iteration_calls = [] - self._lock = defer.DeferredLock() @property def is_find_node_request(self): @@ -83,8 +82,6 @@ def extendShortlist(self, contact, result): if contact.id == self.node.node_id: defer.returnValue(contact.id) - yield self._lock.acquire() - if contact not in self.active_contacts: self.active_contacts.append(contact) if contact not in self.shortlist: @@ -97,7 +94,6 @@ def extendShortlist(self, contact, result): if self.is_find_value_request and self.key in result: # We have found the value self.find_value_result[self.key] = result[self.key] - self._lock.release() self.finished_deferred.callback(self.find_value_result) else: if self.is_find_value_request: @@ -121,12 +117,9 @@ def extendShortlist(self, contact, result): if found_contact not in self.shortlist: self.shortlist.append(found_contact) - self._lock.release() - - if not self.finished_deferred.called: - if self.should_stop(): - self.sortByDistance(self.active_contacts) - self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))]) + if not self.finished_deferred.called and self.should_stop(): + self.sortByDistance(self.active_contacts) + self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))]) defer.returnValue(contact.id) @@ -141,20 +134,15 @@ def probeContact(self, contact): defer.returnValue(contact.id) def should_stop(self): - active_contacts_len = len(self.active_contacts) - if active_contacts_len >= constants.k: - # log.info("there are enough results %s(%s)", self.rpc, self.key.encode('hex')) + if self.prev_closest_node and self.closest_node and self.distance.is_closer(self.prev_closest_node.id, + self.closest_node.id): return True - if self.prev_closest_node and self.closest_node and self.distance.is_closer( - self.prev_closest_node.id, self.closest_node.id): - # log.info("not getting any closer %s(%s)", self.rpc, self.key.encode('hex')) + if len(self.active_contacts) >= constants.k: return True return False # Send parallel, asynchronous FIND_NODE RPCs to the shortlist of contacts - @defer.inlineCallbacks def _searchIteration(self): - yield self._lock.acquire() # Sort the discovered active nodes from closest to furthest if len(self.active_contacts): self.sortByDistance(self.active_contacts) @@ -178,25 +166,18 @@ def _searchIteration(self): for contact in to_remove: # these contacts will be re-added to the 
shortlist when they reply successfully self.shortlist.remove(contact) - # log.info("Active probes: %i, contacted %i/%i (%s)", len(self.active_probes), - # len(self.active_contacts), len(self.already_contacted), hex(id(self))) - # run the probes if probes: # Schedule the next iteration if there are any active # calls (Kademlia uses loose parallelism) self.searchIteration() - self._lock.release() - d = defer.gatherResults(probes) + d = defer.DeferredList(probes, consumeErrors=True) - @defer.inlineCallbacks def _remove_probes(results): - yield self._lock.acquire() for probe in probes: self.active_probes.remove(probe) - self._lock.release() - defer.returnValue(results) + return results d.addCallback(_remove_probes) @@ -204,8 +185,11 @@ def _remove_probes(results): # If no probes were sent, there will not be any improvement, so we're done self.sortByDistance(self.active_contacts) self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))]) + elif not self.finished_deferred.called and self.should_stop(): + self.sortByDistance(self.active_contacts) + self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))]) - def searchIteration(self): + def searchIteration(self, delay=constants.iterativeLookupDelay): def _cancel_pending_iterations(result): while self.pending_iteration_calls: canceller = self.pending_iteration_calls.pop() @@ -213,12 +197,11 @@ def _cancel_pending_iterations(result): return result self.finished_deferred.addBoth(_cancel_pending_iterations) self._iteration_count += 1 - # log.debug("iteration %i %s(%s...)", self._iteration_count, self.rpc, self.key.encode('hex')[:8]) - call, cancel = self.node.reactor_callLater(1, self._search_iteration_semaphore.run, self._searchIteration) + call, cancel = self.node.reactor_callLater(delay, self._search_iteration_semaphore.run, self._searchIteration) self.pending_iteration_calls.append(cancel) def iterativeFind(node, shortlist, key, rpc): helper = _IterativeFind(node, shortlist, key, rpc) - helper.searchIteration() + helper.searchIteration(0) return helper.finished_deferred From 44644673d7da6bd1c09f3c7429b0be5dffe1fc28 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:21:56 -0400 Subject: [PATCH 69/79] add profiler --- lbrynet/core/utils.py | 77 +++++++++++++++++++++++++++++++++++++++++++ lbrynet/dht/node.py | 2 ++ 2 files changed, 79 insertions(+) diff --git a/lbrynet/core/utils.py b/lbrynet/core/utils.py index ce0d433f2b..6498c52e9e 100644 --- a/lbrynet/core/utils.py +++ b/lbrynet/core/utils.py @@ -172,3 +172,80 @@ def DeferredDict(d, consumeErrors=False): if success: response[k] = result defer.returnValue(response) + + + +import traceback +import functools +import logging +from twisted.internet import defer +from twisted.python.failure import Failure + +log = logging.getLogger(__name__) + + +class DeferredProfiler(object): + def __init__(self): + self.profile_results = {} + + def add_result(self, fn, start_time, finished_time, stack, success): + self.profile_results[fn].append((start_time, finished_time, stack, success)) + + def show_profile_results(self, fn): + profile_results = list(self.profile_results[fn]) + call_counts = { + caller: [(start, finished, finished - start, success) + for (start, finished, _caller, success) in profile_results + if _caller == caller] + for caller in set(result[2] for result in profile_results) + } + + log.info("called %s %i times from %i sources\n", fn.__name__, len(profile_results), len(call_counts)) + for caller 
in sorted(list(call_counts.keys()), key=lambda c: len(call_counts[c]), reverse=True): + call_info = call_counts[caller] + times = [r[2] for r in call_info] + own_time = sum(times) + times.sort() + longest = 0 if not times else times[-1] + shortest = 0 if not times else times[0] + log.info( + "%i successes and %i failures\nlongest %f, shortest %f, avg %f\ncaller:\n%s", + len([r for r in call_info if r[3]]), + len([r for r in call_info if not r[3]]), + longest, shortest, own_time / float(len(call_info)), caller + ) + + def profiled_deferred(self, reactor=None): + if not reactor: + from twisted.internet import reactor + + def _cb(result, fn, start, caller_info): + if isinstance(result, (Failure, Exception)): + error = result + result = None + else: + error = None + self.add_result(fn, start, reactor.seconds(), caller_info, error is None) + if error is None: + return result + raise error + + def _profiled_deferred(fn): + reactor.addSystemEventTrigger("after", "shutdown", self.show_profile_results, fn) + self.profile_results[fn] = [] + + @functools.wraps(fn) + def _wrapper(*args, **kwargs): + caller_info = "".join(traceback.format_list(traceback.extract_stack()[-3:-1])) + start = reactor.seconds() + d = defer.maybeDeferred(fn, *args, **kwargs) + d.addBoth(_cb, fn, start, caller_info) + return d + + return _wrapper + + return _profiled_deferred + + +_profiler = DeferredProfiler() +profile_deferred = _profiler.profiled_deferred diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py index 18c8f8f416..e919847a28 100644 --- a/lbrynet/dht/node.py +++ b/lbrynet/dht/node.py @@ -592,6 +592,8 @@ def _generateID(self): """ return generate_id() + # from lbrynet.core.utils import profile_deferred + # @profile_deferred() @defer.inlineCallbacks def _iterativeFind(self, key, startupShortlist=None, rpc='findNode'): """ The basic Kademlia iterative lookup operation (for nodes/values) From 16cb6d86575b2476b8c2c2e821d839ee20acc1c0 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Wed, 6 Jun 2018 17:22:11 -0400 Subject: [PATCH 70/79] remove Session._join_deferred --- lbrynet/core/Session.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lbrynet/core/Session.py b/lbrynet/core/Session.py index 634fa99097..28c69ca60a 100644 --- a/lbrynet/core/Session.py +++ b/lbrynet/core/Session.py @@ -118,7 +118,6 @@ def __init__(self, blob_data_payment_rate, db_dir=None, node_id=None, peer_manag # self.payment_rate_manager_class = payment_rate_manager_class or NegotiatedPaymentRateManager # self.is_generous = is_generous self.storage = storage or SQLiteStorage(self.db_dir) - self._join_dht_deferred = None def setup(self): """Create the blob directory and database if necessary, start all desired services""" @@ -230,9 +229,9 @@ def _setup_dht(self): # does not block startup, the dht will re-attempt if nece self.hash_announcer = hashannouncer.DHTHashAnnouncer(self.dht_node, self.storage) self.peer_manager = self.dht_node.peer_manager self.peer_finder = self.dht_node.peer_finder - self._join_dht_deferred = self.dht_node.start(self.known_dht_nodes) - self._join_dht_deferred.addCallback(lambda _: log.info("Joined the dht")) - self._join_dht_deferred.addCallback(lambda _: self.hash_announcer.start()) + d = self.dht_node.start(self.known_dht_nodes) + d.addCallback(lambda _: log.info("Joined the dht")) + d.addCallback(lambda _: self.hash_announcer.start()) def _setup_other_components(self): log.debug("Setting up the rest of the components") From bc0da5e2d1b14df82b36875e2ccf134153322634 Mon Sep 17 00:00:00 2001 
From bc0da5e2d1b14df82b36875e2ccf134153322634 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 6 Jun 2018 17:22:36 -0400
Subject: [PATCH 71/79] only use seeds in iterative bootstrap if no contacts are known yet

---
 lbrynet/dht/node.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py
index e919847a28..39fa63a49a 100644
--- a/lbrynet/dht/node.py
+++ b/lbrynet/dht/node.py
@@ -235,7 +235,7 @@ def _initialize_routing():
             defer.returnValue(None)
         else:
             # find the closest peers to us
-            closest = yield self._iterativeFind(self.node_id, shortlist)
+            closest = yield self._iterativeFind(self.node_id, shortlist if not self.contacts else None)
             yield _ping_contacts(closest)
             # query random hashes in our bucket key ranges to fill or split them
             random_ids_in_range = self._routingTable.getRefreshList(force=True)

From ae631f05c37b8663d3f622d9a73c90d08d18e031 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 6 Jun 2018 17:22:52 -0400
Subject: [PATCH 72/79] fix teardown error

---
 lbrynet/dht/node.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py
index 39fa63a49a..e593b8fcd1 100644
--- a/lbrynet/dht/node.py
+++ b/lbrynet/dht/node.py
@@ -22,7 +22,6 @@
 import protocol
 from peerfinder import DHTPeerFinder
 from contact import ContactManager
-from distance import Distance
 from iterativefind import iterativeFind


@@ -156,7 +155,7 @@ def __init__(self, node_id=None, udpPort=4000, dataStore=None,

     def __del__(self):
         log.warning("unclean shutdown of the dht node")
-        if self._listeningPort is not None:
+        if hasattr(self, "_listeningPort") and self._listeningPort is not None:
             self._listeningPort.stopListening()

     @defer.inlineCallbacks
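The hasattr guard matters because __del__ can run on a half-constructed object: if Node.__init__ raises before _listeningPort is assigned, the destructor would otherwise fail with an AttributeError during teardown. A standalone sketch of the pattern, using an illustrative class rather than lbrynet code:

# Guarding attribute access in __del__, since __init__ may have failed early.
class Listener(object):
    def __init__(self, port):
        if port < 0:
            raise ValueError("bad port")   # __init__ can fail before...
        self._listeningPort = port

    def __del__(self):
        # ...self._listeningPort exists, so check with hasattr first
        if hasattr(self, "_listeningPort") and self._listeningPort is not None:
            print("closing port %d" % self._listeningPort)

try:
    Listener(-1)
except ValueError:
    pass  # no AttributeError from __del__ here, thanks to the hasattr guard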
From af096aee41ea35f153d14e86a17cccb9804e5a8e Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Wed, 6 Jun 2018 17:23:02 -0400
Subject: [PATCH 73/79] update test

---
 lbrynet/tests/functional/dht/test_contact_rpc.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/lbrynet/tests/functional/dht/test_contact_rpc.py b/lbrynet/tests/functional/dht/test_contact_rpc.py
index f6233be258..90be98aec7 100644
--- a/lbrynet/tests/functional/dht/test_contact_rpc.py
+++ b/lbrynet/tests/functional/dht/test_contact_rpc.py
@@ -171,8 +171,6 @@ def findValue(contact, key):
             result.pop('protocolVersion')
             return result

-        self.assertEquals(self.remote_contact.protocolVersion, 1)
-
         self.remote_node.findValue = findValue
         d = self.remote_contact.findValue(fake_blob)
         self._reactor.advance(3)
@@ -196,9 +194,6 @@ def findValue(contact, key):

     @defer.inlineCallbacks
     def testStoreToPre_0_20_0_Node(self):
-
-        self.remote_node._protocol._protocolVersion = 0
-
         def _dont_migrate(contact, method, *args):
             return args, {}

@@ -224,8 +219,6 @@ def store(contact, key, value, originalPublisherID=None, self_store=False, **kwa
                 contact, key, value['token'], value['port'], originalPublisherID, 0
             )

-        self.assertEquals(self.remote_contact.protocolVersion, 1)
-
         self.remote_node.findValue = findValue
         self.remote_node.store = store
@@ -247,9 +240,6 @@ def store(contact, key, value, originalPublisherID=None, self_store=False, **kwa

     @defer.inlineCallbacks
     def testStoreFromPre_0_20_0_Node(self):
-
-        self.remote_node._protocol._protocolVersion = 0
-
         def _dont_migrate(contact, method, *args):
             return args

From b0e4fc4faaaac45c566aa88faee07196f85054d6 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 7 Jun 2018 11:39:20 -0400
Subject: [PATCH 74/79] fix iterative find lockup

---
 lbrynet/dht/iterativefind.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lbrynet/dht/iterativefind.py b/lbrynet/dht/iterativefind.py
index 957c69d5b2..9844053e4d 100644
--- a/lbrynet/dht/iterativefind.py
+++ b/lbrynet/dht/iterativefind.py
@@ -136,8 +136,10 @@ def probeContact(self, contact):
     def should_stop(self):
         if self.prev_closest_node and self.closest_node and self.distance.is_closer(self.prev_closest_node.id,
                                                                                     self.closest_node.id):
+            # we're getting further away
             return True
         if len(self.active_contacts) >= constants.k:
+            # we have enough results
            return True
         return False

@@ -149,7 +151,7 @@ def _searchIteration(self):
             self.prev_closest_node = self.closest_node
             self.closest_node = self.active_contacts[0]

-        # Sort and store the current shortList length before contacting other nodes
+        # Sort the current shortList before contacting other nodes
         self.sortByDistance(self.shortlist)
         probes = []
         already_contacted_addresses = {(c.address, c.port) for c in self.already_contacted}
@@ -181,13 +183,13 @@ def _remove_probes(results):

             d.addCallback(_remove_probes)

-        elif not self.finished_deferred.called and not self.active_probes:
+        elif not self.finished_deferred.called and not self.active_probes or self.should_stop():
             # If no probes were sent, there will not be any improvement, so we're done
             self.sortByDistance(self.active_contacts)
             self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))])
-        elif not self.finished_deferred.called and self.should_stop():
-            self.sortByDistance(self.active_contacts)
-            self.finished_deferred.callback(self.active_contacts[:min(constants.k, len(self.active_contacts))])
+        elif not self.finished_deferred.called:
+            # Force the next iteration
+            self.searchIteration()

     def searchIteration(self, delay=constants.iterativeLookupDelay):
         def _cancel_pending_iterations(result):
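Before this change, an iteration that sent no new probes while earlier probes were still in flight matched neither elif branch, so nothing scheduled another pass and the lookup hung. The fix folds both termination checks into one branch and otherwise forces another iteration. A simplified, self-contained sketch of the resulting decision logic, with illustrative names rather than the actual method:

# Distilled control flow of _searchIteration after the fix above.
def next_step(sent_probes, probes_in_flight, should_stop):
    """Return what the lookup does at the end of one iteration."""
    if sent_probes:
        return "schedule next iteration"       # new probes went out
    if not probes_in_flight or should_stop:
        return "finish with current results"   # nothing pending, or good enough
    return "force another iteration"           # probes still pending: recheck

# the case that used to hang: no new probes sent, but some still in flight
assert next_step(False, True, False) == "force another iteration"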
From f3e848b2e278ef8d6d34aa750661a108fdb1face Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 7 Jun 2018 11:51:22 -0400
Subject: [PATCH 75/79] work around upnp bug

this works around a mapping conflict error that some routers incorrectly
raise when a redirect already exists for the same internal port but a
different lan address.

---
 lbrynet/core/Session.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/lbrynet/core/Session.py b/lbrynet/core/Session.py
index 28c69ca60a..3e52b96499 100644
--- a/lbrynet/core/Session.py
+++ b/lbrynet/core/Session.py
@@ -166,24 +166,24 @@ def get_free_port(upnp, port, protocol):
             if not mapping:
                 return port
             if upnp.lanaddr == mapping[0]:
-                return mapping
+                return mapping[1]
             return get_free_port(upnp, port + 1, protocol)

-        def get_port_mapping(upnp, internal_port, protocol, description):
+        def get_port_mapping(upnp, port, protocol, description):
             # try to map to the requested port, if there is already a mapping use the next external
             # port available
             if protocol not in ['UDP', 'TCP']:
                 raise Exception("invalid protocol")
-            external_port = get_free_port(upnp, internal_port, protocol)
-            if isinstance(external_port, tuple):
+            port = get_free_port(upnp, port, protocol)
+            if isinstance(port, tuple):
                 log.info("Found existing UPnP redirect %s:%i (%s) to %s:%i, using it",
-                         self.external_ip, external_port[1], protocol, upnp.lanaddr, internal_port)
-                return external_port[1], protocol
-            upnp.addportmapping(external_port, protocol, upnp.lanaddr, internal_port,
+                         self.external_ip, port, protocol, upnp.lanaddr, port)
+                return port
+            upnp.addportmapping(port, protocol, upnp.lanaddr, port,
                                 description, '')
-            log.info("Set UPnP redirect %s:%i (%s) to %s:%i", self.external_ip, external_port,
-                     protocol, upnp.lanaddr, internal_port)
-            return external_port, protocol
+            log.info("Set UPnP redirect %s:%i (%s) to %s:%i", self.external_ip, port,
+                     protocol, upnp.lanaddr, port)
+            return port

         def threaded_try_upnp():
             if self.use_upnp is False:
@@ -198,13 +198,11 @@ def threaded_try_upnp():
             # best not to rely on this external ip, the router can be behind layers of NATs
             self.external_ip = external_ip
             if self.peer_port:
-                self.upnp_redirects.append(
-                    get_port_mapping(u, self.peer_port, 'TCP', 'LBRY peer port')
-                )
+                self.peer_port = get_port_mapping(u, self.peer_port, 'TCP', 'LBRY peer port')
+                self.upnp_redirects.append((self.peer_port, 'TCP'))
             if self.dht_node_port:
-                self.upnp_redirects.append(
-                    get_port_mapping(u, self.dht_node_port, 'UDP', 'LBRY DHT port')
-                )
+                self.dht_node_port = get_port_mapping(u, self.dht_node_port, 'UDP', 'LBRY DHT port')
+                self.upnp_redirects.append((self.dht_node_port, 'UDP'))
             return True
         return False
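The workaround scans upward from the requested port until it finds one that is either unmapped or already redirected to this host's lan address, which sidesteps the spurious conflict. A self-contained sketch of that scan, with a plain dict standing in for the router's mapping table that the real code queries through the upnp client:

# Conflict-avoidance scan sketched from the patch above (mappings is a stand-in).
def get_free_port(mappings, lanaddr, port):
    """Walk upward from `port` until it is unmapped or already ours."""
    mapping = mappings.get(port)           # (lan_address, internal_port) or None
    if mapping is None:
        return port                        # free: safe to request this port
    if mapping[0] == lanaddr:
        return mapping[1]                  # existing redirect to us: reuse it
    return get_free_port(mappings, lanaddr, port + 1)  # conflict: try the next one

mappings = {4444: ("192.168.1.50", 4444)}  # another host already owns 4444
print(get_free_port(mappings, "192.168.1.7", 4444))   # -> 4445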
From 7f3ead67bfb07bd3f8370311bd97d9f15515920f Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 7 Jun 2018 12:16:27 -0400
Subject: [PATCH 76/79] disable forced bucket refresh during join

---
 lbrynet/dht/node.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lbrynet/dht/node.py b/lbrynet/dht/node.py
index e593b8fcd1..0c38146a5b 100644
--- a/lbrynet/dht/node.py
+++ b/lbrynet/dht/node.py
@@ -236,10 +236,10 @@ def _initialize_routing():
             # find the closest peers to us
             closest = yield self._iterativeFind(self.node_id, shortlist if not self.contacts else None)
             yield _ping_contacts(closest)
-            # query random hashes in our bucket key ranges to fill or split them
-            random_ids_in_range = self._routingTable.getRefreshList(force=True)
-            while random_ids_in_range:
-                yield self.iterativeFindNode(random_ids_in_range.pop())
+            # # query random hashes in our bucket key ranges to fill or split them
+            # random_ids_in_range = self._routingTable.getRefreshList()
+            # while random_ids_in_range:
+            #     yield self.iterativeFindNode(random_ids_in_range.pop())
             defer.returnValue(None)

     @defer.inlineCallbacks
@@ -252,7 +252,7 @@ def _iterative_join(joined_d=None, last_buckets_with_contacts=None):
             if not joined_d.called:
                 joined_d.callback(True)
         elif buckets_with_contacts < 4:
-            self.reactor_callLater(1, _iterative_join, joined_d, buckets_with_contacts)
+            self.reactor_callLater(0, _iterative_join, joined_d, buckets_with_contacts)
         elif not joined_d.called:
             joined_d.callback(None)
         yield joined_d

From a821647fbcf7369d360284fadf816095d7aecd85 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 7 Jun 2018 12:18:07 -0400
Subject: [PATCH 77/79] pylint and appveyor

---
 .appveyor.yml                       |  2 +-
 lbrynet/core/utils.py               | 20 ++++++--------------
 lbrynet/tests/unit/dht/test_node.py |  4 +++-
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/.appveyor.yml b/.appveyor.yml
index de1bafe6f0..a1d4dc4480 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -23,7 +23,7 @@ clone_folder: c:\projects\lbry
 test_script:
 - cd C:\projects\lbry\
 - pip install cython
-- pip install mock pylint unqlite
+- pip install mock pylint unqlite Faker
 - pip install .
 - pylint lbrynet
 # disable tests for now so that appveyor can build the app

diff --git a/lbrynet/core/utils.py b/lbrynet/core/utils.py
index 6498c52e9e..f8ada44dc4 100644
--- a/lbrynet/core/utils.py
+++ b/lbrynet/core/utils.py
@@ -1,21 +1,23 @@
 import base64
 import datetime
-import logging
 import random
 import socket
 import string
 import json
-
+import traceback
+import functools
+import logging
 import pkg_resources
+from twisted.python.failure import Failure
 from twisted.internet import defer
 from lbryschema.claim import ClaimDict
 from lbrynet.core.cryptoutils import get_lbry_hash_obj
+log = logging.getLogger(__name__)
+
 # digest_size is in bytes, and blob hashes are hex encoded
 blobhash_length = get_lbry_hash_obj().digest_size * 2
-log = logging.getLogger(__name__)
-
 # defining these time functions here allows for easier overriding in testing
 def now():
@@ -174,16 +176,6 @@ def DeferredDict(d, consumeErrors=False):
     defer.returnValue(response)


-
-import traceback
-import functools
-import logging
-from twisted.internet import defer
-from twisted.python.failure import Failure
-
-log = logging.getLogger(__name__)
-
-
 class DeferredProfiler(object):
     def __init__(self):
         self.profile_results = {}

diff --git a/lbrynet/tests/unit/dht/test_node.py b/lbrynet/tests/unit/dht/test_node.py
index f5fe876abd..93ee047e32 100644
--- a/lbrynet/tests/unit/dht/test_node.py
+++ b/lbrynet/tests/unit/dht/test_node.py
@@ -62,7 +62,9 @@ def setUp(self):
     def testStore(self):
         """ Tests if the node can store (and privately retrieve) some data """
         for key, port in self.cases:
-            yield self.node.store(self.contact, key, self.token, port, self.contact.id, 0)
+            yield self.node.store(  # pylint: disable=too-many-function-args
+                self.contact, key, self.token, port, self.contact.id, 0
+            )
         for key, value in self.cases:
             expected_result = self.contact.compact_ip() + str(struct.pack('>H', value)) + \
                               self.contact.id
From 1d0106929c4e31767e45e7ca616ca28edbf677f7 Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 7 Jun 2018 14:18:23 -0400
Subject: [PATCH 78/79] move daemon test to the same folder as the others

---
 .../unit/{daemon => lbrynet_daemon}/test_claims_comparator.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename lbrynet/tests/unit/{daemon => lbrynet_daemon}/test_claims_comparator.py (100%)

diff --git a/lbrynet/tests/unit/daemon/test_claims_comparator.py b/lbrynet/tests/unit/lbrynet_daemon/test_claims_comparator.py
similarity index 100%
rename from lbrynet/tests/unit/daemon/test_claims_comparator.py
rename to lbrynet/tests/unit/lbrynet_daemon/test_claims_comparator.py

From 665c73c38c172c0555e0589782dffba7c654b52a Mon Sep 17 00:00:00 2001
From: Jack Robison
Date: Thu, 7 Jun 2018 14:18:27 -0400
Subject: [PATCH 79/79] changelog

---
 CHANGELOG.md | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d417dfee76..f7c7fef62f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,10 +36,10 @@ at anytime.
   * several internal dht functions to use inlineCallbacks
   * `DHTHashAnnouncer` and `Node` manage functions to use `LoopingCall`s instead of scheduling with `callLater`.
   * `store` kademlia rpc method to block on the call finishing and to return storing peer information
-  * refactored `DHTHashAnnouncer` to longer use locks, use a `DeferredSemaphore` to limit concurrent announcers
+  * refactored `DHTHashAnnouncer` to no longer use locks, use a `DeferredSemaphore` to limit concurrent announcers
   * decoupled `DiskBlobManager` from `DHTHashAnnouncer`
   * blob hashes to announce to be controlled by `SQLiteStorage`
-  * kademlia protocol to not delay writes to the UDP socket
+  * kademlia protocol to minimally delay writes to the UDP socket
   * `reactor` and `callLater`, `listenUDP`, and `resolve` functions to be configurable (to allow easier testing)
   * calls to get the current time to use `reactor.seconds` (to control callLater and LoopingCall timing in tests)
   * `blob_announce` to queue the blob announcement but not block on it
@@ -56,21 +56,34 @@ at anytime.
   * track successful reflector uploads in sqlite to minimize how many streams are attempted by auto re-reflect
   * increase the default `auto_re_reflect_interval` to a day
   * predictable result sorting for `claim_list` and `claim_list_mine`
+  * changed the bucket splitting condition in the dht routing table to be more aggressive
+  * ping dht nodes who have stored to us periodically to determine whether we should include them as an active peer for the hash when we are queried. Nodes that are known to be not reachable by the node storing the record are no longer returned as peers by the storing node.
+  * temporarily disabled data price negotiation, treat all data as free
+  * changed dht bootstrap join process to better populate the routing table initially
+  * cache dht node tokens used during announcement to minimize the number of requests that are needed
+  * implement BEP0005 dht rules to classify nodes as good, bad, or unknown and for when to add them to the routing table (http://www.bittorrent.org/beps/bep_0005.html)
+  * refactored internal dht contact class to track failure counts/times, the time the contact last replied to us, and the time the node last requested something from us
+  * refactored dht iterativeFind
+  * sort dht contacts returned by `findCloseNodes` in the routing table
+  * disabled Cryptonator price feed

 ### Added
   * virtual kademlia network and mock udp transport for dht integration tests
-  * integration tests for bootstrapping the dht
+  * functional tests for bootstrapping the dht, announcing and expiring hashes, finding and pinging nodes, protocol version 0/1 backwards/forwards compatibility, and rejoining the network
   * configurable `concurrent_announcers` and `s3_headers_depth` settings
   * `peer_ping` command
   * `--sort` option in `file_list`
   * linux distro and desktop name added to analytics
   * certifi module for Twisted SSL verification on Windows
+  * protocol version to dht requests and to the response from `findValue`
+  * added `port` field to contacts returned by `routing_table_get`

 ### Removed
   * `announce_all` argument from `blob_announce`
   * old `blob_announce_all` command
   * `AuthJSONRPCServer.auth_required` decorator
   * unused `--wallet` argument to `lbrynet-daemon`, which used to be to support `PTCWallet`.
+  * `OptimizedTreeRoutingTable` class used by the dht node for the time being

 ## [0.19.3] - 2018-05-04
 ### Changed