diff --git a/flathunter/config.py b/flathunter/config.py index a47c0b30..55b919ed 100644 --- a/flathunter/config.py +++ b/flathunter/config.py @@ -1,6 +1,6 @@ """Wrap configuration options as an object""" import os -from typing import Optional, Dict, Any +from typing import List, Optional, Dict, Any import json import yaml @@ -18,7 +18,8 @@ from flathunter.crawler.wggesucht import WgGesucht from flathunter.crawler.vrmimmo import VrmImmo from flathunter.crawler.subito import Subito -from flathunter.filter import Filter +from flathunter.dataclasses import DistanceConfig +from flathunter.gmaps_duration_processor import TransportationModes from flathunter.logging import logger from flathunter.exceptions import ConfigException @@ -172,12 +173,6 @@ def searchers(self): """Get the list of search plugins""" return self.__searchers__ - def get_filter(self): - """Read the configured filter""" - builder = Filter.builder() - builder.read_config(self) - return builder.build() - def captcha_enabled(self): """Check if captcha is configured""" return self._get_captcha_solver() is not None @@ -354,6 +349,23 @@ def max_price_per_square(self): """Return the configured maximum price per square meter""" return self._get_filter_config("max_price_per_square") + def max_distance(self) -> List[DistanceConfig]: + """Return the configured maximum distance to locations.""" + config = self._get_filter_config("max_distance") + if config is None: + return [] + out = [] + for distance_filter_item in config: + out.append( + DistanceConfig( + location_name=distance_filter_item['location_name'], + transport_mode=TransportationModes(distance_filter_item['transportation_mode']), + max_distance_meters=distance_filter_item.get('max_distance_meters'), + max_duration_seconds=distance_filter_item.get('max_duration_seconds') + ) + ) + return out + def __repr__(self): return json.dumps({ "captcha_enabled": self.captcha_enabled(), diff --git a/flathunter/crawler/vrmimmo.py b/flathunter/crawler/vrmimmo.py index 2ff3acec..ee6ee99b 100644 --- a/flathunter/crawler/vrmimmo.py +++ b/flathunter/crawler/vrmimmo.py @@ -2,7 +2,7 @@ import re import hashlib -from bs4 import BeautifulSoup, Tag +from bs4 import BeautifulSoup from flathunter.logging import logger from flathunter.abstract_crawler import Crawler @@ -29,7 +29,7 @@ def extract_data(self, soup: BeautifulSoup): link = item.find("a", {"class": "js-item-title-link ci-search-result__link"}) url = link.get("href") title = link.get("title") - logger.debug("Analyze " + url) + logger.debug("Analyze %s", url) try: price = item.find("div", {"class": "item__spec item-spec-price"}).text diff --git a/flathunter/dataclasses.py b/flathunter/dataclasses.py new file mode 100644 index 00000000..7f61d9eb --- /dev/null +++ b/flathunter/dataclasses.py @@ -0,0 +1,67 @@ +""" +This module contains dataclasses to help with serialisation and typechecking of data +sent to and received from the Google Maps Distance API +""" +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +class TransportationModes(Enum): + """The transportation mode for Google Maps distance calculation.""" + TRANSIT = 'transit' + BICYCLING = 'bicycling' + DRIVING = 'driving' + WALKING = 'walking' + + +@dataclass +class DistanceValueTuple: + """We want to keep both the numeric value of a distance, and its string representation.""" + meters: float + text: str + + +@dataclass +class DurationValueTuple: + """We want to keep both the numeric value of a duration, and its string representation.""" + seconds: float + text: str + + +@dataclass +class DistanceElement: + """Represents the distance from a property to some location.""" + duration: DurationValueTuple + distance: DistanceValueTuple + mode: TransportationModes + + +@dataclass +class DistanceConfig: + """Represents distance filter information in the configuration file. + + location_name must refer to the location name used to identify the location + in the durations section of the config file, and the transport_mode must be + configured in the durations section for that location name, lest no information + is available to actually filter on.""" + location_name: str + transport_mode: TransportationModes + max_distance_meters: Optional[float] + max_duration_seconds: Optional[float] + + +class FilterChainName(Enum): + """Identifies the filter chain that a filter acts on + + Preprocess filters will be run before the expose is processed by any further actions. + Use this chain to filter exposes that can be excluded based on information scraped + from the expose website alone (such as based on price or size). + Postprocess filters will be run after other actions have completed. Use this if you + require additional information from other steps, such as information from the Google + Maps API, to make a decision on this expose. + + We separate the filter chains to avoid making expensive (literally!) calls to the + Google Maps API for exposes that we already know we aren't interested in anyway.""" + PREPROCESS = 'PREPROCESS' + POSTPROCESS = 'POSTPROCESS' diff --git a/flathunter/filter.py b/flathunter/filter.py index bc1b4979..dc5f4a61 100644 --- a/flathunter/filter.py +++ b/flathunter/filter.py @@ -1,8 +1,12 @@ """Module with implementations of standard expose filters""" -from functools import reduce import re from abc import ABC, ABCMeta -from typing import List, Any +from typing import List, Any, Dict + +from flathunter.config import DistanceConfig +from flathunter.dataclasses import FilterChainName +from flathunter.gmaps_duration_processor import DistanceElement +from flathunter.logging import logger class AbstractFilter(ABC): @@ -172,30 +176,65 @@ def is_interesting(self, expose): return pps <= self.max_pps -class FilterBuilder: +class DistanceFilter(AbstractFilter): + """Exclude properties based on distance or duration to a location + + This must be in the post-processing filter chain, as it requires data + from the Google Maps API, which is not available right after scraping.""" + + distance_config: DistanceConfig + + def __init__(self, distance_config: DistanceConfig): + self.distance_config = distance_config + + def is_interesting(self, expose): + durations: Dict[str, DistanceElement] = expose.get('durations_unformatted', None) + if durations is None or self.distance_config.location_name not in durations: + logger.info('DurationFilter is enabled, but no GMaps data found. Skipping filter.') + return True + distance = durations[self.distance_config.location_name].distance.meters + duration = durations[self.distance_config.location_name].duration.seconds + out = True + if self.distance_config.max_distance_meters: + out &= distance < self.distance_config.max_distance_meters + if self.distance_config.max_duration_seconds: + out &= duration < self.distance_config.max_duration_seconds + return out + + +class FilterChainBuilder: """Construct a filter chain""" filters: List[AbstractFilter] def __init__(self): self.filters = [] - def _append_filter_if_not_empty(self, filter_class: ABCMeta, filter_config: Any): + def _append_filter_if_not_empty( + self, + filter_class: ABCMeta, + filter_config: Any): """Appends a filter to the list if its configuration is set""" if not filter_config: return self.filters.append(filter_class(filter_config)) - def read_config(self, config): + def read_config(self, config, filter_chain: FilterChainName): """Adds filters from a config dictionary""" - self._append_filter_if_not_empty(TitleFilter, config.excluded_titles()) - self._append_filter_if_not_empty(MinPriceFilter, config.min_price()) - self._append_filter_if_not_empty(MaxPriceFilter, config.max_price()) - self._append_filter_if_not_empty(MinSizeFilter, config.min_size()) - self._append_filter_if_not_empty(MaxSizeFilter, config.max_size()) - self._append_filter_if_not_empty(MinRoomsFilter, config.min_rooms()) - self._append_filter_if_not_empty(MaxRoomsFilter, config.max_rooms()) - self._append_filter_if_not_empty( - PPSFilter, config.max_price_per_square()) + if filter_chain == FilterChainName.PREPROCESS: + self._append_filter_if_not_empty(TitleFilter, config.excluded_titles()) + self._append_filter_if_not_empty(MinPriceFilter, config.min_price()) + self._append_filter_if_not_empty(MaxPriceFilter, config.max_price()) + self._append_filter_if_not_empty(MinSizeFilter, config.min_size()) + self._append_filter_if_not_empty(MaxSizeFilter, config.max_size()) + self._append_filter_if_not_empty(MinRoomsFilter, config.min_rooms()) + self._append_filter_if_not_empty(MaxRoomsFilter, config.max_rooms()) + self._append_filter_if_not_empty( + PPSFilter, config.max_price_per_square()) + elif filter_chain == FilterChainName.POSTPROCESS: + for distance_filter in config.max_distance(): + self._append_filter_if_not_empty(DistanceFilter, distance_filter) + else: + raise NotImplementedError() return self def filter_already_seen(self, id_watch): @@ -204,12 +243,12 @@ def filter_already_seen(self, id_watch): return self def build(self): - """Return the compiled filter""" - return Filter(self.filters) + """Return the compiled filter chain""" + return FilterChain(self.filters) -class Filter: - """Abstract filter object""" +class FilterChain: + """Collection of expose filters in use by a hunter instance""" filters: List[AbstractFilter] @@ -218,8 +257,10 @@ def __init__(self, filters: List[AbstractFilter]): def is_interesting_expose(self, expose): """Apply all filters to this expose""" - return reduce((lambda x, y: x and y), - map((lambda x: x.is_interesting(expose)), self.filters), True) + for filter_ in self.filters: + if not filter_.is_interesting(expose): + return False + return True def filter(self, exposes): """Apply all filters to every expose in the list""" @@ -227,5 +268,5 @@ def filter(self, exposes): @staticmethod def builder(): - """Return a new filter builder""" - return FilterBuilder() + """Return a new filter chain builder""" + return FilterChainBuilder() diff --git a/flathunter/gmaps_duration_processor.py b/flathunter/gmaps_duration_processor.py index 843f2975..4da46678 100644 --- a/flathunter/gmaps_duration_processor.py +++ b/flathunter/gmaps_duration_processor.py @@ -2,43 +2,59 @@ import datetime import time from urllib.parse import quote_plus +from typing import Dict import requests +from flathunter.dataclasses import (DistanceElement, + DistanceValueTuple, + DurationValueTuple, + TransportationModes) from flathunter.logging import logger from flathunter.abstract_processor import Processor + class GMapsDurationProcessor(Processor): """Implementation of Processor class to calculate travel durations""" - GM_MODE_TRANSIT = 'transit' - GM_MODE_BICYCLE = 'bicycling' - GM_MODE_DRIVING = 'driving' - def __init__(self, config): self.config = config def process_expose(self, expose): """Calculate the durations for an expose""" - expose['durations'] = self.get_formatted_durations(expose['address']).strip() + durations = self.get_distances_and_durations(expose['address']) + expose['durations'] = self._format_durations(durations).strip() + expose['durations_unformatted'] = durations return expose + def get_distances_and_durations(self, address) -> Dict[str, DistanceElement]: + """Return a dict mapping location names to distances and durations""" + out = {} + for duration in self.config.get('durations', []): + if 'destination' not in duration or 'name' not in duration or 'modes' not in duration: + logger.warning('illegal duration configuration: %s', duration) + continue + dest = duration.get('destination') + name = duration.get('name') + for mode in duration.get('modes', []): + if 'gm_id' in mode and 'title' in mode \ + and 'key' in self.config.get('google_maps_api', {}): + duration = self._get_gmaps_distance(address, dest, mode['gm_id']) + out[name] = duration + return out + def get_formatted_durations(self, address): """Return a formatted list of GoogleMaps durations""" - out = "" - for duration in self.config.get('durations', []): - if 'destination' in duration and 'name' in duration: - dest = duration.get('destination') - name = duration.get('name') - for mode in duration.get('modes', []): - if 'gm_id' in mode and 'title' in mode \ - and 'key' in self.config.get('google_maps_api', {}): - duration = self.get_gmaps_distance(address, dest, mode['gm_id']) - title = mode['title'] - out += f"> {name} ({title}): {duration}\n" + durations = self.get_distances_and_durations(address) + return self._format_durations(durations) + def _format_durations(self, durations: Dict[str, DistanceElement]): + out = "" + for location_name, val in durations.items(): + out += f"> {location_name} ({val.mode.value}): " + \ + f"{val.duration.text} ({val.distance.text})\n" return out.strip() - def get_gmaps_distance(self, address, dest, mode): + def _get_gmaps_distance(self, address, dest, mode) -> DistanceElement | None: """Get the distance""" # get timestamp for next monday at 9:00:00 o'clock now = datetime.datetime.today().replace(hour=9, minute=0, second=0) @@ -54,11 +70,10 @@ def get_gmaps_distance(self, address, dest, mode): base_url = self.config.get('google_maps_api', {}).get('url') gm_key = self.config.get('google_maps_api', {}).get('key') - if not gm_key and mode != self.GM_MODE_DRIVING: + if not gm_key and mode != TransportationModes.DRIVING: logger.warning("No Google Maps API key configured and without using a mode " - "different from 'driving' is not allowed. " - "Downgrading to mode 'drinving' thus. ") - mode = 'driving' + "different from 'driving' is not allowed. Thus downgrading to mode 'driving'.") + mode = TransportationModes.DRIVING base_url = base_url.replace('&key={key}', '') # retrieve the result @@ -82,7 +97,14 @@ def get_gmaps_distance(self, address, dest, mode): element['distance']['text'], element['duration']['text'], element['duration']['value']) - duration_text = element['duration']['text'] - distance_text = element['distance']['text'] - distances[element['duration']['value']] = f"{duration_text} ({distance_text})" + distance_element = DistanceElement( + duration=DurationValueTuple( + float(element['duration']['value']), + element['duration']['text']), + distance=DistanceValueTuple( + float(element['distance']['value']), + element['distance']['text']), + mode=TransportationModes(mode) + ) + distances[distance_element.distance.meters] = distance_element return distances[min(distances.keys())] if distances else None diff --git a/flathunter/hunter.py b/flathunter/hunter.py index 692b9996..0c1c77c1 100644 --- a/flathunter/hunter.py +++ b/flathunter/hunter.py @@ -5,10 +5,11 @@ from flathunter.logging import logger from flathunter.config import YamlConfig -from flathunter.filter import Filter +from flathunter.filter import FilterChain from flathunter.processor import ProcessorChain from flathunter.captcha.captcha_solver import CaptchaUnsolvableError from flathunter.exceptions import ConfigException +from flathunter.dataclasses import FilterChainName class Hunter: """Basic methods for crawling and processing / filtering exposes""" @@ -38,16 +39,14 @@ def try_crawl(searcher, url, max_pages): def hunt_flats(self, max_pages=None): """Crawl, process and filter exposes""" - filter_set = Filter.builder() \ - .read_config(self.config) \ - .filter_already_seen(self.id_watch) \ - .build() - + preprocess_filter_chain = self._build_preprocess_filter_chain(self.config) + postprocess_filter_chain = self._build_postprocess_filter_chain(self.config) processor_chain = ProcessorChain.builder(self.config) \ .save_all_exposes(self.id_watch) \ - .apply_filter(filter_set) \ + .apply_filter(preprocess_filter_chain) \ .resolve_addresses() \ .calculate_durations() \ + .apply_filter(postprocess_filter_chain) \ .send_messages() \ .build() @@ -58,3 +57,14 @@ def hunt_flats(self, max_pages=None): result.append(expose) return result + + def _build_preprocess_filter_chain(self, config) -> FilterChain: + return FilterChain.builder() \ + .read_config(config, FilterChainName.PREPROCESS) \ + .filter_already_seen(self.id_watch) \ + .build() + + def _build_postprocess_filter_chain(self, config) -> FilterChain: + return FilterChain.builder() \ + .read_config(config, FilterChainName.POSTPROCESS) \ + .build() diff --git a/flathunter/web/views.py b/flathunter/web/views.py index 5a5926a0..6559e57c 100644 --- a/flathunter/web/views.py +++ b/flathunter/web/views.py @@ -6,10 +6,11 @@ from flask import render_template, jsonify, request, session, redirect from flask_api import status +from flathunter.dataclasses import FilterChainName from flathunter.web import app, log from flathunter.web.util import sanitize_float -from flathunter.filter import FilterBuilder +from flathunter.filter import FilterChainBuilder from flathunter.config import YamlConfig class AuthenticationError(Exception): @@ -72,7 +73,12 @@ def filter_for_user(): """Load the filter for the current user""" if filter_values_for_user() is None: return None - return FilterBuilder().read_config(YamlConfig({'filters': filter_values_for_user()})).build() + return (FilterChainBuilder() + .read_config( + YamlConfig({'filters': filter_values_for_user()}), FilterChainName.PREPROCESS) + .read_config( + YamlConfig({'filters': filter_values_for_user()}), FilterChainName.POSTPROCESS) + .build()) def form_filter_values(): """Extract the filter settings from the submitted form""" diff --git a/flathunter/web_hunter.py b/flathunter/web_hunter.py index 41bab501..9a1551e0 100644 --- a/flathunter/web_hunter.py +++ b/flathunter/web_hunter.py @@ -2,7 +2,6 @@ from flathunter.config import YamlConfig from flathunter.logging import logger from flathunter.hunter import Hunter -from flathunter.filter import Filter from flathunter.processor import ProcessorChain from flathunter.exceptions import BotBlockedException, UserDeactivatedException @@ -13,17 +12,19 @@ class WebHunter(Hunter): def hunt_flats(self, max_pages=1): """Crawl all URLs, and send notifications to users of new flats""" - filter_set = Filter.builder() \ - .read_config(self.config) \ - .filter_already_seen(self.id_watch) \ - .build() - + preprocess_filter_chain = self._build_preprocess_filter_chain(self.config) + postprocess_filter_chain = self._build_postprocess_filter_chain(self.config) + # note: we have to save all exposes *after* applying the processors because + # the exposes later get loaded from disk to then be filtered again, so we need + # the additional information from the processor lest the postprocess chain breaks + # due to missing data processor_chain = ProcessorChain.builder(self.config) \ - .apply_filter(filter_set) \ + .apply_filter(preprocess_filter_chain) \ .crawl_expose_details() \ - .save_all_exposes(self.id_watch) \ .resolve_addresses() \ .calculate_durations() \ + .save_all_exposes(self.id_watch) \ + .apply_filter(postprocess_filter_chain) \ .send_messages() \ .build() @@ -34,10 +35,12 @@ def hunt_flats(self, max_pages=1): for (user_id, settings) in self.id_watch.get_user_settings(): if 'mute_notifications' in settings: continue - filter_set = Filter.builder().read_config(YamlConfig(settings)).build() + preprocess_filter_chain = self._build_preprocess_filter_chain(YamlConfig(settings)) + postprocess_filter_chain = self._build_postprocess_filter_chain(YamlConfig(settings)) try: processor_chain = ProcessorChain.builder(self.config) \ - .apply_filter(filter_set) \ + .apply_filter(preprocess_filter_chain) \ + .apply_filter(postprocess_filter_chain) \ .send_messages([user_id]) \ .build() for message in processor_chain.process(new_exposes): diff --git a/test/test_config.py b/test/test_config.py index 0804a1e1..8d917f34 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -58,30 +58,34 @@ def test_loads_config(self): os.remove("config.yaml") def test_loads_config_at_file(self): - with tempfile.NamedTemporaryFile(mode='w+') as temp: - temp.write(self.DUMMY_CONFIG) - temp.flush() - config = Config(temp.name) - self.assertTrue(len(config.get('urls', [])) > 0, "Expected URLs in config file") + tempFileName = None + try: + with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp: + temp.write(self.DUMMY_CONFIG) + temp.flush() + tempFileName = temp.name + config = Config(tempFileName) + self.assertTrue(len(config.get('urls', [])) > 0, "Expected URLs in config file") + finally: + if tempFileName is not None: + os.unlink(tempFileName) def test_loads_config_from_string(self): config = StringConfig(string=self.EMPTY_FILTERS_CONFIG) - self.assertIsNotNone(config) - my_filter = config.get_filter() - self.assertIsNotNone(my_filter) + target_urls = config.target_urls() + self.assertIsNotNone(target_urls) def test_loads_legacy_config_from_string(self): config = StringConfig(string=self.LEGACY_FILTERS_CONFIG) - self.assertIsNotNone(config) - my_filter = config.get_filter() + my_filter = config.excluded_titles() self.assertIsNotNone(my_filter) - self.assertTrue(len(my_filter.filters) > 0) + self.assertTrue(len(my_filter) > 0) def test_loads_filters_config_from_string(self): config = StringConfig(string=self.FILTERS_CONFIG) - self.assertIsNotNone(config) - my_filter = config.get_filter() + my_filter = config.excluded_titles() self.assertIsNotNone(my_filter) + self.assertTrue(len(my_filter) > 0) def test_defaults_fields(self): config = StringConfig(string=self.FILTERS_CONFIG) diff --git a/test/test_gmaps_duration_processor.py b/test/test_gmaps_duration_processor.py index a35b1cdf..15a780ea 100644 --- a/test/test_gmaps_duration_processor.py +++ b/test/test_gmaps_duration_processor.py @@ -1,6 +1,7 @@ +from typing import Dict import unittest -import yaml import re +from flathunter.gmaps_duration_processor import DistanceElement import requests_mock from flathunter.hunter import Hunter from flathunter.idmaintainer import IdMaintainer @@ -33,6 +34,20 @@ class GMapsDurationProcessorTest(unittest.TestCase): - gm_id: driving title: Car """ + DUMMY_RESPONSE = """ + { + "status": "OK", + "rows": [ + { + "elements": [ + { + "distance": { "text": "228 mi", "value": 367654 }, + "duration": { "text": "3 hours 55 mins", "value": 14078 } + } + ] + } + ] + }""" @requests_mock.Mocker() def test_resolve_durations(self, m): @@ -40,11 +55,25 @@ def test_resolve_durations(self, m): config.set_searchers([DummyCrawler()]) hunter = Hunter(config, IdMaintainer(":memory:")) matcher = re.compile('maps.googleapis.com/maps/api/distancematrix/json') - m.get(matcher, text='{"status": "OK", "rows": [ { "elements": [ { "distance": { "text": "far", "value": 123 }, "duration": { "text": "days", "value": 123 } } ] } ]}') + m.get(matcher, text=self.DUMMY_RESPONSE) exposes = hunter.hunt_flats() self.assertTrue(count(exposes) > 4, "Expected to find exposes") without_durations = list(filter(lambda expose: 'durations' not in expose, exposes)) - if len(without_durations) > 0: - for expose in without_durations: - print("Got expose: ", expose) - self.assertTrue(len(without_durations) == 0, "Expected durations to be calculated") \ No newline at end of file + for expose in without_durations: + print("Got expose: ", expose) + self.assertTrue(len(without_durations) == 0, "Expected durations to be calculated") + + @requests_mock.Mocker() + def test_durations_valid(self, m): + config = StringConfig(string=self.DUMMY_CONFIG) + config.set_searchers([DummyCrawler()]) + hunter = Hunter(config, IdMaintainer(":memory:")) + matcher = re.compile('maps.googleapis.com/maps/api/distancematrix/json') + m.get(matcher, text=self.DUMMY_RESPONSE) + exposes = hunter.hunt_flats() + for expose in exposes: + durations: Dict[str, DistanceElement] = expose['durations_unformatted'] + for duration in durations.values(): + self.assertAlmostEqual(duration.distance.meters, 367654) + self.assertAlmostEqual(duration.duration.seconds, 14078) + self.assertEqual(duration.duration.text, '3 hours 55 mins') diff --git a/test/test_googlecloud_idmaintainer.py b/test/test_googlecloud_idmaintainer.py index b3788e9a..06cacda1 100644 --- a/test/test_googlecloud_idmaintainer.py +++ b/test/test_googlecloud_idmaintainer.py @@ -7,7 +7,8 @@ from flathunter.googlecloud_idmaintainer import GoogleCloudIdMaintainer from flathunter.hunter import Hunter from flathunter.web_hunter import WebHunter -from flathunter.filter import Filter +from flathunter.filter import FilterChain +from flathunter.dataclasses import FilterChainName from test.dummy_crawler import DummyCrawler from test.test_util import count from test.utils.config import StringConfig @@ -94,7 +95,10 @@ def test_exposes_are_returned_filtered(id_watch): hunter = Hunter(config, id_watch) hunter.hunt_flats() hunter.hunt_flats() - filter = Filter.builder().read_config(StringConfig('{"filters":{"max_size":70}}')).build() + filter = (FilterChain + .builder() + .read_config(StringConfig('{"filters":{"max_size":70}}'), FilterChainName.PREPROCESS) + .build()) saved = id_watch.get_recent_exposes(10, filter_set=filter) assert len(saved) == 10 for expose in saved: diff --git a/test/test_id_maintainer.py b/test/test_id_maintainer.py index 2d4cdc75..796b9a90 100644 --- a/test/test_id_maintainer.py +++ b/test/test_id_maintainer.py @@ -6,7 +6,8 @@ from flathunter.idmaintainer import IdMaintainer from flathunter.hunter import Hunter from flathunter.web_hunter import WebHunter -from flathunter.filter import Filter +from flathunter.filter import FilterChain +from flathunter.dataclasses import FilterChainName from test.dummy_crawler import DummyCrawler from test.test_util import count from test.utils.config import StringConfig @@ -111,7 +112,10 @@ def test_exposes_are_returned_filtered(): hunter = Hunter(config, id_watch) hunter.hunt_flats() hunter.hunt_flats() - filter = Filter.builder().read_config(StringConfig('{"filters":{"max_size":70}}')).build() + filter = (FilterChain + .builder() + .read_config(StringConfig('{"filters":{"max_size":70}}'), FilterChainName.PREPROCESS) + .build()) saved = id_watch.get_recent_exposes(10, filter_set=filter) assert len(saved) == 10 for expose in saved: