Skip to content

Commit

Permalink
Run black
Browse files Browse the repository at this point in the history
  • Loading branch information
neon-ninja committed Aug 31, 2022
1 parent c0e38bf commit 345db7a
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
2 changes: 1 addition & 1 deletion facebook_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def get_groups_by_search(
**kwargs,
):
"""Searches Facebook groups and yields ids for each result
on the first page"""
on the first page"""
_scraper.requests_kwargs['timeout'] = kwargs.pop('timeout', DEFAULT_REQUESTS_TIMEOUT)
cookies = kwargs.pop('cookies', None)
set_cookies(cookies)
Expand Down
28 changes: 14 additions & 14 deletions facebook_scraper/facebook_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ class FacebookScraper:

base_url = FB_MOBILE_BASE_URL
default_headers = {
"Accept": "*/*",
"Connection": "keep-alive",
"Accept-Encoding": "gzip,deflate",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8"
}
"Accept": "*/*",
"Connection": "keep-alive",
"Accept-Encoding": "gzip,deflate",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
}
have_checked_locale = False

def __init__(self, session=None, requests_kwargs=None):
Expand Down Expand Up @@ -728,7 +728,7 @@ def get_group_info(self, group, **kwargs) -> Profile:
resp = self.get(url).html
try:
url = resp.find("a[href*='?view=info']", first=True).attrs["href"]
url += "&sfd=1" # Add parameter to get full "about"-text
url += "&sfd=1" # Add parameter to get full "about"-text
except AttributeError:
raise exceptions.UnexpectedResponse("Unable to resolve view=info URL")
logger.debug(f"Requesting page from: {url}")
Expand All @@ -747,18 +747,19 @@ def get_group_info(self, group, **kwargs) -> Profile:
try:
# Directly targeting the weird generated class names is not optimal, but it's the best I could do.
about_div = resp.find("._52jc._55wr", first=True)

# Removing the <wbr>-tags that are converted to linebreaks by .text
from requests_html import HTML

# Removing the <wbr>-tags that are converted to linebreaks by .text
from requests_html import HTML

no_word_breaks = HTML(html=about_div.html.replace("<wbr/>", ""))

result["about"] = no_word_breaks.text
except:
result["about"] = None

url = members.find("a", first=True).attrs.get("href")
logger.debug(f"Requesting page from: {url}")

try:
resp = self.get(url).html
url = resp.find("a[href*='listType=list_admin_moderator']", first=True)
Expand Down Expand Up @@ -1129,11 +1130,10 @@ def get_groups_by_search(self, word: str, **kwargs):
except AttributeError:
continue


@staticmethod
def find_group_id(button_id, raw_html):
"""Each group button has an id, which appears later in the script
tag followed by the group id."""
s = raw_html[raw_html.rfind(button_id) :]
group_id = s[s.find("result_id:") :].split(",")[0].split(":")[1]
return int(group_id)
return int(group_id)

0 comments on commit 345db7a

Please sign in to comment.