From 9c11fabfad4ad989b724d3d5fa1efb611902e078 Mon Sep 17 00:00:00 2001 From: Owen Date: Wed, 4 Dec 2024 21:32:11 -0600 Subject: [PATCH] v3.2.0 --- pyproject.toml | 2 +- test/dev_testing.ipynb | 234 ++---------------------------- test/second_tester_notebook.ipynb | 131 ++++++++--------- 3 files changed, 76 insertions(+), 291 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7fd742..561333e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] # -- Metadata -------------------------------------------------------------------------------------- name = "ScraperFC" -version = "3.1.2" +version = "3.2.0" description = "Package for scraping soccer data from a variety of sources." readme = "README.md" license = {file = "LICENSE"} diff --git a/test/dev_testing.ipynb b/test/dev_testing.ipynb index 81ef6ec..9c7fa8d 100644 --- a/test/dev_testing.ipynb +++ b/test/dev_testing.ipynb @@ -23,242 +23,35 @@ "from selenium.webdriver.common.by import By\n", "from bs4 import BeautifulSoup\n", "from io import StringIO\n", - "import pandas as pd" + "import pandas as pd\n", + "import requests\n", + "from tqdm import tqdm\n", + "import time" ] }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Changed currency to usd.\n" + "c:\\Users\\osmou\\Documents\\GitHub\\ScraperFC\\test\\../src\\ScraperFC\\transfermarkt.py:97: UserWarning: No club links table found for 1901/02 Jupiler Pro League. Returning empty list.\n", + " warrnings.warn(\n" ] } ], "source": [ - "cap = sfc.Capology()\n", - "salaries = cap.scrape_salaries(\"2022-23\", \"Bundesliga\", \"usd\")" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EST. BASE SALARYBIO
PLAYERGROSS P/W\\n(USD)GROSS P/Y\\n(USD)ADJ. GROSS\\n(USD)POS.AGECOUNTRYCLUB
0Sadio Mané$ 455,932$ 23,708,454$ 23,708,454F30SenegalBayern Munich
1Manuel Neuer$ 435,208$ 22,630,797$ 22,630,797K36GermanyBayern Munich
2Thomas Müller$ 424,846$ 22,091,968$ 22,091,968F33GermanyBayern Munich
3Joshua Kimmich$ 404,121$ 21,014,312$ 21,014,312M27GermanyBayern Munich
4Serge Gnabry$ 391,065$ 20,335,388$ 20,335,388F27GermanyBayern Munich
...........................
566Florian Schock$ 622$ 32,330$ 32,330K21GermanyStuttgart
567Linus Gechter$ 414$ 21,553$ 21,553D18GermanyHertha Berlin
568Naouirou Ahamada$ 414$ 21,553$ 21,553M20FranceStuttgart
569Daniel Klein$ 414$ 21,553$ 21,553K21GermanyAugsburg
570Marcel Sabitzer$ 0$ 0$ 0M28AustriaBayern Munich
\n", - "

571 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " EST. BASE SALARY \\\n", - " PLAYER GROSS P/W\\n(USD) GROSS P/Y\\n(USD) ADJ. GROSS\\n(USD) \n", - "0 Sadio Mané $ 455,932 $ 23,708,454 $ 23,708,454 \n", - "1 Manuel Neuer $ 435,208 $ 22,630,797 $ 22,630,797 \n", - "2 Thomas Müller $ 424,846 $ 22,091,968 $ 22,091,968 \n", - "3 Joshua Kimmich $ 404,121 $ 21,014,312 $ 21,014,312 \n", - "4 Serge Gnabry $ 391,065 $ 20,335,388 $ 20,335,388 \n", - ".. ... ... ... ... \n", - "566 Florian Schock $ 622 $ 32,330 $ 32,330 \n", - "567 Linus Gechter $ 414 $ 21,553 $ 21,553 \n", - "568 Naouirou Ahamada $ 414 $ 21,553 $ 21,553 \n", - "569 Daniel Klein $ 414 $ 21,553 $ 21,553 \n", - "570 Marcel Sabitzer $ 0 $ 0 $ 0 \n", - "\n", - " BIO \n", - " POS. AGE COUNTRY CLUB \n", - "0 F 30 Senegal Bayern Munich \n", - "1 K 36 Germany Bayern Munich \n", - "2 F 33 Germany Bayern Munich \n", - "3 M 27 Germany Bayern Munich \n", - "4 F 27 Germany Bayern Munich \n", - ".. ... .. ... ... \n", - "566 K 21 Germany Stuttgart \n", - "567 D 18 Germany Hertha Berlin \n", - "568 M 20 France Stuttgart \n", - "569 K 21 Germany Augsburg \n", - "570 M 28 Austria Bayern Munich \n", - "\n", - "[571 rows x 8 columns]" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "salaries" + "tm = sfc.Transfermarkt()\n", + "club_links = tm.get_club_links(\"1901/02\", \"Jupiler Pro League\")" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -273,11 +66,6 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" - }, - "vscode": { - "interpreter": { - "hash": "585a938ec471c889bf0cce0aed741a99eaf47ca09c0fa8393793bc5bfe77ba11" - } } }, "nbformat": 4, diff --git a/test/second_tester_notebook.ipynb b/test/second_tester_notebook.ipynb index ea1ddc2..2dac432 100644 --- a/test/second_tester_notebook.ipynb +++ b/test/second_tester_notebook.ipynb @@ -2,105 +2,107 @@ "cells": [ { "cell_type": "code", - "execution_count": 49, + "execution_count": 1, "id": "84efcaf0", "metadata": {}, "outputs": [], "source": [ - "import cloudscraper\n", - "from bs4 import BeautifulSoup\n", - "\n", - "TRANSFERMARKT_ROOT = 'https://www.transfermarkt.us'" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 2, "id": "0ef1e920", "metadata": {}, "outputs": [], "source": [ - "scraper = cloudscraper.CloudScraper()" + "import sys\n", + "sys.path.append('../src')\n", + "import ScraperFC as sfc\n", + "from tqdm import tqdm\n", + "import random" ] }, { "cell_type": "code", - "execution_count": 40, - "id": "24dae353", + "execution_count": 3, + "id": "91f84cbf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'2024': '2023', '2023': '2022', '2022': '2021', '2021': '2020', '2020': '2019', '2019': '2018', '2018': '2017', '2017': '2016', '2016': '2015', '2015': '2014', '2014': '2013', '2013': '2012', '2012': '2011', '2011': '2010', '2010': '2009', '2009': '2008', '2008': '2007', '2007': '2006', '2006': '2005', '2005': '2004', '2004': '2003', '2003': '2002', '2002': '2001', '2001': '2000', '2000': '1999', '1999': '1998', '1998': '1997', '1997': '1996', '1996': '1995'}\n" + "Running\n" ] - } - ], - "source": [ - "# Get valid seasons\n", - "# url = 'https://www.transfermarkt.us/ligue-1/startseite/wettbewerb/FR1'\n", - "url = 'https://www.transfermarkt.us/major-league-soccer/startseite/wettbewerb/MLS1'\n", - "\n", - "soup = BeautifulSoup(scraper.get(url).content, 'html.parser')\n", - "valid_seasons = dict([(x.text, x['value']) \n", - " for x in soup.find('select', {'name': 'saison_id'}).find_all('option')])\n", - "print(valid_seasons)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "779d104d", - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "['https://www.transfermarkt.us/sporting-kansas-city/startseite/verein/4284/saison_id/2004', 'https://www.transfermarkt.us/new-york-red-bulls/startseite/verein/623/saison_id/2004', 'https://www.transfermarkt.us/new-england-revolution/startseite/verein/626/saison_id/2004', 'https://www.transfermarkt.us/columbus-crew-sc/startseite/verein/813/saison_id/2004', 'https://www.transfermarkt.us/d-c-united/startseite/verein/2440/saison_id/2004', 'https://www.transfermarkt.us/chicago-fire-fc/startseite/verein/432/saison_id/2004', 'https://www.transfermarkt.us/cd-chivas-usa/startseite/verein/6642/saison_id/2004', 'https://www.transfermarkt.us/real-salt-lake-city/startseite/verein/6643/saison_id/2004', 'https://www.transfermarkt.us/fc-dallas/startseite/verein/8816/saison_id/2004', 'https://www.transfermarkt.us/los-angeles-galaxy/startseite/verein/1061/saison_id/2004', 'https://www.transfermarkt.us/colorado-rapids/startseite/verein/1247/saison_id/2004', 'https://www.transfermarkt.us/san-jose-earthquakes/startseite/verein/218/saison_id/2004']\n" + "2024 Copa Libertadores: 100%|██████████| 15/15 [02:31<00:00, 10.13s/it]\n", + "2023 Copa Libertadores: 100%|██████████| 15/15 [02:24<00:00, 9.64s/it]\n", + "2022 Copa Libertadores: 100%|██████████| 15/15 [02:24<00:00, 9.63s/it]\n", + "2021 Copa Libertadores: 100%|██████████| 15/15 [02:21<00:00, 9.44s/it]\n", + "2020 Copa Libertadores: 100%|██████████| 15/15 [02:27<00:00, 9.86s/it]\n", + "2019 Copa Libertadores: 100%|██████████| 15/15 [02:25<00:00, 9.73s/it]\n", + "2018 Copa Libertadores: 100%|██████████| 15/15 [02:13<00:00, 8.91s/it]\n", + "2017 Copa Libertadores: 100%|██████████| 15/15 [02:07<00:00, 8.48s/it]\n", + "2016 Copa Libertadores: 100%|██████████| 13/13 [01:53<00:00, 8.74s/it]\n", + "2015 Copa Libertadores: 100%|██████████| 13/13 [02:05<00:00, 9.64s/it]\n", + "2014 Copa Libertadores: 100%|██████████| 13/13 [01:50<00:00, 8.49s/it]\n", + "2024-2025 Champions League: 100%|██████████| 18/18 [02:40<00:00, 8.92s/it]\n", + "2023-2024 Champions League: 100%|██████████| 21/21 [03:33<00:00, 10.18s/it]\n", + "2022-2023 Champions League: 100%|██████████| 21/21 [03:20<00:00, 9.53s/it]\n", + "2021-2022 Champions League: 100%|██████████| 21/21 [03:25<00:00, 9.80s/it]\n", + "2020-2021 Champions League: 100%|██████████| 17/17 [02:37<00:00, 9.27s/it]\n", + "2019-2020 Champions League: 100%|██████████| 21/21 [03:19<00:00, 9.48s/it]\n", + "2018-2019 Champions League: 100%|██████████| 21/21 [03:22<00:00, 9.63s/it]\n", + "2017-2018 Champions League: 100%|██████████| 21/21 [03:32<00:00, 10.14s/it]\n", + "2016-2017 Champions League: 100%|██████████| 21/21 [02:50<00:00, 8.12s/it]\n", + "2015-2016 Champions League: 100%|██████████| 21/21 [03:09<00:00, 9.01s/it]\n", + "2014-2015 Champions League: 100%|██████████| 21/21 [03:06<00:00, 8.89s/it]\n", + "2013-2014 Champions League: 100%|██████████| 21/21 [02:57<00:00, 8.47s/it]\n", + "2012-2013 Champions League: 100%|██████████| 21/21 [03:05<00:00, 8.86s/it]\n", + "2011-2012 Champions League: 100%|██████████| 21/21 [02:54<00:00, 8.30s/it]\n", + "2010-2011 Champions League: 100%|██████████| 21/21 [03:01<00:00, 8.66s/it]\n", + "2009-2010 Champions League: 76%|███████▌ | 16/21 [02:22<00:44, 8.88s/it]\n" ] - } - ], - "source": [ - "# Get club links\n", - "year = '2005'\n", - "\n", - "soup = BeautifulSoup(scraper.get(f'{url}/plus/?saison_id={valid_seasons[year]}').content, \n", - " 'html.parser')\n", - "club_els = (soup.find('table', {'class': 'items'})\n", - " .find_all('td', {'class': 'hauptlink no-border-links'}))\n", - "club_links = [TRANSFERMARKT_ROOT + x.find('a')['href'] for x in club_els]\n", - "print(club_links)" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "2bcd55f2", - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "['https://www.transfermarkt.us/thomas-kaminski/profil/spieler/77757', 'https://www.transfermarkt.us/thomas-kaminski/marktwertverlauf/spieler/77757', 'https://www.transfermarkt.us/tim-krul/profil/spieler/33027', 'https://www.transfermarkt.us/tim-krul/marktwertverlauf/spieler/33027', 'https://www.transfermarkt.us/jack-walton/profil/spieler/368629', 'https://www.transfermarkt.us/jack-walton/marktwertverlauf/spieler/368629', 'https://www.transfermarkt.us/james-shea/profil/spieler/91340', 'https://www.transfermarkt.us/james-shea/marktwertverlauf/spieler/91340', 'https://www.transfermarkt.us/teden-mengi/profil/spieler/548470', 'https://www.transfermarkt.us/teden-mengi/marktwertverlauf/spieler/548470', 'https://www.transfermarkt.us/tom-lockyer/profil/spieler/207742', 'https://www.transfermarkt.us/tom-lockyer/marktwertverlauf/spieler/207742', 'https://www.transfermarkt.us/gabriel-osho/profil/spieler/364409', 'https://www.transfermarkt.us/gabriel-osho/marktwertverlauf/spieler/364409', 'https://www.transfermarkt.us/mads-andersen/profil/spieler/407021', 'https://www.transfermarkt.us/mads-andersen/marktwertverlauf/spieler/407021', 'https://www.transfermarkt.us/reece-burke/profil/spieler/264220', 'https://www.transfermarkt.us/reece-burke/marktwertverlauf/spieler/264220', 'https://www.transfermarkt.us/amarii-bell/profil/spieler/278166', 'https://www.transfermarkt.us/amarii-bell/marktwertverlauf/spieler/278166', 'https://www.transfermarkt.us/dan-potts/profil/spieler/207037', 'https://www.transfermarkt.us/dan-potts/marktwertverlauf/spieler/207037', 'https://www.transfermarkt.us/issa-kabore/profil/spieler/649452', 'https://www.transfermarkt.us/issa-kabore/marktwertverlauf/spieler/649452', 'https://www.transfermarkt.us/daiki-hashioka/profil/spieler/387191', 'https://www.transfermarkt.us/daiki-hashioka/marktwertverlauf/spieler/387191', 'https://www.transfermarkt.us/marvelous-nakamba/profil/spieler/324882', 'https://www.transfermarkt.us/marvelous-nakamba/marktwertverlauf/spieler/324882', 'https://www.transfermarkt.us/pelly-ruddock-mpanzu/profil/spieler/244338', 'https://www.transfermarkt.us/pelly-ruddock-mpanzu/marktwertverlauf/spieler/244338', 'https://www.transfermarkt.us/albert-sambi-lokonga/profil/spieler/381967', 'https://www.transfermarkt.us/albert-sambi-lokonga/marktwertverlauf/spieler/381967', 'https://www.transfermarkt.us/ross-barkley/profil/spieler/131978', 'https://www.transfermarkt.us/ross-barkley/marktwertverlauf/spieler/131978', 'https://www.transfermarkt.us/jordan-clark/profil/spieler/184129', 'https://www.transfermarkt.us/jordan-clark/marktwertverlauf/spieler/184129', 'https://www.transfermarkt.us/luke-berry/profil/spieler/125685', 'https://www.transfermarkt.us/luke-berry/marktwertverlauf/spieler/125685', 'https://www.transfermarkt.us/elliot-thorpe/profil/spieler/496661', 'https://www.transfermarkt.us/elliot-thorpe/marktwertverlauf/spieler/496661', 'https://www.transfermarkt.us/alfie-doughty/profil/spieler/608175', 'https://www.transfermarkt.us/alfie-doughty/marktwertverlauf/spieler/608175', 'https://www.transfermarkt.us/fred-onyedinma/profil/spieler/305274', 'https://www.transfermarkt.us/fred-onyedinma/marktwertverlauf/spieler/305274', 'https://www.transfermarkt.us/chiedozie-ogbene/profil/spieler/392591', 'https://www.transfermarkt.us/chiedozie-ogbene/marktwertverlauf/spieler/392591', 'https://www.transfermarkt.us/tahith-chong/profil/spieler/344830', 'https://www.transfermarkt.us/tahith-chong/marktwertverlauf/spieler/344830', 'https://www.transfermarkt.us/andros-townsend/profil/spieler/61842', 'https://www.transfermarkt.us/andros-townsend/marktwertverlauf/spieler/61842', 'https://www.transfermarkt.us/carlton-morris/profil/spieler/246963', 'https://www.transfermarkt.us/carlton-morris/marktwertverlauf/spieler/246963', 'https://www.transfermarkt.us/elijah-adebayo/profil/spieler/319900', 'https://www.transfermarkt.us/elijah-adebayo/marktwertverlauf/spieler/319900', 'https://www.transfermarkt.us/jacob-brown/profil/spieler/469958', 'https://www.transfermarkt.us/jacob-brown/marktwertverlauf/spieler/469958', 'https://www.transfermarkt.us/cauley-woodrow/profil/spieler/169801', 'https://www.transfermarkt.us/cauley-woodrow/marktwertverlauf/spieler/169801']\n" + "ename": "UnicodeEncodeError", + "evalue": "'latin-1' codec can't encode character '\\u0107' in position 80328: ordinal not in range(256)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mUnicodeEncodeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[3], line 11\u001b[0m\n\u001b[0;32m 9\u001b[0m match_links \u001b[38;5;241m=\u001b[39m random\u001b[38;5;241m.\u001b[39msample(match_links, \u001b[38;5;28mlen\u001b[39m(match_links) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m10\u001b[39m)\n\u001b[0;32m 10\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m link \u001b[38;5;129;01min\u001b[39;00m tqdm(match_links, desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00myear\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mleague\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m---> 11\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43mfb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscrape_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlink\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\osmou\\Documents\\GitHub\\ScraperFC\\test\\../src\\ScraperFC\\fbref.py:371\u001b[0m, in \u001b[0;36mFBref.scrape_match\u001b[1;34m(self, link)\u001b[0m\n\u001b[0;32m 368\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m`link` must be a string.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 370\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get(link)\n\u001b[1;32m--> 371\u001b[0m soup \u001b[38;5;241m=\u001b[39m BeautifulSoup(\u001b[43mr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontent\u001b[49m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhtml.parser\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 373\u001b[0m \u001b[38;5;66;03m# General match info\u001b[39;00m\n\u001b[0;32m 374\u001b[0m scorebox_meta_tag \u001b[38;5;241m=\u001b[39m soup\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdiv\u001b[39m\u001b[38;5;124m\"\u001b[39m, {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclass\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscorebox_meta\u001b[39m\u001b[38;5;124m\"\u001b[39m})\n", + "File \u001b[1;32mc:\\Users\\osmou\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\botasaurus_requests\\response.py:171\u001b[0m, in \u001b[0;36mResponse.content\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 168\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[0;32m 169\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcontent\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mbytes\u001b[39m:\n\u001b[0;32m 170\u001b[0m \u001b[38;5;66;03m# note: this will convert the content to bytes on each access\u001b[39;00m\n\u001b[1;32m--> 171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mbytes\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;31mUnicodeEncodeError\u001b[0m: 'latin-1' codec can't encode character '\\u0107' in position 80328: ordinal not in range(256)" ] } ], "source": [ - "# Get player links from club link\n", - "club_link = 'https://www.transfermarkt.us/luton-town/startseite/verein/1031/saison_id/2023'\n", - "soup = BeautifulSoup(scraper.get(club_link).content, 'html.parser')\n", - "player_els = soup.find('table', {'class': 'items'}).find_all('td', {'class': 'hauptlink'})\n", - "player_links = [TRANSFERMARKT_ROOT + x.find('a')['href'] for x in player_els]\n", - "print(player_links)" + "from ScraperFC.fbref import comps\n", + "\n", + "fb = sfc.FBref()\n", + "\n", + "for league in comps.keys():\n", + " valid_years = fb.get_valid_seasons(league)\n", + " for year in valid_years:\n", + " match_links = fb.get_match_links(year, league)\n", + " match_links = random.sample(match_links, len(match_links) // 10)\n", + " for link in tqdm(match_links, desc=f\"{year} {league}\"):\n", + " _ = fb.scrape_match(link)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -115,11 +117,6 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.0" - }, - "vscode": { - "interpreter": { - "hash": "cc273e64c6c48e9881aab795f8d0e622d86c0ebe91aa99d17d2821087e5340fb" - } } }, "nbformat": 4,