-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_merriam_webster_data.py
129 lines (95 loc) · 4.76 KB
/
get_merriam_webster_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import requests
import json
import os
def _audio_subdirectory(audio):
    """Return the media-server subdirectory for a non-empty audio base name."""
    if audio.startswith("bix"):
        return "bix"
    if audio.startswith("gg"):
        return "gg"
    first_char = audio[0]
    # Names starting with a digit or punctuation (e.g. "_") are all stored under "number".
    if not first_char.isalpha():
        return "number"
    return first_char.lower()


def _detailed_definitions(entry):
    """Collect the "text" items of every sense in the entry's first "def" section."""
    # `or [{}]` also covers an explicitly empty "def" list, which would otherwise raise IndexError.
    def_sections = entry.get("def") or [{}]
    definitions = []
    for sense_group in def_sections[0].get("sseq", []):
        for element in sense_group:  # each element is expected to be a [type, payload] pair
            is_sense = (
                isinstance(element, list)
                and len(element) > 1
                and element[0] == "sense"
                and isinstance(element[1], dict)
            )
            if not is_sense:
                continue
            definitions.extend(
                dt_item[1]
                for dt_item in element[1].get("dt", [])
                if len(dt_item) > 1 and dt_item[0] == "text"
            )
    return definitions


def get_merriam_webster_data(word, api_key, timeout=10):
    """
    Retrieves pronunciation and definition data from the Merriam-Webster Elementary Dictionary API.

    Only the first entry of the response is used. For every pronunciation that has an audio
    reference, the MP3 is also downloaded via download_mp3().

    Args:
        word: The word to look up.
        api_key: Your Merriam-Webster API key.
        timeout: Seconds to wait for the API before giving up (default 10).

    Returns:
        A dictionary with the keys 'pronunciations' (a list of dicts holding 'pronunciation' and,
        when audio exists, 'audio_link' and 'local_audio_file') and 'definitions' (a list of
        strings), or None if the API request fails, the response is not valid JSON, or the word
        is not found.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    # Escape the word so spaces, "/" or "?" inside it cannot alter the request path.
    url = f"https://www.dictionaryapi.com/api/v3/references/sd2/json/{quote(word, safe='')}"
    try:
        response = requests.get(url, params={"key": api_key}, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"Error making API request: {e}")
        return None

    # Parsed separately from the request: the JSON error raised by response.json() is a
    # ValueError in every requests version, and must not be reported as a request failure.
    try:
        data = response.json()
    except ValueError as e:
        print(f"Error decoding JSON response: {e}")
        return None

    # A miss is returned as a list of suggestion strings (or an empty list), not entry dicts.
    if (
        not isinstance(data, list)
        or not data
        or not isinstance(data[0], dict)
        or 'hwi' not in data[0]
    ):
        print(f"Word '{word}' not found in Merriam-Webster or API response is invalid.")
        return None

    entry = data[0]

    pronunciations = []
    for pr in entry['hwi'].get('prs', []):
        pronunciation = pr.get('mw', '')  # Merriam-Webster written pronunciation
        audio = pr.get('sound', {}).get('audio', '')
        if not audio:
            pronunciations.append({'pronunciation': pronunciation})  # entry without audio
            continue

        subdirectory = _audio_subdirectory(audio)
        audio_link = f"https://media.merriam-webster.com/audio/prons/en/us/mp3/{subdirectory}/{audio}.mp3"
        # NOTE: the local file is named after the word, so when an entry has several audio
        # pronunciations each download replaces the previous one.
        downloaded_file = download_mp3(audio_link, word)
        pronunciations.append({
            'pronunciation': pronunciation,
            'audio_link': audio_link,
            'local_audio_file': downloaded_file,
        })

    # Short definitions are preferred; fall back to the detailed senses when they are missing.
    definitions = entry.get("shortdef", [])
    if not definitions:
        definitions = _detailed_definitions(entry)

    return {'pronunciations': pronunciations, 'definitions': definitions}
def download_mp3(url, word, timeout=10):
    """
    Downloads the MP3 file from the given URL into the local 'pronunciations' directory.

    Args:
        url: The URL of the MP3 file.
        word: The word associated with the pronunciation; used as the file name.
        timeout: Seconds to wait for the server before giving up (default 10).

    Returns:
        The path to the downloaded file, or None if the download or the write to disk fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading MP3: {e}")
        return None

    # A path separator inside the word (e.g. "and/or") would point into a sub-directory
    # that does not exist, so neutralise it before building the file name.
    safe_name = word.replace("/", "_").replace("\\", "_")
    filename = f"pronunciations/{safe_name}.mp3"
    try:
        # Create a 'pronunciations' directory if it doesn't exist
        os.makedirs('pronunciations', exist_ok=True)
        with open(filename, 'wb') as file:
            file.write(response.content)
    except OSError as e:
        # Honour the documented contract: failure is reported as None, not as an exception.
        print(f"Error saving MP3: {e}")
        return None

    print(f"Successfully downloaded pronunciation for '{word}'")
    return filename
# Example usage. Guarded so that importing this module does not trigger a network
# request and a file download.
if __name__ == "__main__":
    # Set MERRIAM_WEBSTER_API_KEY in the environment to use your own key.
    # NOTE(review): the fallback literal is a key committed to source control; it is kept only
    # so the example keeps running unchanged and should be revoked and removed.
    api_key = os.environ.get(
        "MERRIAM_WEBSTER_API_KEY",
        "413e9c1e-6813-49b5-b95a-aec07325f276",
    )
    word = "cow"
    word_data = get_merriam_webster_data(word, api_key)
    if word_data:
        print(json.dumps(word_data, indent=4))  # Print formatted JSON data