wikipedia

This commit is contained in:
Victor Fors 2022-03-31 23:03:08 +02:00
parent 8f7afc28a2
commit 52da575c66

View File

@ -3,26 +3,41 @@ import requests
import sys import sys
import textwrap import textwrap
# Fetch JSON from url and run it through transform, pretty printing errors
# and the data worked on as exhaustively as possible.
#
# url:       endpoint the GET request is sent to.
# transform: callable applied to the decoded JSON; its result is returned.
# params:    optional dict of query parameters (None sends no parameters).
#
# A connection failure, a JSON decoding failure, or an IndexError/KeyError
# raised by transform prints a diagnostic and exits the process with status 1.
def json_query(url, transform, params=None):
    # Imported locally so this function does not depend on a module-level
    # import of pprint being present.
    import pprint

    try:
        result = requests.get(url, params)
    # requests raises its own ConnectionError; it does not derive from the
    # builtin exception of the same name, so it must be named explicitly.
    except requests.exceptions.ConnectionError:
        print("Network connection error.")
        sys.exit(1)
    try:
        data = result.json()
    # The JSON decode errors raised by Response.json() derive from ValueError,
    # so no extra exception import is required.
    except ValueError as err:
        # str + exception / str + Response would raise TypeError, so convert
        # explicitly and show the raw response body.
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n' + str(err) + '\n' + result.text + '\n')
        sys.exit(1)
    try:
        return transform(data)
    except (IndexError, KeyError) as err:
        # repr keeps the exception type visible (str(KeyError) is only the key).
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + repr(err))
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)
# Query the Wikipedia search endpoint for string.
# Returns the list of matching pages, capped at max_results entries;
# the list is empty if no matches were returned.
def page_search(string, max_results):
    def extract_pages(payload):
        return payload['pages']

    query = {'q': string, 'limit': max_results}
    endpoint = 'https://en.wikipedia.org/w/rest.php/v1/search/page'
    return json_query(endpoint, extract_pages, query)
# Get a JSON object for the titled page, containing page metadata and a text summary.
# The title is percent-encoded before being placed in the URL path so that
# titles containing '/', '?' or '#' (e.g. "AC/DC") address the intended page
# instead of being read as extra path segments, a query string or a fragment.
def get_page_with_summary(title):
    # Imported locally so this function does not depend on a module-level import.
    from urllib.parse import quote

    endpoint = 'https://en.wikipedia.org/api/rest_v1/page/summary/' + quote(title, safe='')
    return json_query(endpoint, lambda data: data)
# Get a list of the links from a page. For a disambiguation page, this means
# a list of the links to individual pages.
def get_page_links(title): def get_page_links(title):
params = { params = {
'action' : 'query', 'action' : 'query',
@ -30,19 +45,17 @@ def get_page_links(title):
'prop' : 'links', 'prop' : 'links',
'format' : 'json' 'format' : 'json'
} }
try: return json_query('https://en.wikipedia.org/w/api.php', lambda data: list(data['query']['pages'].values())[0]['links'], params)
return list(requests.get('https://en.wikipedia.org/w/api.php', params).json()['query']['pages'].values())[0]['links']
except ConnectionError:
connection_error()
def main(): def main():
if not sys.argv[1:]: arg = ' '.join(sys.argv[1:])
if not arg:
print("Usage: wikipedia <list of search terms>") print("Usage: wikipedia <list of search terms>")
sys.exit(1) sys.exit(1)
else: else:
result = page_search(' '.join(sys.argv[1:])) results = page_search(arg, 1)
if result: if results:
page = get_page_with_summary(result[0]['title']) page = get_page_with_summary(results[0]['title'])
if page['type'] == 'disambiguation': if page['type'] == 'disambiguation':
print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])])) print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])]))
else: else: