|
|
@@ -3,26 +3,41 @@ import requests |
|
|
|
import sys |
|
|
|
import textwrap |
|
|
|
|
|
|
|
# Report that the wikipedia servers could not be reached and terminate the
# program with exit status 1. Never returns.
def connection_error():
    # Diagnostics belong on stderr so they do not mix with regular output
    # that a caller may be piping or capturing.
    print("Could not contact wikipedia servers.", file=sys.stderr)
    sys.exit(1)
|
|
|
# Fetch JSON from url and run it through transform, pretty printing errors
# and the data worked on as exhaustively as possible.
#
#   url       -- endpoint to GET.
#   transform -- callable applied to the decoded JSON; its result is returned.
#   params    -- optional dict of query-string parameters (default: none).
#
# On a connection failure, an undecodable response body, or an
# IndexError/KeyError raised by transform, a message is printed and the
# process exits with status 1.
def json_query(url, transform, params=None):
    try:
        # params defaults to None rather than a shared mutable {}; requests
        # treats None as "no query parameters".
        result = requests.get(url, params)
    except requests.exceptions.ConnectionError:
        # The builtin ConnectionError is unrelated to the one requests
        # raises, so the requests exception has to be named explicitly.
        print("Network connection error.")
        sys.exit(1)
    try:
        data = result.json()
    except ValueError as err:
        # Every JSON decode error that Response.json() can raise derives
        # from ValueError, whichever JSON backend requests is using.
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n'
              + str(err) + '\n' + result.text + '\n')
        sys.exit(1)
    try:
        return transform(data)
    except (IndexError, KeyError) as err:
        # repr() keeps the exception type visible next to the offending
        # key or index.
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + repr(err))
        # Dump the decoded payload so the unexpected shape can be inspected.
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)
|
|
|
|
|
|
|
# Search wikipedia for string, returning at most max_results results
# or the empty list if no matches were returned.
#
# max_results defaults to 1 so that callers passing only the search string
# keep getting a single result.
def page_search(string, max_results=1):
    params = {
        'q': string,
        'limit': max_results,
    }
    # json_query handles network and decoding failures; the transform picks
    # the 'pages' entry out of the decoded search response.
    return json_query(
        'https://en.wikipedia.org/w/rest.php/v1/search/page',
        lambda data: data['pages'],
        params,
    )
|
|
|
|
|
|
|
# Get a JSON object for the titled page, containing page metadata and a text summary.
def get_page_with_summary(title):
    # Imported here so this function does not rely on a file-level import.
    from urllib.parse import quote

    # The title is one path segment of the URL: percent-encode it, '/'
    # included, so titles containing '/' or '?' still address the intended
    # page instead of being parsed as extra path or a query string.
    url = 'https://en.wikipedia.org/api/rest_v1/page/summary/' + quote(title, safe='')
    # The summary response is returned as decoded, hence the identity transform.
    return json_query(url, lambda data: data)
|
|
|
|
|
|
|
# Get a list of the links from a page. For a disambiguation page, this means |
|
|
|
# a list of the links to individual pages. |
|
|
|
def get_page_links(title): |
|
|
|
params = { |
|
|
|
'action' : 'query', |
|
|
@@ -30,19 +45,17 @@ def get_page_links(title): |
|
|
|
'prop' : 'links', |
|
|
|
'format' : 'json' |
|
|
|
} |
|
|
|
try: |
|
|
|
return list(requests.get('https://en.wikipedia.org/w/api.php', params).json()['query']['pages'].values())[0]['links'] |
|
|
|
except ConnectionError: |
|
|
|
connection_error() |
|
|
|
return json_query('https://en.wikipedia.org/w/api.php', lambda data: list(data['query']['pages'].values())[0]['links'], params) |
|
|
|
|
|
|
|
def main(): |
|
|
|
if not sys.argv[1:]: |
|
|
|
arg = ' '.join(sys.argv[1:]) |
|
|
|
if not arg: |
|
|
|
print("Usage: wikipedia <list of search terms>") |
|
|
|
sys.exit(1) |
|
|
|
else: |
|
|
|
result = page_search(' '.join(sys.argv[1:])) |
|
|
|
if result: |
|
|
|
page = get_page_with_summary(result[0]['title']) |
|
|
|
results = page_search(arg, 1) |
|
|
|
if results: |
|
|
|
page = get_page_with_summary(results[0]['title']) |
|
|
|
if page['type'] == 'disambiguation': |
|
|
|
print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])])) |
|
|
|
else: |
|
|
|