wikipedia
This commit is contained in:
parent
8f7afc28a2
commit
52da575c66
55
wikipedia
55
wikipedia
@ -3,26 +3,41 @@ import requests
|
|||||||
import sys
|
import sys
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
def connection_error():
|
# Fetch JSON from url and run it through transform, pretty printing errors
|
||||||
print("Could not contact wikipedia servers.")
|
# and the data worked on as exhaustively as possible.
|
||||||
sys.exit(1)
|
def json_query(url, transform, params=None):
    """Fetch JSON from ``url`` and return ``transform(data)``.

    Any failure is reported on stdout, together with as much of the
    offending data as is available, and the process exits with status 1.

    Args:
        url: Endpoint to send the GET request to.
        transform: Callable applied to the decoded JSON; its result is
            returned to the caller.
        params: Optional mapping of query-string parameters. Defaults to
            ``None`` (no parameters) rather than a shared mutable ``{}``.

    Returns:
        Whatever ``transform`` returns for the decoded JSON document.

    Raises:
        SystemExit: On a connection error, on a body that is not valid
            JSON, or when ``transform`` raises ``IndexError``/``KeyError``
            while traversing the decoded data.
    """
    # Imported locally so the error path works even if the module does not
    # import pprint at file level.
    import pprint

    try:
        result = requests.get(url, params)
    # requests raises its own ConnectionError, which is unrelated to the
    # builtin of the same name, so it must be referenced explicitly.
    except requests.exceptions.ConnectionError:
        print("Network connection error.")
        sys.exit(1)

    try:
        data = result.json()
    # Every JSON decode error raised by Response.json() (json, simplejson,
    # or requests' own JSONDecodeError) is a ValueError subclass.
    except ValueError as err:
        # str() the exception and use the response body: concatenating the
        # raw objects to a str would raise TypeError and hide the real error.
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n' + str(err) + '\n' + result.text + '\n')
        sys.exit(1)

    try:
        return transform(data)
    except (IndexError, KeyError) as err:
        # repr() keeps the exception type visible (str(KeyError) is only the key).
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + repr(err))
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)
|
||||||
|
|
||||||
def page_search(string):
|
# Search wikipedia for string, returning at most max_results results
|
||||||
|
# or the empty list if no matches were returned.
|
||||||
|
def page_search(string, max_results):
|
||||||
params = {
|
params = {
|
||||||
'q' : string,
|
'q' : string,
|
||||||
'limit' : 1
|
'limit' : max_results
|
||||||
}
|
}
|
||||||
try:
|
return json_query('https://en.wikipedia.org/w/rest.php/v1/search/page', lambda data: data['pages'], params)
|
||||||
return requests.get('https://en.wikipedia.org/w/rest.php/v1/search/page', params).json()['pages']
|
|
||||||
except ConnectionError:
|
|
||||||
connection_error()
|
|
||||||
|
|
||||||
|
# Get a JSON object for the titled page, containing page metadata and a text summary.
|
||||||
def get_page_with_summary(title):
|
def get_page_with_summary(title):
|
||||||
try:
|
return json_query('https://en.wikipedia.org/api/rest_v1/page/summary/' + title, lambda data: data)
|
||||||
return requests.get('https://en.wikipedia.org/api/rest_v1/page/summary/' + title).json()
|
|
||||||
except ConnectionError:
|
|
||||||
connection_error()
|
|
||||||
|
|
||||||
|
# Get a list of the links from a page. For a disambiguation page, this means
|
||||||
|
# a list of the links to individual pages.
|
||||||
def get_page_links(title):
|
def get_page_links(title):
|
||||||
params = {
|
params = {
|
||||||
'action' : 'query',
|
'action' : 'query',
|
||||||
@ -30,19 +45,17 @@ def get_page_links(title):
|
|||||||
'prop' : 'links',
|
'prop' : 'links',
|
||||||
'format' : 'json'
|
'format' : 'json'
|
||||||
}
|
}
|
||||||
try:
|
return json_query('https://en.wikipedia.org/w/api.php', lambda data: list(data['query']['pages'].values())[0]['links'], params)
|
||||||
return list(requests.get('https://en.wikipedia.org/w/api.php', params).json()['query']['pages'].values())[0]['links']
|
|
||||||
except ConnectionError:
|
|
||||||
connection_error()
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if not sys.argv[1:]:
|
arg = ' '.join(sys.argv[1:])
|
||||||
|
if not arg:
|
||||||
print("Usage: wikipedia <list of search terms>")
|
print("Usage: wikipedia <list of search terms>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
result = page_search(' '.join(sys.argv[1:]))
|
results = page_search(arg, 1)
|
||||||
if result:
|
if results:
|
||||||
page = get_page_with_summary(result[0]['title'])
|
page = get_page_with_summary(results[0]['title'])
|
||||||
if page['type'] == 'disambiguation':
|
if page['type'] == 'disambiguation':
|
||||||
print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])]))
|
print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])]))
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user