Hacker News
Contents
Hacker News¶
Resources for using the Hacker News API
About¶
| Key | Value |
|---|---|
| Source | Hacker News API: Documentation and Samples for the Official HN API |
| Website | |
Import Libraries¶
External Libraries¶
from urllib.request import urlopen
import json
import pandas as pd
Get Stories¶
def get_stories(
    category: str = 'topstories',
    number: int = 5,
    include_kids: bool = False,
    *,
    base_url: str = 'https://hacker-news.firebaseio.com/v0/',
) -> pd.DataFrame:
    """Return a DataFrame of Hacker News stories.

    Args:
        category (str): topstories|newstories|beststories
        number (int): total number of stories to return
        include_kids (bool, optional): keep the ``kids`` (comment id) column,
            normalized to one row per comment id. Defaults to False.
        base_url (str, optional): root URL the story list and the items are
            fetched from. Defaults to the public Hacker News endpoint.

    Returns:
        pd.DataFrame: table of stories with a fresh 0..n-1 index. Without
        ``include_kids`` there is one row per story and no ``kids`` column.
        With ``include_kids`` there is one row per (story, comment id) pair;
        a story that has no comments keeps a single row whose ``kids`` value
        is missing.

    Raises:
        urllib.error.URLError: if a request cannot be completed.
    """
    # Tolerate a base URL given with or without its trailing slash
    api_root = base_url.rstrip('/') + '/'

    def fetch_json(url: str):
        # The context manager closes the connection even if decoding fails
        with urlopen(url) as response:
            return json.loads(response.read().decode('utf-8'))

    # Get list of story ids, truncated to the requested amount
    story_ids = fetch_json(api_root + category + '.json')[:number]

    # Columns that are always present in the result, in display order
    column_names = [
        'by',
        'descendants',
        'id',
        'kids',
        'score',
        'time',
        'title',
        'type',
        'url',
    ]

    # Retrieve json data for each story
    records = []
    for story_id in story_ids:
        item = fetch_json(api_root + 'item/' + str(story_id) + '.json')
        # A JSON `null` body decodes to None; there is nothing to tabulate
        if item is None:
            continue
        records.append(item)

    # Build the frame once instead of concatenating inside the loop. Parsing
    # the items ourselves (rather than pd.read_json per item) also means an
    # item made only of scalar fields - e.g. a story without comments - no
    # longer raises.
    stories_df = pd.DataFrame(records)
    extra_columns = [
        name for name in stories_df.columns if name not in column_names
    ]
    stories_df = stories_df.reindex(columns=column_names + extra_columns)

    # Remove comment ids if requested
    if not include_kids:
        return stories_df.drop(columns='kids')

    # One row per comment id, with unique index labels
    return stories_df.explode('kids').reset_index(drop=True)
# Show the three current top stories, without the comment-id column
get_stories(category='topstories', number=3, include_kids=False)
| | by | descendents | id | score | time | title | type | url | descendants |
|---|---|---|---|---|---|---|---|---|---|
| 0 | cpeterso | NaN | 31523019 | 39 | 1653597069 | Proton Is Trying to Become Google–Without Your... | story | https://www.wired.com/story/proton-mail-calend... | 20.0 |
| 1 | sgbeal | NaN | 31518618 | 498 | 1653574719 | SQLite 3 Fiddle | story | https://sqlite.org/fiddle/ | 83.0 |
| 2 | hwayne | NaN | 31520483 | 222 | 1653583255 | Which dinosaurs lived in your hometown? | story | https://dinosaurpictures.org/ancient-earth#260 | 67.0 |
# Keep the result around so it can be inspected in later cells
three_stories = get_stories(
    category='topstories',
    number=3,
    include_kids=False,
)
type(three_stories)
pandas.core.frame.DataFrame
# Pull the headline column out as a plain Python list
three_stories.loc[:, 'title'].tolist()
['Proton Is Trying to Become Google–Without Your Data',
'SQLite 3 Fiddle',
'Which dinosaurs lived in your hometown?']