Netflix Movies and TV Shows (3) Netflix timeline

2 분 소요

앞으로 4개의 캐글 노트북에 대한 글을 정리하려 합니다.
첫번째 캐글은 Netflix Movies and TV Shows 라는 데이터셋입니다. 개인적인 논리적 흐름을 고민하는 데 있어 Netflix Research에서 Analytics와 관련된 아티클을 공부했습니다. 필요하신 분들은 꼭 참고하시면 좋을 것 같습니다.


해당 그래프는 안수빈님의 notebookJosh님의 notebook로부터
코드를 수정한 것입니다. 또한 해당 타임라인의 이벤트는 넷플릭스 공식 홈페이지를 참고하여 제작하였습니다.


이미지

코드

# Timeline code from Subin An's awesome notebook
# https://www.kaggle.com/subinium/awesome-visualization-with-titanic-dataset

from datetime import datetime

tl_dates = [
    "1997\nDVD Rent Idea",
    "1999\nNetflix\nsubscription\ndebut",
    "2002\nIPO for 1$",
    "2005\nProfile\nfeature\nlaunched",
    "2007\nStreaming service",
    "2009\nOver\n10 million"
]

tl_x = [0.6, 3.2, 6.0, 8.4, 10.8, 13]

tl_sub_x = [1.9, 4.7, 7.0, 9.2, 12, 14]

tl_sub_times = [
    "1998", "2000","2003", "2006", "2008", "2010"
]

tl_text = ["Netflix.com\nlaunched", "Starts\npersonal\nrecommendation","Over\n1 million","Over\n5 million","Partnership\nwith electronics",
    "Mobile\ndevices\navailable"
    ]

fig, ax = plt.subplots(figsize=(12, 4), constrained_layout=True)
ax.set_ylim(-2, 1.75)
ax.set_xlim(0, 15)

ax.axhline(0, xmin=0, c='#4a4a4a', zorder=1)

ax.scatter(tl_x, np.zeros(len(tl_x)), s=120, c='#4a4a4a',zorder=2)
ax.scatter(tl_x, np.zeros(len(tl_x)), s=30, c='#fafafa', zorder=3)

ax.scatter(tl_sub_x, np.zeros(len(tl_sub_x)), s=50, c='#4a4a4a', zorder=4)

for x, date in zip(tl_x, tl_dates):
    ax.text(x, -0.2, date, ha='center', fontfamily='serif', fontweight='bold', va = 'top',
            color='#4a4a4a',fontsize=10)
    
levels = np.zeros(len(tl_sub_x))
levels[::2] = 0.3
levels[1::2] = 0.3

markerline, stemline, baseline = ax.stem(tl_sub_x, levels, use_line_collection=True)
plt.setp(baseline, zorder=0)
plt.setp(markerline, marker=',', color='#4a4a4a')
plt.setp(stemline, color='#4a4a4a')

for idx, x, time, txt in zip(range(1, len(tl_sub_x)+1), tl_sub_x, tl_sub_times, tl_text):
    ax.text(x, 0.89, time, ha='center', va = 'top',
            fontfamily='serif', fontweight='bold',
            color='#4a4a4a' if idx!=len(tl_sub_x) else '#b20710', fontsize=10)
    
    ax.text(x, 0.76, txt, va='top',ha='center', fontweight='bold', fontsize=10,
            fontfamily='serif', color='#4a4a4a' if idx!=len(tl_sub_x) else '#b20710')
    
for spine in ['left', 'top', 'right', 'bottom']:
    ax.spines[spine].set_visible(False)

ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('none')

ax.set_title('Netflix through the years', fontweight='bold', fontfamily='serif', fontsize=16, color='#4a4a4a')
ax.text(2.4, 1.57, "From DVD rentals to a global audience of over 150m people - is it time for Netflix to Chill?", fontfamily='serif', 
        fontsize=12, color='#4a4a4a')

images_dir = '/content/drive/MyDrive/kaggle/Netflix Movies and TV Shows'
plt.savefig(f"{images_dir}/netflix_history1.png", dpi=200)
from google.colab import files
files.download(f"{images_dir}/netflix_history1.png")

이미지

코드

# Timeline code from Subin An's awesome notebook
# https://www.kaggle.com/subinium/awesome-visualization-with-titanic-dataset

from datetime import datetime

tl_dates = [
    "2011\nFirst\nNetflix button",
    "2013\nWin three\nPrimetime Emmy awards",
    "2015\nFirst original\nfeature film",
    "2017\nOver\n100 million",
    "2019\nWins\n4 Academy Awards",
    "2021\nOver \n200 million"
]

tl_x = [0.6, 3.2, 6.0, 8.4, 10.8, 13]

tl_sub_x = [1.9, 4.7, 7.0, 9.2, 12, 14]

tl_sub_times = [
    "2012", "2014","2016", "2018", "2020", "2022"
]

tl_text = ["Over\n25 million", "Over\n50 million","Expands to\n130 new countries","Winning\n23 Emmy awards","Top 10 lists debut",
    "25th anniversary"
    ]

fig, ax = plt.subplots(figsize=(12, 4), constrained_layout=True)
ax.set_ylim(-2, 1.75)
ax.set_xlim(0, 15)

ax.axhline(0, xmin=0, c='#4a4a4a', zorder=1)

ax.scatter(tl_x, np.zeros(len(tl_x)), s=120, c='#4a4a4a',zorder=2)
ax.scatter(tl_x, np.zeros(len(tl_x)), s=30, c='#fafafa', zorder=3)

ax.scatter(tl_sub_x, np.zeros(len(tl_sub_x)), s=50, c='#4a4a4a', zorder=4)

for x, date in zip(tl_x, tl_dates):
    ax.text(x, -0.2, date, ha='center', fontfamily='serif', fontweight='bold', va = 'top',
            color='#4a4a4a',fontsize=10)
    
levels = np.zeros(len(tl_sub_x))
levels[::2] = 0.3
levels[1::2] = 0.3

markerline, stemline, baseline = ax.stem(tl_sub_x, levels, use_line_collection=True)
plt.setp(baseline, zorder=0)
plt.setp(markerline, marker=',', color='#4a4a4a')
plt.setp(stemline, color='#4a4a4a')

for idx, x, time, txt in zip(range(1, len(tl_sub_x)+1), tl_sub_x, tl_sub_times, tl_text):
    ax.text(x, 0.89, time, ha='center', va = 'top',
            fontfamily='serif', fontweight='bold',
            color='#4a4a4a' if idx!=len(tl_sub_x) else '#b20710', fontsize=10)
    
    ax.text(x, 0.76, txt, va='top',ha='center', fontweight='bold', fontsize=10,
            fontfamily='serif', color='#4a4a4a' if idx!=len(tl_sub_x) else '#b20710')
    
for spine in ['left', 'top', 'right', 'bottom']:
    ax.spines[spine].set_visible(False)

ax.set_xticks([])
ax.set_yticks([])
ax.set_facecolor('none')

ax.set_title('Netflix through the years', fontweight='bold', fontfamily='serif', fontsize=16, color='#4a4a4a')
ax.text(2.4, 1.57, "From DVD rentals to a global audience of over 150m people - is it time for Netflix to Chill?", fontfamily='serif', 
        fontsize=12, color='#4a4a4a')

# images_dir = '/content/drive/MyDrive/kaggle/Netflix Movies and TV Shows'
# plt.savefig(f"{images_dir}/netflix_history2.png", dpi=200)
# from google.colab import files
# files.download(f"{images_dir}/netflix_history2.png")

업로드 주소

해당 주피터 노트북은 여기
지속적으로 업데이트 할 예정이니 전체 코드가 궁금하신 분들은 확인해주세요.

Just do it & Keep steady

댓글남기기