# Uncomment the line below each time the Kernel restarts
#! pip install easymoney
from easymoney.money import EasyPeasy
import requests as reqs
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import datetime
import math
import string
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.ticker import ScalarFormatter
import statsmodels.formula.api as sm


# Page that lists Super Bowl ad cost and viewership figures in an HTML table.
URL = "https://superbowl-ads.com/cost-of-super-bowl-advertising-breakdown-by-year/"

# A timeout keeps the notebook from hanging on an unresponsive server, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed as if it were the data.
page = reqs.get(URL, timeout=30)
page.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and emits a warning), so the parse tree can differ
# from one machine to the next.
soup = BeautifulSoup(page.text, "html.parser")


# Locate the table to scrape; it is selected by its border="0" attribute.
table = soup.find('table', attrs={'border': '0'})
if table is None:
    # Fail with a clear message if the page layout changed, instead of an
    # AttributeError on None further down.
    raise ValueError('No <table border="0"> found on the page; the layout may have changed.')

# Collect the text of every <td> cell, one list per table row. Rows without
# any <td> cell are skipped.
table_rows = table.find_all('tr')
result = []
for tr in table_rows:
    cells = [td.text for td in tr.find_all('td')]
    if cells:
        result.append(cells)

if len(result) < 2:
    raise ValueError('Expected a leading row and a title row in the scraped table.')

# The first collected row is discarded; the row after it holds the column
# titles, and everything that remains is data.
del result[0]
column_titles = result.pop(0)


# Build the DataFrame from the scraped rows, using the scraped title row as
# the column names.
df = pd.DataFrame(result, columns=column_titles)
df.head()


# Number the rows from 1 instead of 0.
df.index = np.arange(1, len(df) + 1)

# Drop the columns that are not used below, in a single call.
df = df.drop(columns=['Super Bowl', 'Game Date', 'Network', 'Rating', 'Share'])

# Strip the "$" sign and thousands separators, then convert to integers.
# This is done column-wise because assigning to the rows yielded by
# DataFrame.iterrows() is not guaranteed to write back to the DataFrame
# (the row may be a copy), which would silently leave the strings unchanged.
for column in ("Avg. Cost Per 30-Seconds", "Avg. Number of Viewers"):
    df[column] = df[column].str.replace(r"[$,]", "", regex=True).astype("int64")

# Short names for the three remaining columns (positional, as before).
df.columns = ['Year', 'Cost', 'Views']

df.head()


# Flip the row order so the oldest entry comes first, then renumber the
# rows so the index again runs from 1 upwards.
row_count = len(df)
df = df[::-1]
df.index = np.arange(row_count) + 1

df.head()


# Bar chart of viewers only: one vertical bar per year.
# Re-wrapping in pd.DataFrame is kept from the original code.
df = pd.DataFrame(df)

years = df['Year']
viewers = df['Views']

# Figure size
fig = plt.figure(figsize=(50, 35))
# Show y-axis values in plain notation rather than scientific notation.
plt.ticklabel_format(style='plain', axis='y')
# Vertical bar plot
plt.bar(years, viewers, color='orange')
plt.xticks(rotation=90)
plt.xlabel("Years", fontsize=30)
plt.ylabel("# of Viewers", fontsize=30)
plt.title("# of Viewers in Relation to Year", fontsize=30)

# grid() expects a boolean; a string only works because any non-empty
# string (even "Off") is truthy.
plt.grid(True)
# Show plot
plt.show()


# Bar chart of advertising cost only: one bar per year.
df = pd.DataFrame(df)

seasons = df['Year']
ad_costs = df['Cost']

# Shared text size for the axis labels and the title.
label_style = {"fontsize": 30}

# Large canvas, with y-axis values written out in plain notation.
fig = plt.figure(figsize=(50, 35))
plt.ticklabel_format(style='plain', axis='y')

# Draw the bars and turn the year labels sideways.
plt.bar(seasons, ad_costs, color='blue')
plt.xticks(rotation=90)

plt.title("Cost of Advertising in Relation to Year", **label_style)
plt.xlabel("Year", **label_style)
plt.ylabel("Cost", **label_style)

plt.show()


# Grouped bar chart: cost on the left y-axis, viewers on a secondary
# (right) y-axis.

# Set the default font size BEFORE plotting: rcParams only apply to artists
# created afterwards, so updating them after the plot is drawn does not
# restyle this figure.
plt.rcParams.update({'font.size': 22})

_ = df.plot(kind='bar', secondary_y='Views', figsize=(50,35))

# The figure now holds two axes: the primary one and the secondary-y one.
ax1, ax2 = plt.gcf().get_axes()

ax1.legend(loc='upper left', title='Avg. Cost for Advertisement')
ax2.legend(loc='upper right', title='Avg. Number of Viewers')

ax1.set_title('Avg. Cost of Advertising in Relation to Avg. Viewership', fontsize=45)
ax1.set_ylabel('Avg. Cost of 30 Second Advertisement', fontsize=30)
ax1.set_xlabel('Year of Season', fontsize=30)
ax2.set_ylabel('Avg. Number of Viewers', fontsize=30)

# pandas draws the bars at positions 0..len(df)-1, while df.index starts at 1.
# Placing the ticks at df.index therefore shifts every year label one bar to
# the right; use the bar positions instead.
ax1.set_xticks(np.arange(len(df)))
ax1.set_xticklabels(df["Year"].values, rotation=90)

# Show both y-axes in plain notation rather than scientific notation.
ax1.ticklabel_format(style='plain', axis='y')
ax2.ticklabel_format(style='plain', axis='y')

plt.show()


# Adjust every ad cost for inflation using EasyPeasy.
ep = EasyPeasy()

# Work on a copy: a plain `inflatedDF = df` only creates a second name for
# the same object, so writing the adjusted costs would also overwrite the
# nominal costs in df.
inflatedDF = df.copy()

inflatedCol = []
for cost, year in zip(df["Cost"], df["Year"]):
    # The normalize() result is treated as euros here (it is converted
    # EUR -> USD on the next line).
    euro_value = ep.normalize(amount=int(cost), region="US", from_year=int(year), to_year="latest", pretty_print=False)
    adjusted_us_value = ep.currency_converter(amount=euro_value, from_currency="EUR", to_currency="USD", pretty_print=False)
    inflatedCol.append(adjusted_us_value)

inflatedDF["Cost"] = inflatedCol
inflatedDF.head()


# Grouped bar chart of the inflation-adjusted data: cost on the left y-axis,
# viewers on a secondary (right) y-axis.

# Set the default font size BEFORE plotting: rcParams only apply to artists
# created afterwards, so updating them after the plot is drawn does not
# restyle this figure.
plt.rcParams.update({'font.size': 22})

_ = inflatedDF.plot(kind='bar', secondary_y='Views', figsize=(50,35))

# The figure now holds two axes: the primary one and the secondary-y one.
ax1, ax2 = plt.gcf().get_axes()

ax1.legend(loc='upper left', title='Avg. Cost for Advertisement')
ax2.legend(loc='upper right', title='Avg. Number of Viewers')

ax1.set_title('Avg. Cost of Advertising AFTER INFLATION in Relation to Avg. Viewership', fontsize=45)
ax1.set_ylabel('Avg. Cost of 30 Second Advertisement', fontsize=30)
ax1.set_xlabel('Year of Season', fontsize=30)
ax2.set_ylabel('Avg. Number of Viewers', fontsize=30)

# pandas draws the bars at positions 0..len(inflatedDF)-1, while the frame's
# index starts at 1. Placing the ticks at the index values therefore shifts
# every year label one bar to the right; use the bar positions instead.
ax1.set_xticks(np.arange(len(inflatedDF)))
ax1.set_xticklabels(inflatedDF["Year"].values, rotation=90)

# Show both y-axes in plain notation rather than scientific notation.
ax1.ticklabel_format(style='plain', axis='y')
ax2.ticklabel_format(style='plain', axis='y')

plt.show()


# CPM = cost per thousand viewers.
# Formula: CPM = cost of ad / total number of viewers * 1000
cpmCol = [
    float(ad_cost) / float(audience) * 1000
    for ad_cost, audience in zip(inflatedDF["Cost"], inflatedDF["Views"])
]

# Store the result as a new column next to the cost and viewer figures.
inflatedDF["CPM"] = cpmCol
inflatedDF.head()


# Scatter plot of CPM per year with a fitted polynomial trend curve.

# Convert the Year column to integers so it can serve as a numeric x-axis.
year = np.array([int(value) for value in inflatedDF["Year"]])
cpm = inflatedDF['CPM']

# Figure size
fig = plt.figure(figsize=(50, 35))
# Show y-axis values in plain notation rather than scientific notation.
plt.ticklabel_format(style='plain', axis='y')
# Scatter plot: one point per row
plt.scatter(year, cpm, s=1000, c='lightblue')
plt.xticks(rotation=90)
plt.ylabel("CPM (Cost of Ad Per Thousand Views)")
plt.title("CPM per Year")

# Degree-5 trend curve. Polynomial.fit rescales the x values to [-1, 1]
# before solving, which keeps the fit well conditioned; fitting raw year
# values (~2000) raised to the 5th power with np.polyfit is numerically
# ill-conditioned.
p = np.polynomial.Polynomial.fit(year, cpm, 5)
plt.plot(year, p(year), "r--")
# Show plot
plt.show()


# Regress CPM on the year as a NUMERIC predictor. The Year column holds
# strings, and the formula interface treats a string column as categorical:
# that fits one dummy coefficient per distinct year, which reproduces the data
# almost exactly (R-squared of 1.000 with a single residual degree of freedom)
# and says nothing about a trend over time. Casting to int yields a single
# slope coefficient whose sign and p-value can actually be tested.
ols_data = inflatedDF.assign(Year=inflatedDF["Year"].astype(int))
sm.ols(formula="CPM ~ Year", data=ols_data).fit().summary()

	Super Bowl	Season	Game Date	Network	Rating	Share	Avg. Cost Per 30-Seconds	Avg. Number of Viewers
0	LIV	2019	Feb 2 2020	FOX	41.6	69	$5,600,000	99,900,000
1	LIII	2018	Feb 3 2019	CBS	41.1	67	$5,200,000	98,477,000
2	LII	2017	Feb 4 2018	NBC	43.1	68	$5,235,000	103,400,000
3	LI	2016	Feb 5 2017	FOX	45.3	70	$5,400,000	111,300,000
4	L (50)	2015	Feb 7 2016	CBS	46.6	72	$4,800,000	111,900,000

	Year	Cost	Views
1	2019	5600000	99900000
2	2018	5200000	98477000
3	2017	5235000	103400000
4	2016	5400000	111300000
5	2015	4800000	111900000

	Year	Cost	Views
1	1966	37500	24430000
2	1966	42500	26750000
3	1967	54500	39120000
4	1968	55000	41660000
5	1969	78200	44270000

	Year	Cost	Views
1	1966	299011.63	24430000
2	1966	338879.83	26750000
3	1967	422839.13	39120000
4	1968	409236.63	41660000
5	1969	551722.84	44270000

	Year	Cost	Views	CPM
1	1966	299011.63	24430000	12.239526
2	1966	338879.83	26750000	12.668405
3	1967	422839.13	39120000	10.808771
4	1968	409236.63	41660000	9.823251
5	1969	551722.84	44270000	12.462680

Are Super Bowl Ads Worth It?¶

Project Overview: An Introduction¶

Project Overview: Observations¶

Project Overview: Hypothesis¶

Imports Used¶

Part 1: Data Wrangling, Collection, Curation & Parsing¶

Part 2: Data Management/Representation¶

Part 3: Exploratory Data Analysis¶

Part 4: Hypothesis Testing¶

Part 5: Conclusion¶

Dep. Variable:	CPM	R-squared:	1.000
Model:	OLS	Adj. R-squared:	1.000
Method:	Least Squares	F-statistic:	2178.
Date:	Wed, 21 Jul 2021	Prob (F-statistic):	0.0170
Time:	20:32:45	Log-Likelihood:	97.784
No. Observations:	55	AIC:	-87.57
Df Residuals:	1	BIC:	20.83
Df Model:	53
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	12.4540	0.214	58.077	0.011	9.729	15.179
Year[T.1967]	-1.6452	0.371	-4.429	0.141	-6.365	3.074
Year[T.1968]	-2.6307	0.371	-7.083	0.089	-7.350	2.089
Year[T.1969]	0.0087	0.371	0.023	0.985	-4.711	4.728
Year[T.1970]	-1.9567	0.371	-5.268	0.119	-6.676	2.763
Year[T.1971]	-2.7377	0.371	-7.371	0.086	-7.457	1.982
Year[T.1972]	-2.2276	0.371	-5.998	0.105	-6.947	2.492
Year[T.1973]	-0.7845	0.371	-2.112	0.281	-5.504	3.935
Year[T.1974]	-2.4339	0.371	-6.553	0.096	-7.153	2.285
Year[T.1975]	-3.2874	0.371	-8.851	0.072	-8.007	1.432
Year[T.1976]	-3.2923	0.371	-8.864	0.072	-8.012	1.427
Year[T.1977]	-3.6744	0.371	-9.893	0.064	-8.394	1.045
Year[T.1978]	-2.6335	0.371	-7.090	0.089	-7.353	2.086
Year[T.1979]	-2.0699	0.371	-5.573	0.113	-6.789	2.649
Year[T.1980]	0.1930	0.371	0.520	0.695	-4.526	4.912
Year[T.1981]	-1.6246	0.371	-4.374	0.143	-6.344	3.095
Year[T.1982]	0.6657	0.371	1.792	0.324	-4.054	5.385
Year[T.1983]	-0.1276	0.371	-0.344	0.789	-4.847	4.592
Year[T.1984]	2.8385	0.371	7.642	0.083	-1.881	7.558
Year[T.1985]	1.8415	0.371	4.958	0.127	-2.878	6.561
Year[T.1986]	3.7949	0.371	10.217	0.062	-0.924	8.514
Year[T.1987]	5.8784	0.371	15.827	0.040	1.159	10.598
Year[T.1988]	5.6519	0.371	15.217	0.042	0.933	10.371
Year[T.1989]	7.3459	0.371	19.778	0.032	2.627	12.065
Year[T.1990]	7.4763	0.371	20.129	0.032	2.757	12.196
Year[T.1991]	7.8412	0.371	21.111	0.030	3.122	12.561
Year[T.1992]	4.7766	0.371	12.860	0.049	0.057	9.496
Year[T.1993]	5.4620	0.371	14.706	0.043	0.743	10.181
Year[T.1994]	11.6168	0.371	31.277	0.020	6.897	16.336
Year[T.1995]	7.1335	0.371	19.206	0.033	2.414	11.853
Year[T.1996]	10.0801	0.371	27.139	0.023	5.361	14.799
Year[T.1997]	10.6763	0.371	28.745	0.022	5.957	15.396
Year[T.1998]	17.8894	0.371	48.165	0.013	13.170	22.609
Year[T.1999]	24.4286	0.371	65.771	0.010	19.709	29.148
Year[T.2000]	26.7531	0.371	72.029	0.009	22.034	31.472
Year[T.2001]	24.5923	0.371	66.212	0.010	19.873	29.312
Year[T.2002]	23.2585	0.371	62.621	0.010	18.539	27.978
Year[T.2003]	23.6167	0.371	63.585	0.010	18.897	28.336
Year[T.2004]	25.7527	0.371	69.336	0.009	21.033	30.472
Year[T.2005]	24.0564	0.371	64.769	0.010	19.337	28.776
Year[T.2006]	20.4103	0.371	54.952	0.012	15.691	25.130
Year[T.2007]	22.1304	0.371	59.583	0.011	17.411	26.850
Year[T.2008]	24.0711	0.371	64.808	0.010	19.352	28.790
Year[T.2009]	21.0149	0.371	56.580	0.011	16.296	25.734
Year[T.2010]	20.6908	0.371	55.707	0.011	15.972	25.410
Year[T.2011]	23.7129	0.371	63.844	0.010	18.994	28.432
Year[T.2012]	26.9533	0.371	72.568	0.009	22.234	31.673
Year[T.2013]	27.4019	0.371	73.776	0.009	22.683	32.121
Year[T.2014]	28.1456	0.371	75.779	0.008	23.426	32.865
Year[T.2015]	34.3858	0.371	92.579	0.007	29.666	39.105
Year[T.2016]	39.8648	0.371	107.331	0.006	35.145	44.584
Year[T.2017]	41.0026	0.371	110.394	0.006	36.283	45.722
Year[T.2018]	41.9704	0.371	113.000	0.006	37.251	46.690
Year[T.2019]	44.2936	0.371	119.255	0.005	39.574	49.013

Omnibus:	34.524	Durbin-Watson:	2.500
Prob(Omnibus):	0.000	Jarque-Bera (JB):	1375.573
Skew:	0.000	Prob(JB):	1.99e-299
Kurtosis:	27.500	Cond. No.	39.6