Project: Analyzing Stocks

10 minute read

In this project, we will focus on exploratory data analysis of stock prices for following companies from 2006 to 2015.

  • Amazon
  • Microsoft
  • Bank of America
  • Citibank
#Imports
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
#visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
#Start and end period
start = dt.datetime(2006,1,1)
end = dt.datetime(2016,1,1)
#Tickers of the companies
tickers = ['AMZN','MSFT','BAC','C']
#Fetch data from yahoo stocks using DataReader
amazon = web.DataReader('AMZN',data_source='yahoo',start=start,end=end)
microsoft = web.DataReader('MSFT',data_source='yahoo',start=start,end=end)
bank_of_america = web.DataReader('BAC',data_source='yahoo',start=start,end=end)
citigroup = web.DataReader('C',data_source='yahoo',start=start,end=end)
#Concating all the data frames of different companies into one large data frame. 
stock_data = pd.concat([amazon,microsoft,bank_of_america,citigroup],axis=1,keys=tickers)
#Adding column names
stock_data.columns.names = ['Tickers','Stock Info']
#Glimpse of data
stock_data.head()
Tickers AMZN MSFT BAC C
Stock Info High Low Open Close Volume Adj Close High Low Open Close Volume Adj Close High Low Open Close Volume Adj Close High Low Open Close Volume Adj Close
Date
2006-01-03 47.849998 46.250000 47.470001 47.580002 7582200 47.580002 27.000000 26.10 26.250000 26.840000 79973000.0 19.602528 47.180000 46.150002 46.919998 47.080002 16296700.0 35.298687 493.799988 481.100006 490.000000 492.899994 1537600.0 440.882477
2006-01-04 47.730000 46.689999 47.490002 47.250000 7440900 47.250000 27.080000 26.77 26.770000 26.969999 57975600.0 19.697485 47.240002 46.450001 47.000000 46.580002 17757900.0 34.923801 491.000000 483.500000 488.600006 483.799988 1870900.0 432.742950
2006-01-05 48.200001 47.110001 47.160000 47.650002 5417200 47.650002 27.129999 26.91 26.959999 26.990000 48245500.0 19.712091 46.830002 46.320000 46.580002 46.639999 14970700.0 34.968796 487.799988 484.000000 484.399994 486.200012 1143100.0 434.889679
2006-01-06 48.580002 47.320000 47.970001 47.869999 6152900 47.869999 27.000000 26.49 26.889999 26.910000 100963000.0 19.653666 46.910000 46.349998 46.799999 46.570000 12599800.0 34.916302 489.000000 482.000000 488.799988 486.200012 1370200.0 434.889679
2006-01-09 47.099998 46.400002 46.549999 47.080002 8943100 47.080002 27.070000 26.76 26.930000 26.860001 55625000.0 19.617136 46.970001 46.360001 46.720001 46.599998 15619400.0 34.938789 487.399994 483.000000 486.000000 483.899994 1680700.0 432.832489
#Max close price for each company's stock
stock_data.xs(key='Close',level='Stock Info',axis=1).max()

    Tickers
    AMZN    693.969971
    MSFT     56.549999
    BAC      54.900002
    C       564.099976
    dtype: float64
#Return of each company
returns = pd.DataFrame()
for ticker in tickers:
    returns[ticker] = stock_data[ticker]['Close'].pct_change()
returns.head()
AMZN MSFT BAC C
Date
2006-01-03 NaN NaN NaN NaN
2006-01-04 -0.006936 0.004843 -0.010620 -0.018462
2006-01-05 0.008466 0.000742 0.001288 0.004961
2006-01-06 0.004617 -0.002964 -0.001501 0.000000
2006-01-09 -0.016503 -0.001858 0.000644 -0.004731
sns.pairplot(returns)

    <seaborn.axisgrid.PairGrid at 0x1877dbb83c8>

png

#Worst return date for each company
returns.idxmin()

    AMZN   2006-07-26
    MSFT   2009-01-22
    BAC    2009-01-20
    C      2009-02-27
    dtype: datetime64[ns]
#Best return date for each company
returns.idxmax()

    AMZN   2007-04-25
    MSFT   2008-10-13
    BAC    2009-04-09
    C      2008-11-24
    dtype: datetime64[ns]
#Standard deviation of returns
returns.std()

    AMZN    0.026638
    MSFT    0.017764
    BAC     0.036647
    C       0.038672
    dtype: float64
#Standard deviation of returns for year 2015
returns.loc['2015-01-01':'2015-12-31'].std()

    AMZN    0.021147
    MSFT    0.017801
    BAC     0.016163
    C       0.015289
    dtype: float64
#Lets create distribution plot of the stock returns of Amazon for year 2015
sns.distplot(returns.loc['2015-01-01':'2015-12-31']['AMZN'],bins=100)

    <matplotlib.axes._subplots.AxesSubplot at 0x1877f3e4588>

png

#Lets create distribution plot of the stock returns of Citigroup for year 2008
sns.distplot(returns.loc['2008-01-01':'2008-12-31']['C'],bins=100)

    <matplotlib.axes._subplots.AxesSubplot at 0x187009ce518>

png

#Close price of each company throughout the entire time period of 2006-2015
stock_data.xs(key='Close',axis=1,level='Stock Info').plot(figsize=(10,5))
plt.legend()

    <matplotlib.legend.Legend at 0x1877fdf1278>

png

#30 day moving average of Amazon for year 2015
stock_data.loc['2015-1-1':'2015-12-31','AMZN']['Close'].rolling(30).mean().plot(figsize=(10,5),label='MA30')
stock_data.loc['2015-1-1':'2015-12-31','AMZN']['Close'].plot(label='Close')
plt.tight_layout()
plt.legend()

    <matplotlib.legend.Legend at 0x1870483e048>

png

#30 day moving average of Citigroup for year 2008
stock_data.loc['2008-1-1':'2008-12-31','C']['Close'].rolling(30).mean().plot(figsize=(10,5),label='MA30')
stock_data.loc['2008-1-1':'2008-12-31','C']['Close'].plot(label='Close')
plt.tight_layout()
plt.legend()

    <matplotlib.legend.Legend at 0x18704839898>

png

#Heatmap for the relation between close prices of different stocks
sns.heatmap(stock_data.xs(key='Close',axis=1,level='Stock Info').corr(),cmap='magma_r',annot=True)

    <matplotlib.axes._subplots.AxesSubplot at 0x18704ae9a58>

png

Leave a comment