Skip to content

Commit

Permalink
Merge pull request #5 from vmieres/hc/monti
Browse files Browse the repository at this point in the history
Hc/monti
  • Loading branch information
harishkumarchandra authored Nov 7, 2020
2 parents 8a90d90 + 712ac22 commit ed51602
Show file tree
Hide file tree
Showing 5 changed files with 4,604 additions and 1,130 deletions.
171 changes: 171 additions & 0 deletions .ipynb_checkpoints/MCForecastTools-checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# Import libraries and dependencies
import numpy as np
import pandas as pd
import os
import datetime as dt
import pytz

class MCSimulation:
"""
A Python class for runnning Monte Carlo simulation on portfolio price data.
...
Attributes
----------
portfolio_data : pandas.DataFrame
portfolio dataframe
weights: list(float)
portfolio investment breakdown
nSim: int
number of samples in simulation
nTrading: int
number of trading days to simulate
simulated_return : pandas.DataFrame
Simulated data from Monte Carlo
confidence_interval : pandas.Series
the 95% confidence intervals for simulated final cumulative returns
"""

def __init__(self, portfolio_data, weights="", num_simulation=1000, num_trading_days=252):
"""
Constructs all the necessary attributes for the MCSimulation object.
Parameters
----------
portfolio_data: pandas.DataFrame
DataFrame containing stock price information from Alpaca API
weights: list(float)
A list fractions representing percentage of total investment per stock. DEFAULT: Equal distribution
num_simulation: int
Number of simulation samples. DEFAULT: 1000 simulation samples
num_trading_days: int
Number of trading days to simulate. DEFAULT: 252 days (1 year of business days)
"""

# Check to make sure that all attributes are set
if not isinstance(portfolio_data, pd.DataFrame):
raise TypeError("portfolio_data must be a Pandas DataFrame")

# Set weights if empty, otherwise make sure sum of weights equals one.
if weights == "":
num_stocks = len(portfolio_data.columns.unique())
weights = [1.0/num_stocks for s in range(0,num_stocks)]
else:
if round(sum(weights),2) < .99:
raise AttributeError("Sum of portfolio weights must equal one.")

# Calculate daily return if not within dataframe
if not "daily_return" in portfolio_data.columns.get_level_values(1).unique():
close_df = portfolio_data.xs('close',level=1,axis=1).pct_change()
tickers = portfolio_data.columns.get_level_values(0).unique()
column_names = [(x,"daily_return") for x in tickers]
close_df.columns = pd.MultiIndex.from_tuples(column_names)
portfolio_data = portfolio_data.merge(close_df,left_index=True,right_index=True).reindex(columns=tickers,level=0)

# Set class attributes
self.portfolio_data = portfolio_data
self.weights = weights
self.nSim = num_simulation
self.nTrading = num_trading_days
self.simulated_return = ""

def calc_cumulative_return(self):
"""
Calculates the cumulative return of a stock over time using a Monte Carlo simulation (Brownian motion with drift).
"""

# Get closing prices of each stock
last_prices = self.portfolio_data.xs('close',level=1,axis=1)[-1:].values.tolist()[0]

# Calculate the mean and standard deviation of daily returns for each stock
daily_returns = self.portfolio_data.xs('daily_return',level=1,axis=1)
mean_returns = daily_returns.mean().tolist()
std_returns = daily_returns.std().tolist()

# Initialize empty Dataframe to hold simulated prices
portfolio_cumulative_returns = pd.DataFrame()

# Run the simulation of projecting stock prices 'nSim' number of times
for n in range(self.nSim):

if n % 10 == 0:
print(f"Running Monte Carlo simulation number {n}.")

# Create a list of lists to contain the simulated values for each stock
simvals = [[p] for p in last_prices]

# For each stock in our data:
for s in range(len(last_prices)):

# Simulate the returns for each trading day
for i in range(self.nTrading):

# Calculate the simulated price using the last price within the list
simvals[s].append(simvals[s][-1] * (1 + np.random.normal(mean_returns[s], std_returns[s])))

# Calculate the daily returns of simulated prices
sim_df = pd.DataFrame(simvals).T.pct_change()

# Use the `dot` function with the weights to multiply weights with each column's simulated daily returns
sim_df = sim_df.dot(self.weights)

# Calculate the normalized, cumulative return series
portfolio_cumulative_returns[n] = (1 + sim_df.fillna(0)).cumprod()

# Set attribute to use in plotting
self.simulated_return = portfolio_cumulative_returns

# Calculate 95% confidence intervals for final cumulative returns
self.confidence_interval = portfolio_cumulative_returns.iloc[-1, :].quantile(q=[0.025, 0.975])

return portfolio_cumulative_returns

def plot_simulation(self):
"""
Visualizes the simulated stock trajectories using calc_cumulative_return method.
"""

# Check to make sure that simulation has run previously.
if not isinstance(self.simulated_return,pd.DataFrame):
self.calc_cumulative_return()

# Use Pandas plot function to plot the return data
plot_title = f"{self.nSim} Simulations of Cumulative Portfolio Return Trajectories Over the Next {self.nTrading} Trading Days."
return self.simulated_return.plot(legend=None,title=plot_title)

def plot_distribution(self):
"""
Visualizes the distribution of cumulative returns simulated using calc_cumulative_return method.
"""

# Check to make sure that simulation has run previously.
if not isinstance(self.simulated_return,pd.DataFrame):
self.calc_cumulative_return()

# Use the `plot` function to create a probability distribution histogram of simulated ending prices
# with markings for a 95% confidence interval
plot_title = f"Distribution of Final Cumuluative Returns Across All {self.nSim} Simulations"
plt = self.simulated_return.iloc[-1, :].plot(kind='hist', bins=10,density=True,title=plot_title)
plt.axvline(self.confidence_interval.iloc[0], color='r')
plt.axvline(self.confidence_interval.iloc[1], color='r')
return plt

def summarize_cumulative_return(self):
"""
Calculate final summary statistics for Monte Carlo simulated stock data.
"""

# Check to make sure that simulation has run previously.
if not isinstance(self.simulated_return,pd.DataFrame):
self.calc_cumulative_return()

metrics = self.simulated_return.iloc[-1].describe()
ci_series = self.confidence_interval
ci_series.index = ["95% CI Lower","95% CI Upper"]
return metrics.append(ci_series)
Loading

0 comments on commit ed51602

Please sign in to comment.