mirror of https://github.com/tcsenpai/goldigger.git

Better visualizations

This commit is contained in:
parent aec7171ed9 · commit db5cf0b2a8

goldigger.py · 566 changed lines
@@ -22,31 +22,18 @@ import sklearn.base
 import argparse
 from sklearn.feature_selection import SelectKBest, f_regression, RFE
 from tensorflow.keras.regularizers import l1_l2
+from matplotlib.dates import num2date


 # Suppress warnings and TensorFlow logging
 def suppress_warnings_method():
     # Filter out warnings
-    warnings.filterwarnings('ignore')
+    warnings.filterwarnings("ignore")
     # Suppress TensorFlow logging
-    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
     # Suppress TensorFlow verbose logging
     tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

-# Custom progress bar for Keras model training
-class TqdmProgressCallback(Callback):
-    def __init__(self, epochs, description):
-        super().__init__()
-        # Initialize progress bar
-        self.progress_bar = tqdm(total=epochs, desc=description, leave=False)
-
-    def on_epoch_end(self, epoch, logs=None):
-        # Update progress bar at the end of each epoch
-        self.progress_bar.update(1)
-        self.progress_bar.set_postfix(loss=f"{logs['loss']:.4f}", val_loss=f"{logs['val_loss']:.4f}")
-
-    def on_train_end(self, logs=None):
-        # Close progress bar at the end of training
-        self.progress_bar.close()
-
 # Fetch historical stock data from Yahoo Finance
 def fetch_stock_data(symbol, start_date, end_date):
@@ -56,27 +43,35 @@ def fetch_stock_data(symbol, start_date, end_date):
     data = yf.download(symbol, start=start_date, end=end_date)
     return data


 # Add technical indicators to the stock data
 def add_technical_indicators(data):
     """
     Add technical indicators to the dataset.
     """
-    data['SMA_20'] = ta.trend.sma_indicator(data['Close'], window=20)
-    data['SMA_50'] = ta.trend.sma_indicator(data['Close'], window=50)
-    data['RSI'] = ta.momentum.rsi(data['Close'], window=14)
-    data['MACD'] = ta.trend.macd_diff(data['Close'])
-    data['BB_upper'], data['BB_middle'], data['BB_lower'] = ta.volatility.bollinger_hband_indicator(data['Close']), ta.volatility.bollinger_mavg(data['Close']), ta.volatility.bollinger_lband_indicator(data['Close'])
+    data["SMA_20"] = ta.trend.sma_indicator(data["Close"], window=20)
+    data["SMA_50"] = ta.trend.sma_indicator(data["Close"], window=50)
+    data["RSI"] = ta.momentum.rsi(data["Close"], window=14)
+    data["MACD"] = ta.trend.macd_diff(data["Close"])
+    data["BB_upper"], data["BB_middle"], data["BB_lower"] = (
+        ta.volatility.bollinger_hband_indicator(data["Close"]),
+        ta.volatility.bollinger_mavg(data["Close"]),
+        ta.volatility.bollinger_lband_indicator(data["Close"]),
+    )
     # Advanced indicators
-    data['EMA_20'] = ta.trend.ema_indicator(data['Close'], window=20)
-    data['ATR'] = ta.volatility.average_true_range(data['High'], data['Low'], data['Close'])
-    data['ADX'] = ta.trend.adx(data['High'], data['Low'], data['Close'])
-    data['Stoch_K'] = ta.momentum.stoch(data['High'], data['Low'], data['Close'])
-    data['Volatility'] = data['Close'].rolling(window=20).std()
-    data['Price_Change'] = data['Close'].pct_change()
-    data['Volume_Change'] = data['Volume'].pct_change()
-    data['High_Low_Range'] = (data['High'] - data['Low']) / data['Close']
+    data["EMA_20"] = ta.trend.ema_indicator(data["Close"], window=20)
+    data["ATR"] = ta.volatility.average_true_range(
+        data["High"], data["Low"], data["Close"]
+    )
+    data["ADX"] = ta.trend.adx(data["High"], data["Low"], data["Close"])
+    data["Stoch_K"] = ta.momentum.stoch(data["High"], data["Low"], data["Close"])
+    data["Volatility"] = data["Close"].rolling(window=20).std()
+    data["Price_Change"] = data["Close"].pct_change()
+    data["Volume_Change"] = data["Volume"].pct_change()
+    data["High_Low_Range"] = (data["High"] - data["Low"]) / data["Close"]
     return data


 # Prepare data for model training by scaling and creating sequences
 def prepare_data(data, look_back=60):
     """
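Note: the indicator calls reformatted in this hunk come straight from the `ta` package. A minimal standalone sketch of the same calls on synthetic prices (the data here is illustrative, not from the repo):

import numpy as np
import pandas as pd
import ta

# Synthetic close prices, just to exercise the indicator functions
rng = np.random.default_rng(0)
df = pd.DataFrame({"Close": 100 + rng.normal(0, 1, 300).cumsum()})

df["SMA_20"] = ta.trend.sma_indicator(df["Close"], window=20)  # 20-day simple moving average
df["RSI"] = ta.momentum.rsi(df["Close"], window=14)            # 14-day relative strength index
df["MACD"] = ta.trend.macd_diff(df["Close"])                   # MACD histogram
print(df.tail(3))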
@@ -92,39 +87,60 @@ def prepare_data(data, look_back=60):

     return np.array(X), np.array(y), scaler


 # Create an LSTM model for time series prediction
 def create_lstm_model(input_shape):
-    model = Sequential([
-        LSTM(units=64, return_sequences=True, input_shape=input_shape, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
-        Dropout(0.2),  # Add dropout layer
-        LSTM(units=32, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
-        Dropout(0.2),  # Add dropout layer
-        Dense(units=16, activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
-        Dense(units=1)
-    ])
-    model.compile(optimizer='adam', loss='mean_squared_error')
+    model = Sequential(
+        [
+            LSTM(
+                units=64,
+                return_sequences=True,
+                input_shape=input_shape,
+                kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),
+            ),
+            Dropout(0.2),  # Add dropout layer
+            LSTM(units=32, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
+            Dropout(0.2),  # Add dropout layer
+            Dense(
+                units=16, activation="relu", kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)
+            ),
+            Dense(units=1),
+        ]
+    )
+    model.compile(optimizer="adam", loss="mean_squared_error")
     return model


 # Create a GRU model for time series prediction
 def create_gru_model(input_shape):
-    model = Sequential([
-        GRU(units=64, return_sequences=True, input_shape=input_shape, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
-        Dropout(0.2),  # Add dropout layer
-        GRU(units=32, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
-        Dropout(0.2),  # Add dropout layer
-        Dense(units=16, activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
-        Dense(units=1)
-    ])
-    model.compile(optimizer='adam', loss='mean_squared_error')
+    model = Sequential(
+        [
+            GRU(
+                units=64,
+                return_sequences=True,
+                input_shape=input_shape,
+                kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),
+            ),
+            Dropout(0.2),  # Add dropout layer
+            GRU(units=32, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
+            Dropout(0.2),  # Add dropout layer
+            Dense(
+                units=16, activation="relu", kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)
+            ),
+            Dense(units=1),
+        ]
+    )
+    model.compile(optimizer="adam", loss="mean_squared_error")
     return model


 # Train and evaluate a model using time series cross-validation
 def train_and_evaluate_model(model, X, y, n_splits=5, model_name="Model"):
     tscv = TimeSeriesSplit(n_splits=n_splits)
     all_predictions = []
     all_true_values = []

-    with tqdm(total=n_splits, desc=f"Cross-validation for {model_name}", leave=False) as pbar:
+    with tqdm(total=n_splits, desc=f"Training {model_name}", leave=False) as pbar:
         for train_index, test_index in tscv.split(X):
             X_train, X_test = X[train_index], X[test_index]
             y_train, y_test = y[train_index], y[test_index]
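Note: the two builders above differ only in the recurrent cell (LSTM vs GRU); both expect input shaped (timesteps, features), and the feature list used later in this file has 13 columns. A minimal sketch of driving such a model, with illustrative shapes (the tiny layer size is mine, not the repo's):

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

X = np.random.rand(32, 60, 13).astype("float32")  # (samples, look_back, n_features)
y = np.random.rand(32).astype("float32")

model = Sequential([LSTM(8, input_shape=(60, 13)), Dense(1)])
model.compile(optimizer="adam", loss="mean_squared_error")
model.fit(X, y, epochs=1, verbose=0)  # one epoch, just to confirm the shapes line up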
@@ -135,10 +151,25 @@ def train_and_evaluate_model(model, X, y, n_splits=5, model_name="Model"):
                 model.fit(X_train_2d, y_train)
                 predictions = model.predict(X_test_2d)
             elif isinstance(model, Sequential):
-                early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
-                model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0,
-                          validation_split=0.2, callbacks=[early_stopping])
-                predictions = model.predict(X_test).flatten()
+                early_stopping = EarlyStopping(
+                    monitor="val_loss", patience=10, restore_best_weights=True
+                )
+                with tqdm(total=100, desc="Epochs", leave=False) as epoch_pbar:
+
+                    class EpochProgressCallback(Callback):
+                        def on_epoch_end(self, epoch, logs=None):
+                            epoch_pbar.update(1)
+
+                    model.fit(
+                        X_train,
+                        y_train,
+                        epochs=100,
+                        batch_size=32,
+                        verbose=0,
+                        validation_split=0.2,
+                        callbacks=[early_stopping, EpochProgressCallback()],
+                    )
+                predictions = model.predict(X_test, verbose=0).flatten()

             all_predictions.extend(predictions)
             all_true_values.extend(y_test)
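Note: the epoch progress reporting added in this hunk works because the `Callback` subclass is defined inside the `with` block and closes over `epoch_pbar`. The pattern in isolation (the wrapper function name is mine, not from the repo):

from tensorflow.keras.callbacks import Callback
from tqdm import tqdm

def fit_with_progress(model, X, y, epochs=100, **fit_kwargs):
    # The callback closes over epoch_pbar, so each completed epoch ticks the bar
    with tqdm(total=epochs, desc="Epochs", leave=False) as epoch_pbar:

        class EpochProgressCallback(Callback):
            def on_epoch_end(self, epoch, logs=None):
                epoch_pbar.update(1)

        return model.fit(
            X, y, epochs=epochs, verbose=0,
            callbacks=[EpochProgressCallback()], **fit_kwargs
        )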
@@ -147,6 +178,7 @@ def train_and_evaluate_model(model, X, y, n_splits=5, model_name="Model"):
     score = r2_score(all_true_values, all_predictions)
     return score, 0, score, np.array(all_predictions)


 # Make predictions using an ensemble of models
 def ensemble_predict(models, X):
     predictions = []
@@ -158,41 +190,60 @@ def ensemble_predict(models, X):
         predictions.append(pred.flatten())  # Flatten the predictions
     return np.mean(predictions, axis=0)


 def weighted_ensemble_predict(models, X, weights):
     predictions = []
     for model, weight in zip(models, weights):
         if isinstance(model, (RandomForestRegressor, XGBRegressor)):
             pred = model.predict(X.reshape(X.shape[0], -1))
         else:
-            pred = np.array([model.predict(X[i:i+1])[0][0] for i in range(len(X))])
+            pred = np.array(
+                [model.predict(X[i : i + 1], verbose=0)[0][0] for i in range(len(X))]
+            )
         predictions.append(weight * pred)
     return np.sum(predictions, axis=0)


 # Calculate risk metrics (Sharpe ratio and max drawdown)
 def calculate_risk_metrics(returns):
-    sharpe_ratio = np.mean(returns) / np.std(returns) * np.sqrt(252)  # Assuming daily returns
+    sharpe_ratio = (
+        np.mean(returns) / np.std(returns) * np.sqrt(252)
+    )  # Assuming daily returns
     max_drawdown = np.max(np.maximum.accumulate(returns) - returns)
     return sharpe_ratio, max_drawdown


 # Predict future stock prices using a trained model
 def predict_future(model, last_sequence, scaler, days):
     future_predictions = []
     current_sequence = last_sequence.copy()

+    with tqdm(total=days, desc="Predicting future", leave=False) as pbar:
         for _ in range(days):
             if isinstance(model, (RandomForestRegressor, XGBRegressor)):
                 prediction = model.predict(current_sequence.reshape(1, -1))
-                future_predictions.append(prediction[0])  # prediction is already a scalar
+                future_predictions.append(
+                    prediction[0]
+                )  # prediction is already a scalar
             else:
-                prediction = model.predict(current_sequence.reshape(1, current_sequence.shape[0], current_sequence.shape[1]))
-                future_predictions.append(prediction[0][0])  # Take only the first (and only) element
+                prediction = model.predict(
+                    current_sequence.reshape(
+                        1, current_sequence.shape[0], current_sequence.shape[1]
+                    ),
+                    verbose=0,
+                )
+                future_predictions.append(
+                    prediction[0][0]
+                )  # Take only the first (and only) element

             # Update the sequence for the next prediction
             current_sequence = np.roll(current_sequence, -1, axis=0)
             current_sequence[-1] = prediction  # Use the full prediction for updating
+            pbar.update(1)

     return np.array(future_predictions)


 # Split data into training and testing sets, respecting temporal order
 def time_based_train_test_split(X, y, test_size=0.2):
     """
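Note: `calculate_risk_metrics` above annualizes the Sharpe ratio by sqrt(252) trading days and, as written, measures drawdown on the per-period return series rather than on cumulative wealth. A quick numeric check of the same formulas on synthetic returns:

import numpy as np

rng = np.random.default_rng(42)
returns = rng.normal(0.0005, 0.01, 252)  # one synthetic year of daily returns

sharpe_ratio = np.mean(returns) / np.std(returns) * np.sqrt(252)
max_drawdown = np.max(np.maximum.accumulate(returns) - returns)
print(f"Sharpe: {sharpe_ratio:.2f}  max drawdown: {max_drawdown:.4f}")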
@@ -203,25 +254,23 @@ def time_based_train_test_split(X, y, test_size=0.2):
     y_train, y_test = y[:split_idx], y[split_idx:]
     return X_train, X_test, y_train, y_test


 # Tune hyperparameters for Random Forest model
 def tune_random_forest(X, y, quick_test=False):
     # Define parameter distribution based on quick_test flag
     if quick_test:
         print("Quick test mode: Performing simplified Random Forest tuning...")
-        param_dist = {
-            'n_estimators': randint(10, 50),
-            'max_depth': randint(3, 10)
-        }
+        param_dist = {"n_estimators": randint(10, 50), "max_depth": randint(3, 10)}
         n_iter = 5
     else:
         print("Full analysis mode: Performing comprehensive Random Forest tuning...")
         param_dist = {
-            'n_estimators': randint(100, 500),
-            'max_depth': randint(5, 50),
-            'min_samples_split': randint(2, 20),
-            'min_samples_leaf': randint(1, 10),
-            'max_features': ['auto', 'sqrt', 'log2'],
-            'bootstrap': [True, False]
+            "n_estimators": randint(100, 500),
+            "max_depth": randint(5, 50),
+            "min_samples_split": randint(2, 20),
+            "min_samples_leaf": randint(1, 10),
+            "max_features": ["auto", "sqrt", "log2"],
+            "bootstrap": [True, False],
         }
         n_iter = 20

@@ -229,33 +278,37 @@ def tune_random_forest(X, y, quick_test=False):
     rf = RandomForestRegressor(random_state=42)
     # Perform randomized search for best parameters
     tscv = TimeSeriesSplit(n_splits=3 if quick_test else 5)
-    rf_random = RandomizedSearchCV(estimator=rf, param_distributions=param_dist,
-                                   n_iter=n_iter, cv=tscv,
-                                   scoring='neg_mean_squared_error',  # Change to MSE
-                                   verbose=2, random_state=42, n_jobs=-1)
+    rf_random = RandomizedSearchCV(
+        estimator=rf,
+        param_distributions=param_dist,
+        n_iter=n_iter,
+        cv=tscv,
+        scoring="neg_mean_squared_error",  # Change to MSE
+        verbose=2,
+        random_state=42,
+        n_jobs=-1,
+    )
     rf_random.fit(X.reshape(X.shape[0], -1), y)
     print(f"Best Random Forest parameters: {rf_random.best_params_}")
     return rf_random.best_estimator_


 # Tune hyperparameters for XGBoost model
 def tune_xgboost(X, y, quick_test=False):
     # Define parameter distribution based on quick_test flag
     if quick_test:
         print("Quick test mode: Performing simplified XGBoost tuning...")
-        param_dist = {
-            'n_estimators': randint(10, 50),
-            'max_depth': randint(3, 6)
-        }
+        param_dist = {"n_estimators": randint(10, 50), "max_depth": randint(3, 6)}
         n_iter = 5
     else:
         print("Full analysis mode: Performing comprehensive XGBoost tuning...")
         param_dist = {
-            'n_estimators': randint(100, 500),
-            'max_depth': randint(3, 10),
-            'learning_rate': uniform(0.01, 0.3),
-            'subsample': uniform(0.6, 1.0),
-            'colsample_bytree': uniform(0.6, 1.0),
-            'gamma': uniform(0, 5)
+            "n_estimators": randint(100, 500),
+            "max_depth": randint(3, 10),
+            "learning_rate": uniform(0.01, 0.3),
+            "subsample": uniform(0.6, 1.0),
+            "colsample_bytree": uniform(0.6, 1.0),
+            "gamma": uniform(0, 5),
        }
         n_iter = 20

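Note on the search spaces in the tuning hunks above: `scipy.stats.uniform(a, b)` is parameterized as (loc, scale), so `uniform(0.6, 1.0)` samples from [0.6, 1.6], not [0.6, 1.0], and `randint(low, high)` excludes `high`. A quick check:

from scipy.stats import randint, uniform

print(randint(10, 50).rvs(5, random_state=0))    # integers in [10, 50)
print(uniform(0.6, 1.0).rvs(5, random_state=0))  # floats in [0.6, 1.6)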
@@ -263,19 +316,28 @@ def tune_xgboost(X, y, quick_test=False):
     xgb = XGBRegressor(random_state=42)
     # Perform randomized search for best parameters
     tscv = TimeSeriesSplit(n_splits=3 if quick_test else 5)
-    xgb_random = RandomizedSearchCV(estimator=xgb, param_distributions=param_dist,
-                                    n_iter=n_iter, cv=tscv,
-                                    scoring='neg_mean_squared_error',  # Change to MSE
-                                    verbose=2, random_state=42, n_jobs=-1)
+    xgb_random = RandomizedSearchCV(
+        estimator=xgb,
+        param_distributions=param_dist,
+        n_iter=n_iter,
+        cv=tscv,
+        scoring="neg_mean_squared_error",  # Change to MSE
+        verbose=2,
+        random_state=42,
+        n_jobs=-1,
+    )
     xgb_random.fit(X.reshape(X.shape[0], -1), y)
     print(f"Best XGBoost parameters: {xgb_random.best_params_}")
     return xgb_random.best_estimator_


 def implement_trading_strategy(actual_prices, predicted_prices, threshold=0.01):
     returns = []
     position = 0  # -1: short, 0: neutral, 1: long
     for i in range(1, len(actual_prices)):
-        predicted_return = (predicted_prices[i] - actual_prices[i-1]) / actual_prices[i-1]
+        predicted_return = (predicted_prices[i] - actual_prices[i - 1]) / actual_prices[
+            i - 1
+        ]
         if predicted_return > threshold and position <= 0:
             position = 1  # Buy
         elif predicted_return < -threshold and position >= 0:
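Note: the threshold rule above (continued in the next hunk) goes long when the predicted one-step return exceeds +threshold and short below -threshold. A toy trace under those rules; the `actual_return` line is my reconstruction of code elided between the hunks:

import numpy as np

actual = np.array([100.0, 101.0, 99.0, 102.0])
predicted = np.array([100.0, 102.5, 99.5, 100.2])

position, returns = 0, []
for i in range(1, len(actual)):
    predicted_return = (predicted[i] - actual[i - 1]) / actual[i - 1]
    if predicted_return > 0.01 and position <= 0:
        position = 1   # Buy
    elif predicted_return < -0.01 and position >= 0:
        position = -1  # Sell short
    actual_return = (actual[i] - actual[i - 1]) / actual[i - 1]  # assumed elided line
    returns.append(position * actual_return)
print(returns)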
@@ -284,21 +346,28 @@ def implement_trading_strategy(actual_prices, predicted_prices, threshold=0.01):
         returns.append(position * actual_return)
     return np.array(returns)


 def select_features_rfe(X, y, n_features_to_select=10):
     if isinstance(X, np.ndarray) and len(X.shape) == 3:
         X_2d = X.reshape(X.shape[0], -1)
     else:
         X_2d = X

-    rfe = RFE(estimator=RandomForestRegressor(random_state=42), n_features_to_select=n_features_to_select)
+    rfe = RFE(
+        estimator=RandomForestRegressor(random_state=42),
+        n_features_to_select=n_features_to_select,
+    )
     X_selected = rfe.fit_transform(X_2d, y)
     selected_features = rfe.support_
     return X_selected, selected_features


 def calculate_ensemble_weights(models, X, y):
     weights = []
     for name, model in models:
-        _, _, score, _ = train_and_evaluate_model(model, X, y, n_splits=5, model_name=name)
+        _, _, score, _ = train_and_evaluate_model(
+            model, X, y, n_splits=5, model_name=name
+        )
         weights.append(max(score, 0))  # Ensure non-negative weights

     if sum(weights) == 0:
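Note: `select_features_rfe` flattens 3-D sequence input to 2-D before ranking features. RFE itself in isolation, on plain 2-D data:

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE

X = np.random.rand(100, 12)
y = np.random.rand(100)

rfe = RFE(estimator=RandomForestRegressor(random_state=42), n_features_to_select=5)
X_selected = rfe.fit_transform(X, y)
print(X_selected.shape)  # (100, 5)
print(rfe.support_)      # boolean mask over the 12 original columns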
@@ -307,6 +376,7 @@ def calculate_ensemble_weights(models, X, y):
     else:
         return [w / sum(weights) for w in weights]  # Normalize weights


 def augment_data(X, y, noise_level=0.01):
     X_aug = X.copy()
     y_aug = y.copy()
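Note: `calculate_ensemble_weights` clamps each model's out-of-fold R² at zero before normalizing, so a poorly fitting model contributes nothing. In miniature (the equal-weight fallback for an all-zero vector is my assumption; that branch is elided between the hunks):

scores = [0.62, 0.55, -0.10, 0.48]     # per-model OOF R² scores
weights = [max(s, 0) for s in scores]  # negative scores are clamped to 0
if sum(weights) == 0:
    weights = [1 / len(weights)] * len(weights)  # assumed fallback: equal weights
else:
    weights = [w / sum(weights) for w in weights]
print(weights)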
@@ -314,8 +384,17 @@ def augment_data(X, y, noise_level=0.01):
     X_aug += noise
     return X_aug, y_aug


 # Main function to analyze stock data and make predictions
-def analyze_and_predict_stock(symbol, start_date, end_date, future_days=30, suppress_warnings=False, quick_test=False, models_to_run=['LSTM', 'GRU', 'Random Forest', 'XGBoost']):
+def analyze_and_predict_stock(
+    symbol,
+    start_date,
+    end_date,
+    future_days=30,
+    suppress_warnings=False,
+    quick_test=False,
+    models_to_run=["LSTM", "GRU", "Random Forest", "XGBoost"],
+):
     # Suppress warnings if flag is set
     if suppress_warnings:
         suppress_warnings_method()
@@ -333,7 +412,21 @@ def analyze_and_predict_stock(symbol, start_date, end_date, future_days=30, supp
     data = data.tail(100)

     print("Preparing data for model training...")
-    features = ['Close', 'Volume', 'SMA_20', 'SMA_50', 'RSI', 'MACD', 'BB_upper', 'BB_middle', 'BB_lower', 'Volatility', 'Price_Change', 'Volume_Change', 'High_Low_Range']
+    features = [
+        "Close",
+        "Volume",
+        "SMA_20",
+        "SMA_50",
+        "RSI",
+        "MACD",
+        "BB_upper",
+        "BB_middle",
+        "BB_lower",
+        "Volatility",
+        "Price_Change",
+        "Volume_Change",
+        "High_Low_Range",
+    ]
     X, y, scaler = prepare_data(data[features])

     print("Augmenting data...")
@@ -346,22 +439,24 @@ def analyze_and_predict_stock(symbol, start_date, end_date, future_days=30, supp

     print("\nStarting model training and hyperparameter tuning...")
     models = []
-    if 'LSTM' in models_to_run:
+    if "LSTM" in models_to_run:
         models.append(("LSTM", create_lstm_model((X.shape[1], X.shape[2]))))
-    if 'GRU' in models_to_run:
+    if "GRU" in models_to_run:
         models.append(("GRU", create_gru_model((X.shape[1], X.shape[2]))))
-    if 'Random Forest' in models_to_run:
+    if "Random Forest" in models_to_run:
         models.append(("Random Forest", tune_random_forest(X, y, quick_test)))
-    if 'XGBoost' in models_to_run:
+    if "XGBoost" in models_to_run:
         models.append(("XGBoost", tune_xgboost(X, y, quick_test)))

     results = {}
     oof_predictions = {}
     model_stats = []
-    with tqdm(total=len(models), desc="Overall Progress", position=0) as pbar:
+    with tqdm(total=len(models), desc="Training Models", position=0) as pbar:
         for name, model in models:
             print(f"\nTraining and evaluating {name} model...")
-            cv_score, cv_std, overall_score, oof_pred = train_and_evaluate_model(model, X, y, n_splits=3 if quick_test else 5, model_name=name)
+            cv_score, cv_std, overall_score, oof_pred = train_and_evaluate_model(
+                model, X, y, n_splits=3 if quick_test else 5, model_name=name
+            )
             print(f" {name} model results:")
             print(f" Cross-validation R² score: {cv_score:.4f} (±{cv_std:.4f})")
             print(f" Overall out-of-fold R² score: {overall_score:.4f}")
@@ -371,42 +466,65 @@ def analyze_and_predict_stock(symbol, start_date, end_date, future_days=30, supp
                 model.fit(X.reshape(X.shape[0], -1), y)
                 train_score = model.score(X.reshape(X.shape[0], -1), y)
             else:
-                history = model.fit(X, y, epochs=100, batch_size=32, verbose=1)
-                train_score = 1 - history.history['loss'][-1]  # Use final training loss as a proxy for R²
+                with tqdm(total=100, desc="Epochs", leave=False) as epoch_pbar:
+
+                    class EpochProgressCallback(Callback):
+                        def on_epoch_end(self, epoch, logs=None):
+                            epoch_pbar.update(1)
+
+                    history = model.fit(
+                        X,
+                        y,
+                        epochs=100,
+                        batch_size=32,
+                        verbose=0,
+                        callbacks=[EpochProgressCallback()],
+                    )
+                train_score = (
+                    1 - history.history["loss"][-1]
+                )  # Use final training loss as a proxy for R²

             results[name] = model
             oof_predictions[name] = oof_pred

             overfitting_score = train_score - overall_score

-            model_stats.append({
-                'Model': name,
-                'CV R² Score': cv_score,
-                'CV R² Std': cv_std,
-                'OOF R² Score': overall_score,
-                'Train R² Score': train_score,
-                'Overfitting Score': overfitting_score
-            })
+            model_stats.append(
+                {
+                    "Model": name,
+                    "CV R² Score": cv_score,
+                    "CV R² Std": cv_std,
+                    "OOF R² Score": overall_score,
+                    "Train R² Score": train_score,
+                    "Overfitting Score": overfitting_score,
+                }
+            )

             pbar.update(1)

     # Create a DataFrame with model statistics
     stats_df = pd.DataFrame(model_stats)
-    stats_df = stats_df.sort_values('OOF R² Score', ascending=False).reset_index(drop=True)
+    stats_df = stats_df.sort_values("OOF R² Score", ascending=False).reset_index(
+        drop=True
+    )

     # Add overfitting indicator
-    stats_df['Overfit'] = stats_df['Overfitting Score'].apply(lambda x: 'Yes' if x > 0.05 else 'No')
+    stats_df["Overfit"] = stats_df["Overfitting Score"].apply(
+        lambda x: "Yes" if x > 0.05 else "No"
+    )

     # Print the table
     print("\nModel Performance Summary:")
-    print(tabulate(stats_df, headers='keys', tablefmt='pretty', floatfmt='.4f'))
+    print(tabulate(stats_df, headers="keys", tablefmt="pretty", floatfmt=".4f"))

     print("\nCalculating ensemble weights...")
     ensemble_weights = calculate_ensemble_weights(models, X_test, y_test)
     print(f"Ensemble weights: {ensemble_weights}")

     print("Making ensemble predictions...")
-    ensemble_predictions = weighted_ensemble_predict([model for _, model in models], X, ensemble_weights)
+    ensemble_predictions = weighted_ensemble_predict(
+        [model for _, model in models], X, ensemble_weights
+    )

     print(f"Predicting future data for the next {future_days} days...")
     future_predictions = []
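Note: the summary table above relies on `tabulate` accepting a DataFrame directly; `headers="keys"` takes the column names from the frame. A minimal call in the same style:

import pandas as pd
from tabulate import tabulate

stats_df = pd.DataFrame(
    [{"Model": "LSTM", "OOF R² Score": 0.9123}, {"Model": "XGBoost", "OOF R² Score": 0.8841}]
)
print(tabulate(stats_df, headers="keys", tablefmt="pretty", floatfmt=".4f"))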
@@ -418,67 +536,183 @@ def analyze_and_predict_stock(symbol, start_date, end_date, future_days=30, supp

     print("Inverse transforming predictions...")
     close_price_scaler = MinMaxScaler(feature_range=(0, 1))
-    close_price_scaler.fit(data['Close'].values.reshape(-1, 1))
-    ensemble_predictions = close_price_scaler.inverse_transform(ensemble_predictions.reshape(-1, 1))
-    future_predictions = close_price_scaler.inverse_transform(future_predictions.reshape(-1, 1))
+    close_price_scaler.fit(data["Close"].values.reshape(-1, 1))
+    ensemble_predictions = close_price_scaler.inverse_transform(
+        ensemble_predictions.reshape(-1, 1)
+    )
+    future_predictions = close_price_scaler.inverse_transform(
+        future_predictions.reshape(-1, 1)
+    )

     # Ensure ensemble_predictions matches the length of the actual data
     ensemble_predictions = ensemble_predictions[-len(data) :]

     print("Plotting results...")
-    plt.figure(figsize=(20, 24))  # Increased figure height
+    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(20, 24))

     # Price prediction plot
-    plt.subplot(3, 1, 1)
     plot_data = data.iloc[-len(ensemble_predictions) :]
-    future_dates = pd.date_range(start=plot_data.index[-1] + pd.Timedelta(days=1), periods=future_days)
+    future_dates = pd.date_range(
+        start=plot_data.index[-1] + pd.Timedelta(days=1), periods=future_days
+    )

-    plt.plot(plot_data.index, plot_data['Close'], label='Actual Price', color='blue')
-    plt.plot(plot_data.index, ensemble_predictions, label='Predicted Price', color='red', linestyle='--')
-    plt.plot(future_dates, future_predictions, label='Future Predictions', color='green', linestyle='--')
-
-    plt.title(f'{symbol} Stock Price Prediction')
-    plt.xlabel('Date')
-    plt.ylabel('Price')
-    plt.legend()
+    ax1.plot(plot_data.index, plot_data["Close"], label="Actual Price", color="blue")
+    ax1.plot(
+        plot_data.index,
+        ensemble_predictions,
+        label="Predicted Price",
+        color="red",
+        linestyle="--",
+    )
+    ax1.plot(
+        future_dates,
+        future_predictions,
+        label="Future Predictions",
+        color="green",
+        linestyle="--",
+    )
+
+    # Add price indications for every day (initially invisible)
+    annotations = []
+    for i, (date, price) in enumerate(zip(plot_data.index, ensemble_predictions)):
+        ann = ax1.annotate(
+            f"${price[0]:.2f}",
+            (date, price[0]),
+            xytext=(0, 10),
+            textcoords="offset points",
+            ha="center",
+            va="bottom",
+            fontsize=8,
+            alpha=0.7,
+            visible=False,
+        )
+        annotations.append(ann)
+
+    for i, (date, price) in enumerate(zip(future_dates, future_predictions)):
+        ann = ax1.annotate(
+            f"${price[0]:.2f}",
+            (date, price[0]),
+            xytext=(0, -10),
+            textcoords="offset points",
+            ha="center",
+            va="top",
+            fontsize=8,
+            alpha=0.7,
+            visible=False,
+        )
+        annotations.append(ann)
+
+    ax1.set_title(f"{symbol} Stock Price Prediction")
+    ax1.set_xlabel("Date")
+    ax1.set_ylabel("Price")
+    ax1.legend()
+
+    # Add hover annotation
+    hover_annot = ax1.annotate(
+        "",
+        xy=(0, 0),
+        xytext=(10, 10),
+        textcoords="offset points",
+        bbox=dict(boxstyle="round", fc="w"),
+        arrowprops=dict(arrowstyle="->"),
+    )
+    hover_annot.set_visible(False)
+
+    def update_hover_annot(event):
+        vis = hover_annot.get_visible()
+        if event.inaxes == ax1:
+            x, y = event.xdata, event.ydata
+            date = num2date(x).strftime("%Y-%m-%d")
+            hover_annot.xy = (x, y)
+            hover_annot.set_text(f"Date: {date}\nPrice: ${y:.2f}")
+            hover_annot.set_visible(True)
+            fig.canvas.draw_idle()
+        elif vis:
+            hover_annot.set_visible(False)
+            fig.canvas.draw_idle()
+
+    # Connect the hover event
+    fig.canvas.mpl_connect("motion_notify_event", update_hover_annot)
+
+    # Add zoom event handler
+    def on_zoom(event):
+        ax1 = event.inaxes
+        if ax1 is None:
+            return
+        xlim = ax1.get_xlim()
+        ylim = ax1.get_ylim()
+
+        # Calculate the zoom level based on the x-axis range
+        zoom_level = (plot_data.index[-1] - plot_data.index[0]).days / (
+            xlim[1] - xlim[0]
+        ).days
+
+        # Adjust annotation visibility based on zoom level
+        for ann in annotations:
+            ann.set_visible(
+                zoom_level > 5
+            )  # Show annotations when zoomed in more than 5x
+
+        fig.canvas.draw_idle()
+
+    # Connect the zoom event handler
+    fig.canvas.mpl_connect("motion_notify_event", on_zoom)

     # Model performance summary table
-    plt.subplot(3, 1, 2)
-    plt.axis('off')
-    table = plt.table(cellText=stats_df.values,
-                      colLabels=stats_df.columns,
-                      cellLoc='center',
-                      loc='center')
+    ax2.axis("off")
+    table = ax2.table(
+        cellText=stats_df.values,
+        colLabels=stats_df.columns,
+        cellLoc="center",
+        loc="center",
+    )
     table.auto_set_font_size(False)
     table.set_fontsize(9)
     table.scale(1, 1.5)

     # Lower the title and add more space between plot and table
-    plt.title('Model Performance Summary', pad=60)
+    ax2.set_title("Model Performance Summary", pad=60)

     # Implement trading strategy
-    strategy_returns = implement_trading_strategy(plot_data['Close'].values, ensemble_predictions.flatten())
-    strategy_sharpe_ratio = np.mean(strategy_returns) / np.std(strategy_returns) * np.sqrt(252)
+    strategy_returns = implement_trading_strategy(
+        plot_data["Close"].values, ensemble_predictions.flatten()
+    )
+    strategy_sharpe_ratio = (
+        np.mean(strategy_returns) / np.std(strategy_returns) * np.sqrt(252)
+    )
     print(f"Trading Strategy Sharpe Ratio: {strategy_sharpe_ratio:.4f}")

     # Calculate cumulative returns of the trading strategy
     cumulative_returns = (1 + strategy_returns).cumprod() - 1

     # Add new subplot for trading strategy performance
-    plt.subplot(3, 1, 3)
-    plt.plot(plot_data.index[-len(cumulative_returns):], cumulative_returns, label='Strategy Cumulative Returns', color='purple')
-    plt.title(f'{symbol} Trading Strategy Performance')
-    plt.xlabel('Date')
-    plt.ylabel('Cumulative Returns')
-    plt.legend()
+    ax3.plot(
+        plot_data.index[-len(cumulative_returns) :],
+        cumulative_returns,
+        label="Strategy Cumulative Returns",
+        color="purple",
+    )
+    ax3.set_title(f"{symbol} Trading Strategy Performance")
+    ax3.set_xlabel("Date")
+    ax3.set_ylabel("Cumulative Returns")
+    ax3.legend()

     # Add strategy Sharpe ratio as text on the plot
-    plt.text(0.05, 0.95, f'Strategy Sharpe Ratio: {strategy_sharpe_ratio:.4f}',
-             transform=plt.gca().transAxes, verticalalignment='top')
+    ax3.text(
+        0.05,
+        0.95,
+        f"Strategy Sharpe Ratio: {strategy_sharpe_ratio:.4f}",
+        transform=ax3.transAxes,
+        verticalalignment="top",
+    )

     plt.tight_layout()
-    plt.savefig(f'{symbol}_prediction_with_stats_and_strategy.png', dpi=300, bbox_inches='tight')
-    print(f"Plot with statistics and strategy performance saved as '{symbol}_prediction_with_stats_and_strategy.png'")
+    plt.savefig(
+        f"{symbol}_prediction_with_stats_and_strategy.png", dpi=300, bbox_inches="tight"
+    )
+    print(
+        f"Plot with statistics and strategy performance saved as '{symbol}_prediction_with_stats_and_strategy.png'"
+    )
     plt.show()

     print(f"\nFuture predictions for the next {future_days} days:")
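Note: the interactivity added in this hunk is the stock matplotlib recipe: one reusable annotation plus a `motion_notify_event` handler. There is no separate zoom event used here; `on_zoom` is also driven by mouse motion, and `ax.callbacks.connect("xlim_changed", ...)` would be the axis-limits alternative. Worth noting too that `ax1.get_xlim()` returns floats, so the `.days` access in `on_zoom` looks fragile. The hover pattern stripped to its core, with synthetic data, assuming an interactive backend:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(range(10), [v * v for v in range(10)])

annot = ax.annotate(
    "", xy=(0, 0), xytext=(10, 10), textcoords="offset points",
    bbox=dict(boxstyle="round", fc="w"), arrowprops=dict(arrowstyle="->"),
)
annot.set_visible(False)

def on_move(event):
    # Follow the cursor while it is inside the axes; hide otherwise
    if event.inaxes == ax:
        annot.xy = (event.xdata, event.ydata)
        annot.set_text(f"x={event.xdata:.2f}\ny={event.ydata:.2f}")
        annot.set_visible(True)
        fig.canvas.draw_idle()
    elif annot.get_visible():
        annot.set_visible(False)
        fig.canvas.draw_idle()

fig.canvas.mpl_connect("motion_notify_event", on_move)
plt.show()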
@@ -487,35 +721,62 @@ def analyze_and_predict_stock(symbol, start_date, end_date, future_days=30, supp

     print("\nAnalysis and prediction completed successfully.")


 # Parse command-line arguments
 def parse_arguments():
-    parser = argparse.ArgumentParser(description='Stock Price Prediction and Analysis Tool')
+    parser = argparse.ArgumentParser(
+        description="Stock Price Prediction and Analysis Tool"
+    )

-    parser.add_argument('-s', '--symbol', type=str, default='MSFT',
-                        help='Stock symbol to analyze (default: MSFT)')
+    parser.add_argument(
+        "-s",
+        "--symbol",
+        type=str,
+        default="MSFT",
+        help="Stock symbol to analyze (default: MSFT)",
+    )

-    parser.add_argument('-sd', '--start_date', type=str, default='2018-01-01',
-                        help='Start date for historical data (default: 2018-01-01)')
+    parser.add_argument(
+        "-sd",
+        "--start_date",
+        type=str,
+        default="2018-01-01",
+        help="Start date for historical data (default: 2018-01-01)",
+    )

-    parser.add_argument('-fd', '--future_days', type=int, default=30,
-                        help='Number of days to predict into the future (default: 30)')
+    parser.add_argument(
+        "-fd",
+        "--future_days",
+        type=int,
+        default=30,
+        help="Number of days to predict into the future (default: 30)",
+    )

-    parser.add_argument('-q', '--quick_test', action='store_true',
-                        help='Run in quick test mode (default: False)')
+    parser.add_argument(
+        "-q",
+        "--quick_test",
+        action="store_true",
+        help="Run in quick test mode (default: False)",
+    )

-    parser.add_argument('-sw', '--suppress_warnings', action='store_true',
-                        help='Suppress warnings (default: False)')
+    parser.add_argument(
+        "-sw",
+        "--suppress_warnings",
+        action="store_true",
+        help="Suppress warnings (default: False)",
+    )

     args = parser.parse_args()

     # Validate start_date
     try:
-        datetime.strptime(args.start_date, '%Y-%m-%d')
+        datetime.strptime(args.start_date, "%Y-%m-%d")
     except ValueError:
         parser.error("Incorrect start date format, should be YYYY-MM-DD")

     return args


 # Main execution block
 if __name__ == "__main__":
     # Parse command-line arguments
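Note: with the flags defined above, typical invocations would look like (end date is derived in the main block, not a flag):

python goldigger.py                                 # defaults: MSFT, from 2018-01-01, 30-day horizon
python goldigger.py -s AAPL -sd 2020-01-01 -fd 15   # custom symbol, start date, and horizon
python goldigger.py -s TSLA -q -sw                  # quick test mode with warnings suppressed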
@@ -535,6 +796,11 @@ if __name__ == "__main__":
     print(f"Warnings suppressed: {'Yes' if suppress_warnings_flag else 'No'}")

     # Run the stock analysis and prediction
-    analyze_and_predict_stock(symbol, start_date, end_date, future_days,
-                              suppress_warnings=suppress_warnings_flag,
-                              quick_test=quick_test_flag)
+    analyze_and_predict_stock(
+        symbol,
+        start_date,
+        end_date,
+        future_days,
+        suppress_warnings=suppress_warnings_flag,
+        quick_test=quick_test_flag,
+    )