@txoof
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
import logging
# Suppress warnings
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
# Verify GPU availability
physical_devices = tf.config.list_physical_devices('GPU')
print(f"GPUs Available: {bool(physical_devices)}")
# Load data with flexible features
def load_data(filename, features=['Close']):
    df = pd.read_csv(filename)
    # Parse timestamps once with the explicit format (with parse_dates the format
    # would be guessed first and the explicit parse below would be skipped)
    df['DateTime'] = pd.to_datetime(df['DateTime'], format='%Y.%m.%d %H:%M:%S')
    required_columns = ['DateTime'] + features
    df_cleaned = df[required_columns].dropna().sort_values('DateTime')
    if df_cleaned.empty:
        raise ValueError("Dataset is empty after cleaning!")
    return df_cleaned, features
# Example using multiple features
selected_features = ['Open', 'High', 'Low', 'Close']
df, features = load_data('hourdata.csv', features=selected_features)
print("Using features:", features)
# Normalization
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[features])
# Sequence creation with dynamic close index
def create_sequences(data, time_steps):
    X, y = [], []
    close_idx = features.index('Close')  # relies on the module-level `features` list
    for i in range(time_steps, len(data)):
        X.append(data[i-time_steps:i])
        y.append(data[i, close_idx])  # Predict Close price
    return np.array(X), np.array(y)
# Train/test split
def split_data(data, time_steps, split_ratio=0.8):
    split_idx = int(len(data) * split_ratio)
    train = data[:split_idx]
    # Keep the last `time_steps` training rows so the first test sequence has full history
    test = data[split_idx - time_steps:]
    return train, test
# Enhanced prediction function
def predict_price(model, scaler, df, features, time_steps):
    close_idx = features.index('Close')
    last_seq = df[features].iloc[-time_steps:]
    scaled_seq = scaler.transform(last_seq)
    X = scaled_seq.reshape(1, time_steps, len(features))
    pred_scaled = model.predict(X, verbose=0)
    # Create dummy array for inverse transform
    dummy_row = np.zeros((1, len(features)))
    dummy_row[0, close_idx] = pred_scaled[0][0]
    return scaler.inverse_transform(dummy_row)[0, close_idx]
# Time steps to evaluate
time_steps_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 50, 100]
log_rows = []
for time_steps in time_steps_list:
    print(f"\nProcessing time_steps={time_steps}")
    # Data preparation
    train_data, test_data = split_data(scaled_data, time_steps)
    X_train, y_train = create_sequences(train_data, time_steps)
    X_test, y_test = create_sequences(test_data, time_steps)
    # Reshape data
    X_train = X_train.reshape(-1, time_steps, len(features))
    X_test = X_test.reshape(-1, time_steps, len(features))
    # Model architecture
    model = Sequential([
        Input(shape=(time_steps, len(features))),
        LSTM(100, return_sequences=True),
        Dropout(0.3),
        LSTM(100, return_sequences=True),
        Dropout(0.3),
        LSTM(50, return_sequences=True),
        Dropout(0.2),
        LSTM(25),
        Dropout(0.2),
        Dense(25, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    # Training
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=256,
        verbose=0,
        callbacks=[early_stop]
    )
    # Evaluation
    test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
    current_price = df['Close'].iloc[-1]
    predicted_price = predict_price(model, scaler, df, features, time_steps)
    # Logging
    log_rows.append({
        'time_steps': time_steps,
        'test_loss': test_loss,
        'test_mae': test_mae,
        'current_price': current_price,
        'predicted_price': predicted_price
    })
log_df = pd.DataFrame(log_rows)
# Save and display results
log_df.to_csv('multi_feature_analysis.csv', index=False)
print("\nFinal Results:")
print(log_df)
# Plot results with actual dates
split_index = int(len(df) * 0.8)
plt.figure(figsize=(14, 7))
plt.plot(df['DateTime'].iloc[split_index:], df['Close'].iloc[split_index:], label='Actual Prices')
plt.title('Actual Close Prices (Test Period)')
plt.xlabel('DateTime')
plt.ylabel('Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
I had code like this for an LSTM. I have many versions of it, and TensorFlow 2.16 just performed best, but unfortunately I can't recall the details as it was some time ago; the difference was in pip, where the packages seem to be different for macOS and for later versions.
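For reference, this is roughly how I pin and verify the setup now. The macOS-specific package names below (tensorflow-metal, and the older separate tensorflow-macos wheel) are from memory, so treat them as an assumption and check them against your own pip freeze:

# Pinning the TensorFlow build that worked for me (2.16.x):
#   pip install "tensorflow==2.16.*"
# On Apple Silicon, I believe GPU support additionally needs the Metal plugin:
#   pip install tensorflow-metal
# (older macOS setups used the separate tensorflow-macos wheel instead of tensorflow)
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("GPU devices:", tf.config.list_physical_devices('GPU'))

Comparing this printout between environments should show whether the difference was the TensorFlow version itself or the GPU/Metal setup.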