import gymnasium as gym
from gymnasium import spaces

import os
import json
import numpy as np
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import SubprocVecEnv
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import torch.nn
from scipy.optimize import minimize

def portfolio_return(weights, returns):
    """
    weights: shape (n_assets,)
    returns: shape (n_assets,) - e.g., percentage change (0.01 for 1%)
    """
    return np.dot(weights, returns)

class PortfolioEnv(gym.Env):
    
    metadata = {'render.modes': ['human']}

    def __init__(
        self,
        data: pd.DataFrame,
        asset_cols: list,
        esg_cols: list,
        emission_cols: list,
        forecast_cols: list = None,
        lookback: int = 1,
        transaction_cost: float = 0.0005,
        alpha: float = 1.0,
        beta: float = 0.1,
        gamma: float = 0.1,
        max_weight: float = 0.25,
        number_of_selected_assets: int = 10,
        next_returns_cols: list = None,
    ):
        super(PortfolioEnv, self).__init__()

        # data and columns
        self.data = data.reset_index(drop=True)
        self.asset_cols = asset_cols
        self.esg_cols = esg_cols
        self.emission_cols = emission_cols
        self.forecast_cols = forecast_cols if forecast_cols is not None else []
        self.next_returns_cols = next_returns_cols

        self.n_assets = len(asset_cols)
        self.lookback = lookback

        # reward weights
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma

        # costs & constraints
        self.transaction_cost = transaction_cost
        self.max_weight = max_weight

        # action space: raw weights for each asset (will be normalized to sum 1 and clipped to [0, max_weight])
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(self.n_assets,), dtype=np.float32)

        # observation: concatenated vector length:
        # n_assets returns + n_assets esg + n_assets emission + n_assets forecasts (+ risks) + n_assets current_weights
        obs_len = self.n_assets * (1 + 1 + 1) + len(self.forecast_cols) + self.n_assets
        # Simplify: treat forecasts as flattened features if present
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_len,), dtype=np.float32)

        # internal state pointers
        self.curr_step = 0
        self.start_step = 0
        
        # initial portfolio: equal weights
        self.current_weights = np.ones(self.n_assets) / self.n_assets

        # prepare reusable arrays
        self._build_feature_matrix()

        self.training = True
        self.number_of_selected_assets = number_of_selected_assets

    def _build_feature_matrix(self):
        # Precompute arrays for speed
        self.returns_matrix = self.data[self.asset_cols].values # shape (T, n_assets)
        self.esg_matrix = self.data[self.esg_cols].values # shape (T, n_assets)
        self.emission_matrix = self.data[self.emission_cols].values # shape (T, n_assets)
        
        #self.vol_matrix = self.data[self.asset_cols].values # shape (T, n_assets)
        
        if self.forecast_cols:
            self.forecast_matrix = self.data[self.forecast_cols].values
        else:
            self.forecast_matrix = None

        self.next_returns_matrix = self.data[self.next_returns_cols].values # shape (T, n_assets)
        
        self.T = len(self.data)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)

        if self.training: # training env
            self.start_step = np.random.randint(0, max(1, self.T - 1))
        else: # test env
            self.start_step = 0
        
        self.curr_step = self.start_step
        self.current_weights = np.ones(self.n_assets) / self.n_assets
    
        obs = self._get_observation()
        info = {}  # required by Gym API
    
        return obs, info

    def _get_observation(self):
        # use current step data as "latest"
        r = self.returns_matrix[self.curr_step]
        e = self.esg_matrix[self.curr_step]
        em = self.emission_matrix[self.curr_step]
        obs_list = [r, e, em]
        if self.forecast_matrix is not None:
            obs_list.append(self.forecast_matrix[self.curr_step])
            
        obs_list.append(self.current_weights)
        obs = np.concatenate(obs_list).astype(np.float32)
        return obs

    def step(self, action):
        
        raw_w = np.array(action, dtype=np.float32).flatten()
        raw_w = np.clip(raw_w, 0.0, None)
        
        # fallback
        if raw_w.sum() == 0:
            raw_w = np.ones(self.n_assets)
        
        # Select top assets (self.number_of_selected_assets) 
        K = self.number_of_selected_assets
        top_k_idx = np.argsort(raw_w)[-K:]
        
        target_w = np.zeros(self.n_assets, dtype=np.float32)
        target_w[top_k_idx] = raw_w[top_k_idx]
        
        # normilize
        target_w /= target_w.sum()
        
        # max weight constraint
        target_w = np.minimum(target_w, self.max_weight)
        
        # final normalization
        if target_w.sum() > 0:
            target_w /= target_w.sum()
        else:
            target_w[top_k_idx] = 1.0 / K

        
        # compute transaction costs based on turnover
        turnover = np.sum(np.abs(target_w - self.current_weights))
        cost = turnover * self.transaction_cost

        returns = self.next_returns_matrix[self.curr_step]  # shape (n_assets,)
        port_return = portfolio_return(target_w, returns) - cost
        
        #asset_vol = self.vol_matrix[self.curr_step]
        asset_vol=self.returns_matrix[self.curr_step]
        port_vol = np.sqrt(np.dot(target_w**2, asset_vol**2)) + 1e-6
        
        norm_return = port_return / port_vol

        # compute ESG and emission portfolio stats
        esg_port = np.dot(target_w, self.esg_matrix[self.curr_step])
        emission_port = np.dot(target_w, self.emission_matrix[self.curr_step])
        
        # normlization
        norm_esg = esg_port - 0.5
        norm_emission = emission_port - 0.5

        reward = (
            self.alpha * norm_return
            + self.beta * norm_esg
            + self.gamma * norm_emission
        )
        
        # update current weights
        self.current_weights = target_w.copy()

        # increment time
        self.curr_step += 1
        done = self.curr_step >= (self.T - 1)  # stop before index error

        obs = self._get_observation() if not done else np.zeros(self.observation_space.shape, dtype=np.float32)

        info = {
            "port_return": port_return,
            "esg": esg_port,
            "emission": emission_port,
            "turnover": turnover,
            "cost": cost,
            "weights": target_w.copy(),
            "start_step": self.start_step
        }

        terminated = done
        truncated = False
        return obs, float(reward), terminated, truncated, info

    def render(self, mode='human'):
        print(f"Step: {self.curr_step}, Weights: {self.current_weights}")

    def close(self):
        pass

from dbManager import DatabaseManager

df_returns = pd.read_csv("narrow_original_df.csv", index_col=0)
N = 40
asset_cols = list(df_returns.columns[:N])

# Create additional columns
esg_cols=[]
emission_cols=[]
forecast_cols=[]
vol_cols = []
mom_cols = []
next_returns_cols=[]
for i, c in enumerate(asset_cols):
    esg_cols.append(f'esg_{c}')
    emission_cols.append(f'em_{c}')
    forecast_cols.append(f'f_{c}')
    vol_cols.append(f"vol_12w_{c}")
    mom_cols.append(f'mom_12w_{c}')
    next_returns_cols.append(f'nr_{c}')
df_returns

# Split data
'''
LSTM:
    2002-03-06 2013-01-30
    2017-12-27 2020-06-10
RL:
    2013-01-30 2017-12-27
    2020-06-10 2022-11-23
Test (validation + evaluation):
    2022-11-23 2025-04-23

    Validation:	
        2022-11-23	2023-02-15
        2023-02-15	2023-05-10
        2023-05-10	2023-08-02
        2023-08-02	2023-11-01
        2023-11-01	2024-02-28
    	
    Evaluation:	
        2024-02-28	2024-05-22
        2024-05-22	2024-08-14
        2024-08-14	2024-11-06
        2024-11-06	2025-01-29
        2025-01-29	2025-04-16

'''
# Define date ranges

# Train
train_range_1 = ("2013-01-30", "2017-12-27")
train_range_2 = ("2020-06-10", "2022-11-23")

# Create train datasets
df_train_1 = df_returns.loc[train_range_1[0]:train_range_1[1]]
df_train_2 = df_returns.loc[train_range_2[0]:train_range_2[1]]
df_train = pd.concat([df_train_1, df_train_2]) # combine


# Validation
val_range_1=("2022-11-23","2023-02-15")
val_range_2=("2023-02-15","2023-05-10")
val_range_3=("2023-05-10","2023-08-02")
val_range_4=("2023-08-02","2023-11-01")
val_range_5=("2023-11-01","2024-02-28")
val_range_total=("2022-11-23","2024-02-28")

df_val_1 = df_returns.loc[val_range_1[0]:val_range_1[1]]
df_val_2 = df_returns.loc[val_range_2[0]:val_range_2[1]]
df_val_3 = df_returns.loc[val_range_3[0]:val_range_3[1]]
df_val_4 = df_returns.loc[val_range_4[0]:val_range_4[1]]
df_val_5 = df_returns.loc[val_range_5[0]:val_range_5[1]]
df_val_total = df_returns.loc[val_range_total[0]:val_range_total[1]]

validation_dict = {}
validation_dict["val_1"]=df_val_1
validation_dict["val_2"]=df_val_2
validation_dict["val_3"]=df_val_3
validation_dict["val_4"]=df_val_4
validation_dict["val_5"]=df_val_5
validation_dict["val_total"]=df_val_total

# Evaluation
eval_range_1=("2024-02-28","2024-05-22")
eval_range_2=("2024-05-22","2024-08-14")
eval_range_3=("2024-08-14","2024-11-06")
eval_range_4=("2024-11-06","2025-01-29")
eval_range_5=("2025-01-29","2025-04-16")
eval_range_total=("2024-02-28","2025-04-16")

df_eval_1 = df_returns.loc[eval_range_1[0]:eval_range_1[1]]
df_eval_2 = df_returns.loc[eval_range_2[0]:eval_range_2[1]]
df_eval_3 = df_returns.loc[eval_range_3[0]:eval_range_3[1]]
df_eval_4 = df_returns.loc[eval_range_4[0]:eval_range_4[1]]
df_eval_5 = df_returns.loc[eval_range_5[0]:eval_range_5[1]]
df_eval_total = df_returns.loc[eval_range_total[0]:eval_range_total[1]]

evaluation_dict = {}
evaluation_dict["eval_1"]=df_eval_1
evaluation_dict["eval_2"]=df_eval_2
evaluation_dict["eval_3"]=df_eval_3
evaluation_dict["eval_4"]=df_eval_4
evaluation_dict["eval_5"]=df_eval_5
evaluation_dict["eval_total"]=df_eval_total

import matplotlib.pyplot as plt
def plot_combined_upd(cumulative_list, total_return_list, avg_esg_list, avg_em_list, title_list, model_parameters):

    font = {'size': 8}
    plt.rc('font', **font)
    
    fig, axes = plt.subplots(2, len(cumulative_list), figsize=(5*len(cumulative_list), 6))
    
    for i, cumulative in enumerate(cumulative_list):
        
        total_return = total_return_list[i]
        avg_esg = avg_esg_list[i]
        avg_em = avg_em_list[i]
        title = title_list[i]
        
        portfolio_value = cumulative
    
        # Portfolio Value
        axes[0][i].plot(portfolio_value, linewidth=3)
        axes[0][i].set_title(f"{title}",
                          fontsize=12, fontweight="bold")
        axes[0][i].set_xlabel("Date", fontsize=10)
        axes[0][i].set_ylabel("Portfolio Value (€)", fontsize=10)
        axes[0][i].grid(True)

        axes[0][i].text(
            0.05, 0.95,
            f"PPO\nα={model_parameters['alpha']}\nβ={model_parameters['beta']}\nγ={model_parameters['gamma']}",
            transform=axes[0][i].transAxes,
            fontsize=9,
            verticalalignment='top',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='black', boxstyle='round,pad=0.5')
        )

        axes[0][i].text(
            0.76, 0.15,
            f"{'+' if total_return > 0 else ''}{total_return:.1f}%",
            transform=axes[0][i].transAxes,
            fontsize=14,
            fontweight = 'bold',
            c = 'red' if total_return < 0 else 'green',
            verticalalignment='top',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='black', boxstyle='round,pad=0.5')
        )
    
        # ESG & Emissions
        bars = axes[1][i].bar(
            ["Average ESG", "Average Emissions"],
            [avg_esg, avg_em]
        )
    
        axes[1][i].set_ylim(0, 1)
        # Different colors
        bars[0].set_color("#2ca02c")   # green for ESG
        bars[1].set_color("#d62728")   # red for emissions
    
        axes[1][i].set_title("Portfolio Sustainability Metrics",
                          fontsize=12, fontweight="bold")
        axes[1][i].set_ylabel("Score", fontsize=10)
        axes[1][i].grid(axis="y")
    
        # Add values above bars
        for bar in bars:
            height = bar.get_height()
            axes[1][i].text(
                bar.get_x() + bar.get_width()/2,
                height,
                f"{height:.2f}",
                ha='center',
                va='bottom',
                fontsize=10,
                fontweight='bold'
            )

    plt.tight_layout()
    plt.show()

# Main model settings
risk_cols = vol_cols + mom_cols

# Wrap env for stable-baselines3
def make_PPO_env(df, asset_cols, esg_cols, emission_cols, forecast_cols, risk_cols, model_parameters, model_hyperparameters, next_returns_cols, training):
    env = PortfolioEnv(
        data=df,
        asset_cols=asset_cols,
        esg_cols=esg_cols,
        emission_cols=emission_cols,
        forecast_cols=forecast_cols+risk_cols if model_hyperparameters['add_risks'] else forecast_cols,
        transaction_cost=0.001,
        alpha=model_parameters['alpha'],
        beta=model_parameters['beta'],
        gamma=model_parameters['gamma'],
        max_weight=0.25,
        number_of_selected_assets=model_parameters['number_of_selected_assets'],
        next_returns_cols=next_returns_cols,
    )
    env.training = training
    return Monitor(env)

with open('PPO_models_results.json', 'r') as f:
    results = json.load(f)
    
# 1. Define Column MultiIndex (as before)
level1_cols = np.repeat(['Validation', 'Evaluation'], 18)
level2_cols = np.tile(np.repeat(['Range 1', 'Range 2', 'Range 3', 'Range 4', 'Range 5', 'Full Range'], 3), 2)
level3_cols = np.tile(['Return (%)', 'ESG', 'CO2 score'], 12)

cols = pd.MultiIndex.from_arrays([level1_cols, level2_cols, level3_cols], names=['Type', 'Range', 'Metric'])

# 2. Define Row MultiIndex (Model name + Number of Assets)
models=[]
for r_index, (r_key, r_item) in enumerate(results.items()):
        model_parameters = r_item['model_parameters'].copy()
        models.append(f"PPO MODEL {r_index+1} (α={model_parameters['alpha']}, β={model_parameters['beta']}, γ={model_parameters['gamma']})")
    
models.append("MPT (full market)")
models.append("MPT (40 assets)")
asset_counts = [7, 10, 15]

# Create all combinations of Model and Asset Count for the rows
row_index = pd.MultiIndex.from_product([models, asset_counts], names=['Model', 'NSAs'])

# 3. Initialize the DataFrame with 37 columns
test_results_df = pd.DataFrame(np.nan, index=row_index, columns=cols)


# Evaluation
test_ranges = {"Validation": validation_dict, "Evaluation": evaluation_dict}

for tr_key, tr_item in test_ranges.items():
    # Create new branch

    print("".join(["=" for _ in range(150)]))
    print(tr_key)
    
    for r_index, (r_key, r_item) in enumerate(results.items()):

        individual = r_item['elits_list'][0][1]
        vecnorm_path = r_item['elits_list'][0][2]
        model_path = r_item['elits_list'][0][3]
        model_parameters = r_item['model_parameters'].copy()
    
        print("\n")
        print("".join(["=" for _ in range(100)]))
        print(f"\nPPO MODEL {r_index+1}\n(α={model_parameters['alpha']}, β={model_parameters['beta']}, γ={model_parameters['gamma']})\n")
        
        for nsa in [7,10,15]:

            model_label = f"PPO MODEL {r_index+1} (α={model_parameters['alpha']}, β={model_parameters['beta']}, γ={model_parameters['gamma']})"
            current_row = (model_label, nsa)
        
            model_parameters['number_of_selected_assets'] = nsa
        
            cumulative_list = []
            total_return_list = []
            avg_esg_list = []
            avg_em_list = []
            title_list = []
        
            for i, (eval_key, df_eval_item) in enumerate(tr_item.items()):

                # Crete env
                test_env = DummyVecEnv([
                    lambda: make_PPO_env(
                        df_eval_item,
                        asset_cols,
                        esg_cols,
                        emission_cols,
                        forecast_cols,
                        risk_cols,
                        model_parameters,
                        individual,
                        next_returns_cols,
                        training=False
                    )
                ])
                
                test_env = VecNormalize.load(vecnorm_path, test_env)
                test_env.norm_reward = False
                
                # Load model
                model = PPO.load(model_path, env=test_env, device="cpu")
                
                weights_history = []
                reward_history = []
                return_history = []
                esg_history = []
                emission_history = []
                
                obs = test_env.reset()
                done = False
                
                while not done:
                    action, _ = model.predict(obs, deterministic=True)
                    obs, rewards, dones, infos = test_env.step(action)
                
                    # Store data
                    weights_history.append(infos[0]["weights"])
                    reward_history.append(rewards[0])
                    return_history.append(infos[0]["port_return"])
                    esg_history.append(infos[0]["esg"])
                    emission_history.append(infos[0]["emission"])
                
                    done = dones[0]
                
                initial_capital = 10000
                portfolio_value = initial_capital
                portfolio_values = [portfolio_value]
                
                for r in return_history:
                    portfolio_value *= (1 + r/100)
                    portfolio_values.append(portfolio_value)
                    
                title = f"{tr_key} {i+1 if i<5 else '(full range)'}  | {model_parameters['number_of_selected_assets']} Assets"
            
                total_return = (portfolio_values[-1]/portfolio_values[0]-1)*100
                
                df_portfolio = pd.DataFrame(
                    data={'Value': portfolio_values}, 
                    index=df_eval_item.index
                )
                
                cumulative_list.append(df_portfolio)
                total_return_list.append(total_return)
                avg_esg_list.append(np.mean(esg_history))
                avg_em_list.append(np.mean(emission_history))
                title_list.append(title)

                # save results

                type_label = tr_key
                range_label = f'Range {(i % 6) + 1}' if i % 6 < 5 else 'Full Range'
                
                # Fill the metrics
                test_results_df.at[current_row, (type_label, range_label, 'Return (%)')] = round(total_return,1)
                test_results_df.at[current_row, (type_label, range_label, 'ESG')] = round(np.mean(esg_history),2)
                test_results_df.at[current_row, (type_label, range_label, 'CO2 score')] = round(np.mean(emission_history),2)
                
                # save weights
                weights_df = pd.DataFrame(
                    weights_history,
                    columns=asset_cols,
                    index=df_eval_item.index[:-1]
                )
                
                weights_df["reward"] = reward_history
                weights_df["return"] = return_history
                weights_df["esg"] = esg_history
                weights_df["emission"] = emission_history
                
                #weights_df.to_csv(f'weights_df_{r_index}_{eval_key}.csv', index = True) #, sep = ";", decimal = ",")
                
            print("".join(["-" for _ in range(100)]))
            print(f"{nsa} Assets:")
            plot_combined_upd(cumulative_list, total_return_list, avg_esg_list, avg_em_list, title_list, model_parameters)

======================================================================================================================================================
Validation


====================================================================================================

PPO MODEL 1
(α=1, β=0, γ=0)

----------------------------------------------------------------------------------------------------
7 Assets:

----------------------------------------------------------------------------------------------------
10 Assets:

----------------------------------------------------------------------------------------------------
15 Assets:


====================================================================================================

PPO MODEL 2
(α=1, β=0.5, γ=0.5)

----------------------------------------------------------------------------------------------------
7 Assets:

----------------------------------------------------------------------------------------------------
10 Assets:

----------------------------------------------------------------------------------------------------
15 Assets:


====================================================================================================

PPO MODEL 3
(α=1, β=1, γ=1)

----------------------------------------------------------------------------------------------------
7 Assets:

----------------------------------------------------------------------------------------------------
10 Assets:

----------------------------------------------------------------------------------------------------
15 Assets:


====================================================================================================

PPO MODEL 4
(α=0.1, β=1, γ=1)

----------------------------------------------------------------------------------------------------
7 Assets:

def optimize_portfolio(returns, n_assets_select, lookback=52*5):

    # use only historical window
    returns = returns.tail(lookback)

    mu = returns.mean()*52 # annualized return
    cov = returns.cov()*52 # annualized covariance

    # Select top N assets by Sharpe proxy (mean / std)
    sharpe_proxy = mu / returns.std()
    selected_assets = sharpe_proxy.sort_values(ascending=False).head(n_assets_select).index
    mu_sel = mu[selected_assets]
    cov_sel = cov.loc[selected_assets, selected_assets]

    def neg_sharpe(w):
        port_return = np.dot(w, mu_sel)
        port_vol = np.sqrt(np.dot(w.T, np.dot(cov_sel, w)))
        return -port_return / port_vol

    constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1})
    bounds = tuple((0, 0.25) for _ in range(n_assets_select))
    init_guess = np.ones(n_assets_select) / n_assets_select

    result = minimize(neg_sharpe, init_guess, method='SLSQP',
                      bounds=bounds, constraints=constraints)

    weights = result.x

    return selected_assets, weights

def evaluate_portfolio(eval_df, selected_assets, weights):

    port_returns = eval_df[selected_assets].dot(weights)
    
    cost = 0.001
    port_returns.iloc[0] -= cost

    cumulative = (1 + port_returns).cumprod()
    total_return = (cumulative.iloc[-1] - 1) * 100

    return port_returns, cumulative, total_return

def compute_esg_em(selected_assets, weights, df_esg, df_em, eval_df):

    esg_values = []
    em_values = []

    for asset, w in zip(selected_assets, weights):
        esg_series = df_esg[f"esg_{asset}"].loc[eval_df.index]
        em_series = df_em[f"em_{asset}"].loc[eval_df.index]

        esg_values.append(w * esg_series.mean())
        em_values.append(w * em_series.mean())

    avg_esg = np.sum(esg_values)
    avg_em = np.sum(em_values)

    return avg_esg, avg_em

def plot_combined_mpt(cumulative_list, total_return_list, avg_esg_list, avg_em_list, title_list):

    font = {'size': 8}
    plt.rc('font', **font)
    
    fig, axes = plt.subplots(2, len(cumulative_list), figsize=(5*len(cumulative_list), 6))
    
    for i, cumulative in enumerate(cumulative_list):
        
        total_return = total_return_list[i]
        avg_esg = avg_esg_list[i]
        avg_em = avg_em_list[i]
        title = title_list[i]
        
        start_value = 10000
        portfolio_value = start_value * cumulative
    
        # Portfolio Value
        axes[0][i].plot(portfolio_value, linewidth=3)
        axes[0][i].set_title(f"{title}", fontsize=12, fontweight="bold")
        axes[0][i].set_xlabel("Date", fontsize=10)
        axes[0][i].set_ylabel("Portfolio Value (€)", fontsize=10)
        axes[0][i].grid(True)

        axes[0][i].text(
            0.05, 0.95,
            "MPT",
            transform=axes[0][i].transAxes,
            fontsize=9,
            verticalalignment='top',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='black', boxstyle='round,pad=0.5')
        )
        
        axes[0][i].text(
            0.76, 0.15,
            f"{'+' if total_return > 0 else ''}{total_return:.1f}%",
            transform=axes[0][i].transAxes,
            fontsize=14,
            fontweight = 'bold',
            c = 'red' if total_return < 0 else 'green',
            verticalalignment='top',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='black', boxstyle='round,pad=0.5')
        )
    
        # ESG & Emissions
        bars = axes[1][i].bar(
            ["Average ESG", "Average Emissions"],
            [avg_esg, avg_em]
        )
    
        axes[1][i].set_ylim(0, 1)
        # Different colors
        bars[0].set_color("#2ca02c")   # green for ESG
        bars[1].set_color("#d62728")   # red for emissions
    
        axes[1][i].set_title("Portfolio Sustainability Metrics",
                          fontsize=12, fontweight="bold")
        axes[1][i].set_ylabel("Score", fontsize=10)
        axes[1][i].grid(axis="y")
    
        # Add values above bars
        for bar in bars:
            height = bar.get_height()
            axes[1][i].text(
                bar.get_x() + bar.get_width()/2,
                height,
                f"{height:.2f}",
                ha='center',
                va='bottom',
                fontsize=10,
                fontweight='bold'
            )

    plt.tight_layout()
    plt.show()

validation_ranges = [
    ("2022-11-23","2023-02-15"),
    ("2023-02-15","2023-05-10"),
    ("2023-05-10","2023-08-02"),
    ("2023-08-02","2023-11-01"),
    ("2023-11-01","2024-02-28"),
    ("2022-11-23","2024-02-28")
]

evaluation_ranges = [
    ("2024-02-28","2024-05-22"),
    ("2024-05-22","2024-08-14"),
    ("2024-08-14","2024-11-06"),
    ("2024-11-06","2025-01-29"),
    ("2025-01-29","2025-04-16"),
    ("2024-02-28","2025-04-16")
]

df_dict_mpt = {"MPT (full market)": {}, "MPT (40 assets)": {}}

# FULL DF
# Load dataset
df_full = pd.read_csv("full_dataset.csv", index_col=0, parse_dates=True)
asset_prices = df_full.iloc[:, :140]
asset_returns_full = asset_prices.pct_change().fillna(0.0)

df_dict_mpt['MPT (full market)'] = {'df': df_full, 'asset_returns': asset_returns_full}

# NARROW DF
# Load dataset
df_narrow = pd.read_csv("narrow_original_df.csv", index_col=0, parse_dates=True)
asset_returns_narrow = df_narrow.iloc[:, :40]
asset_returns_narrow = asset_returns_narrow/100

df_dict_mpt['MPT (40 assets)'] = {'df': df_narrow, 'asset_returns': asset_returns_narrow}


test_ranges_dict = {'Validation': validation_ranges, 'Evaluation': evaluation_ranges}
portfolio_sizes = [7, 10, 15]

for df_key, df_item in df_dict_mpt.items():

    print(df_key)
    
    df = df_item['df'].copy()
    asset_returns = df_item['asset_returns'].copy()
    
    asset_names = asset_returns.columns.tolist()
    
    # Extract ESG and Emission columns
    esg_cols = [f"esg_{a}" for a in asset_names]
    em_cols = [f"em_{a}" for a in asset_names]
    
    df_esg = df[esg_cols]
    df_em = df[em_cols]
    
    for tr_key, tr_item in test_ranges_dict.items():
        test_ranges = tr_item
        for n_assets in portfolio_sizes:
            
            model_label = df_key
            current_row = (model_label, n_assets)
        
            cumulative_list = []
            total_return_list = []
            avg_esg_list = []
            avg_em_list = []
            title_list = []
            
            #for n_assets in portfolio_sizes:
            for i, eval_range in enumerate(test_ranges, 1):
                train_df = asset_returns.loc[:eval_range[0]]
                df_eval  = asset_returns.loc[eval_range[0]:eval_range[1]]
                
                selected_assets, weights = optimize_portfolio(train_df, n_assets, lookback=52*5)
        
                port_returns, cumulative, total_return = evaluate_portfolio(df_eval, selected_assets, weights)
        
                avg_esg, avg_em = compute_esg_em(selected_assets, weights, df_esg, df_em, df_eval)

                if i==6:
                    title = f"{tr_key} (full range) | {n_assets} Assets"
                else:
                    title = f"{tr_key} {i} | {n_assets} Assets"
        
                cumulative_list.append(cumulative)
                total_return_list.append(total_return)
                avg_esg_list.append(avg_esg)
                avg_em_list.append(avg_em)
                title_list.append(title)
    
                #save to main results df
                type_label = tr_key
                range_label = f'Range {(i-1 % 6) + 1}' if i-1 % 6 < 5 else 'Full Range'
                
                # Fill the metrics
                test_results_df.at[current_row, (type_label, range_label, 'Return (%)')] = round(total_return,1)
                test_results_df.at[current_row, (type_label, range_label, 'ESG')] = round(avg_esg,2)
                test_results_df.at[current_row, (type_label, range_label, 'CO2 score')] = round(avg_em,2)
                
                
            print(f'\n\n{n_assets} Assets')
            plot_combined_mpt(cumulative_list, total_return_list, avg_esg_list, avg_em_list, title_list)

MPT (full market)


7 Assets


10 Assets


15 Assets


7 Assets


10 Assets


15 Assets

MPT (40 assets)


7 Assets


10 Assets


15 Assets


7 Assets

idx = pd.IndexSlice

styled_df = test_results_df.style

# 2. Define colors for heatmaps
blue_cmap = "Blues"
green_cmap = "Greens"
orange_cmap = "Oranges"

# Custom function for Red-White-Blue Heatmap based on zero
def color_negative_positive(val):
    if val < 0:
        # Higher negative = darker red
        intensity = min(abs(val) / 50, 1) # Cap intensity
        color = f'rgba(255, {255-intensity*150}, {255-intensity*150}, 0.5)'
        return f'background-color: {color}; color: red'
    elif val > 0:
        # Higher positive = darker blue
        intensity = min(val / 50, 1) # Cap intensity
        color = f'rgba({255-intensity*150}, {255-intensity*150}, 255, 0.5)'
        return f'background-color: {color}; color: blue'
    return ''


# 1. Define slices for the two sections
idx = pd.IndexSlice

# --- VALIDATION SECTION ---
# Select columns where Type is 'Validation'
val_df = test_results_df.loc[:, idx['Validation', :, :]]

# Apply styling to Validation subset
styled_val = val_df.style.set_table_styles([
    {'selector': 'th', 'props': [('text-align', 'center'), ('font-weight', 'bold'), ('border', '1px solid #ccc')]},
    {'selector': 'td', 'props': [('text-align', 'center'), ('border', '1px solid #eee')]},
    {'selector': 'th.row_heading', 'props': [('background-color', '#f2f2f2'), ('width', '80px')]},
    {'selector': 'th.col_heading', 'props': [('background-color', '#e6f7ff')]}
])
styled_val.set_properties(**{'width': '80px', 'min-width': '80px'})
# Apply heatmaps and formatting (same logic as before)
for col in val_df.columns:
    metric = col[2]
    if metric == 'Return (%)':
        styled_val.map(color_negative_positive, subset=pd.IndexSlice[:, [col]])
        styled_val.format("{:+.1f}%", subset=pd.IndexSlice[:, [col]])
    elif metric == 'ESG':
        styled_val.background_gradient(cmap="Greens", subset=pd.IndexSlice[:, [col]])
        styled_val.format("{:.2f}", subset=pd.IndexSlice[:, [col]])
    elif metric == 'CO2 score':
        styled_val.background_gradient(cmap="Oranges", subset=pd.IndexSlice[:, [col]])
        styled_val.format("{:.2f}", subset=pd.IndexSlice[:, [col]])

styled_val.set_properties(**{'width': '35px', 'min-width': '35px'})

# --- EVALUATION SECTION ---
# Select columns where Type is 'Evaluation'
eval_df = test_results_df.loc[:, idx['Evaluation', :, :]]

# Apply styling to Evaluation subset (same styling rules)
styled_eval = eval_df.style.set_table_styles([
    {'selector': 'th', 'props': [('text-align', 'center'), ('font-weight', 'bold'), ('border', '1px solid #ccc')]},
    {'selector': 'td', 'props': [('text-align', 'center'), ('border', '1px solid #eee')]},
    {'selector': 'th.row_heading', 'props': [('background-color', '#f2f2f2'), ('width', '80px')]},
    {'selector': 'th.col_heading', 'props': [('background-color', '#e6f7ff')]}
])
styled_eval.set_properties(**{'width': '80px', 'min-width': '80px'})
# Apply heatmaps and formatting (same logic as before)
for col in eval_df.columns:
    metric = col[2]
    if metric == 'Return (%)':
        styled_eval.map(color_negative_positive, subset=pd.IndexSlice[:, [col]])
        styled_eval.format("{:+.1f}%", subset=pd.IndexSlice[:, [col]])
    elif metric == 'ESG':
        styled_eval.background_gradient(cmap="Greens", subset=pd.IndexSlice[:, [col]])
        styled_eval.format("{:.2f}", subset=pd.IndexSlice[:, [col]])
    elif metric == 'CO2 score':
        styled_eval.background_gradient(cmap="Oranges", subset=pd.IndexSlice[:, [col]])
        styled_eval.format("{:.2f}", subset=pd.IndexSlice[:, [col]])

styled_eval.set_properties(**{'width': '35px', 'min-width': '35px'})

# Display them separately
print("Validation Results:")
display(styled_val)

print("\nEvaluation Results:")
display(styled_eval)

with open("validation_results.html", "w") as f:
    f.write(styled_val.to_html())

# Save Evaluation Table
with open("evaluation_results.html", "w") as f:
    f.write(styled_eval.to_html())

Validation Results:

Evaluation Results:

	ANORA.HE	APETIT.HE	CONSTI.HE	CTY1S.HE	ELISA.HE	EQV1V.HE	ETTE.HE	FORTUM.HE	HUH1V.HE	KAMUX.HE	...	nr_SRV1V.HE	nr_PUUILO.HE	nr_LAT1V.HE	nr_HUH1V.HE	nr_KOJAMO.HE	nr_MUSTI.HE	nr_NDA-FI.HE	nr_QTCOM.HE	nr_EQV1V.HE	nr_RAUTE.HE
2002-03-06	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.065327	2.096031	-1.313631	-0.232981	2.232757	0.050976	0.000000	3.917531	0.557474	0.819605
2002-03-13	0.261424	-0.585733	1.536791	0.120700	1.398019	0.557474	0.000000	0.246757	-0.232981	0.574108	...	0.076280	0.714703	-0.998335	0.163470	2.774606	-0.192394	0.000000	-3.785640	-0.554383	-0.344768
2002-03-20	-0.253345	-0.995070	1.921467	0.060279	1.321591	-0.554383	0.000000	0.186734	0.163470	-0.928898	...	0.114998	0.880827	-0.168070	0.173387	1.744822	-0.321042	0.000000	-10.568203	0.557474	0.108315
2002-03-27	-0.424370	-0.330617	1.499279	0.050973	0.049354	0.557474	0.642979	0.420977	0.173387	-1.879564	...	0.056128	0.076131	2.188558	0.678648	-0.287063	-0.288267	0.558216	-7.556225	0.000000	-0.594280
2002-04-03	-0.433261	0.000000	0.321153	-0.069473	0.211413	0.000000	0.072404	0.535778	0.678648	-0.522495	...	-0.044113	-0.844244	4.142155	0.408327	-0.695015	0.076632	0.493438	-5.057616	0.554383	1.890963
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2025-03-19	-0.618165	0.345224	1.511170	-1.245915	1.241318	0.306748	1.464129	-0.088145	-1.428837	0.000000	...	-0.126263	4.570384	0.409840	-1.807229	-1.782915	-0.326531	-1.679587	-1.700187	-0.305810	2.229299
2025-03-26	0.000000	-0.458716	2.427184	0.206825	0.072982	-0.305810	1.154401	1.224085	-1.807229	1.881720	...	0.632111	0.407925	-0.656546	-2.089724	1.496110	0.737101	-2.798949	-0.717148	-0.306748	0.726895
2025-04-02	0.813397	0.230415	2.369668	1.362229	-0.525088	-0.306748	1.711840	-3.039547	-2.089724	-0.131926	...	-3.329146	-2.292513	-2.740846	-4.621108	0.864780	-0.829268	-7.827498	-6.947100	-3.664615	-5.876289
2025-04-09	-1.756051	-0.689655	-3.148148	-2.239870	-2.492669	-3.664615	-1.823282	-5.842697	-4.621108	-3.764861	...	-2.664068	0.801901	0.763313	-1.724492	4.345284	0.491884	-2.405397	-3.926941	-1.775847	-3.176342
2025-04-16	-2.898551	-1.847222	-2.549395	0.562383	0.676692	-1.775847	-0.857143	-4.391408	-1.724492	-3.225806	...	-0.400534	2.946376	1.774908	0.000000	4.257703	-0.244738	-0.390742	-2.875475	-1.313693	0.791855

	Type	Validation
	Range	Range 1			Range 2			Range 3			Range 4			Range 5			Full Range
	Metric	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score
Model	NSAs
PPO MODEL 1 (α=1, β=0, γ=0)	7	+10.9%	0.42	0.85	+4.7%	0.43	0.79	-3.8%	0.40	0.84	-5.3%	0.42	0.83	+30.0%	0.39	0.85	+36.7%	0.41	0.83
	10	+10.7%	0.43	0.84	+2.9%	0.44	0.79	-4.0%	0.40	0.84	-5.0%	0.42	0.82	+29.3%	0.39	0.86	+33.9%	0.42	0.83
	15	+10.2%	0.43	0.84	+2.6%	0.44	0.79	-3.6%	0.41	0.83	-5.4%	0.43	0.82	+28.4%	0.40	0.85	+32.4%	0.42	0.83
PPO MODEL 2 (α=1, β=0.5, γ=0.5)	7	+13.0%	0.52	0.90	-4.0%	0.52	0.89	-4.5%	0.51	0.87	-5.9%	0.51	0.88	+27.4%	0.53	0.88	+23.6%	0.52	0.88
	10	+12.2%	0.52	0.89	-3.4%	0.51	0.88	-5.9%	0.52	0.88	-5.4%	0.50	0.87	+28.4%	0.52	0.88	+23.1%	0.52	0.88
	15	+11.1%	0.51	0.89	-3.5%	0.51	0.88	-5.7%	0.51	0.87	-6.1%	0.50	0.87	+24.4%	0.51	0.88	+17.9%	0.51	0.88
PPO MODEL 3 (α=1, β=1, γ=1)	7	+12.5%	0.48	0.89	-4.4%	0.51	0.88	-8.4%	0.54	0.86	-6.2%	0.51	0.88	+25.1%	0.56	0.89	+16.2%	0.53	0.88
	10	+10.6%	0.48	0.88	-3.2%	0.52	0.88	-8.8%	0.53	0.87	-5.8%	0.50	0.88	+21.2%	0.54	0.89	+12.8%	0.52	0.88
	15	+11.0%	0.48	0.87	-3.1%	0.52	0.87	-8.1%	0.53	0.86	-5.8%	0.50	0.87	+19.4%	0.54	0.88	+11.8%	0.52	0.87
PPO MODEL 4 (α=0.1, β=1, γ=1)	7	+6.8%	0.78	0.91	-10.2%	0.78	0.91	-8.5%	0.78	0.91	-5.8%	0.78	0.91	+0.7%	0.78	0.91	-16.7%	0.78	0.91
	10	+6.4%	0.76	0.91	-9.0%	0.76	0.90	-8.8%	0.76	0.91	-5.4%	0.75	0.91	+1.9%	0.75	0.91	-15.0%	0.75	0.91
	15	+6.0%	0.73	0.90	-8.6%	0.73	0.90	-8.4%	0.73	0.90	-5.0%	0.73	0.90	+2.6%	0.73	0.90	-13.4%	0.73	0.90
MPT (full market)	7	+7.8%	0.43	0.95	-14.1%	0.43	0.95	-3.3%	0.48	0.95	-18.1%	0.47	0.95	+9.2%	0.56	0.95	-26.6%	0.43	0.95
	10	+10.9%	0.37	0.95	-13.3%	0.38	0.95	-4.5%	0.39	0.95	-16.8%	0.41	0.95	+8.9%	0.49	0.95	-21.5%	0.37	0.95
	15	+10.9%	0.37	0.95	-13.4%	0.37	0.95	-4.2%	0.38	0.95	-16.7%	0.38	0.95	+8.8%	0.46	0.95	-21.7%	0.37	0.95
MPT (40 assets)	7	+10.7%	0.55	0.70	+3.4%	0.54	0.71	-1.8%	0.48	0.73	-15.0%	0.49	0.92	+3.6%	0.51	0.94	+5.0%	0.55	0.70
	10	+10.7%	0.55	0.71	+2.5%	0.56	0.76	-3.1%	0.48	0.78	-15.3%	0.49	0.87	+3.0%	0.48	0.94	+4.5%	0.55	0.71
	15	+9.0%	0.54	0.81	+6.4%	0.54	0.82	-3.2%	0.49	0.78	-15.3%	0.49	0.87	+3.0%	0.48	0.94	-0.1%	0.54	0.81

	Type	Evaluation
	Range	Range 1			Range 2			Range 3			Range 4			Range 5			Full Range
	Metric	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score	Return (%)	ESG	CO2 score
Model	NSAs
PPO MODEL 1 (α=1, β=0, γ=0)	7	+19.3%	0.41	0.87	+4.0%	0.41	0.82	+0.2%	0.40	0.82	+3.4%	0.38	0.85	+8.8%	0.41	0.78	+43.1%	0.40	0.83
	10	+16.9%	0.41	0.86	+4.0%	0.41	0.82	+0.4%	0.41	0.82	+3.8%	0.39	0.85	+7.9%	0.41	0.79	+38.4%	0.41	0.83
	15	+15.2%	0.41	0.86	+3.3%	0.41	0.82	-0.6%	0.41	0.81	+2.7%	0.40	0.85	+7.1%	0.42	0.80	+30.9%	0.41	0.83
PPO MODEL 2 (α=1, β=0.5, γ=0.5)	7	+9.2%	0.51	0.90	+2.3%	0.57	0.90	-0.6%	0.56	0.89	+2.7%	0.53	0.89	+5.9%	0.52	0.90	+20.0%	0.54	0.89
	10	+10.0%	0.50	0.89	+2.2%	0.55	0.89	-1.6%	0.54	0.89	+2.0%	0.53	0.88	+6.1%	0.52	0.89	+20.4%	0.53	0.89
	15	+10.5%	0.49	0.88	+2.4%	0.54	0.88	-0.9%	0.54	0.88	+1.7%	0.52	0.88	+5.7%	0.51	0.89	+20.8%	0.52	0.88
PPO MODEL 3 (α=1, β=1, γ=1)	7	+11.7%	0.64	0.87	+6.8%	0.71	0.89	+0.0%	0.66	0.88	-1.5%	0.68	0.88	-1.2%	0.65	0.90	+17.3%	0.67	0.88
	10	+11.9%	0.62	0.87	+6.4%	0.67	0.88	+0.2%	0.63	0.88	-2.0%	0.64	0.88	-0.6%	0.62	0.89	+16.6%	0.64	0.88
	15	+11.5%	0.61	0.87	+5.5%	0.64	0.88	+0.2%	0.60	0.88	-1.8%	0.61	0.88	-0.8%	0.60	0.89	+15.3%	0.61	0.88
PPO MODEL 4 (α=0.1, β=1, γ=1)	7	+11.4%	0.78	0.91	+3.5%	0.78	0.91	+0.3%	0.78	0.91	-3.1%	0.78	0.91	-4.5%	0.78	0.91	+6.7%	0.78	0.91
	10	+10.7%	0.75	0.91	+3.8%	0.76	0.91	+0.6%	0.76	0.91	-2.8%	0.76	0.91	-4.5%	0.75	0.91	+7.3%	0.75	0.91
	15	+10.2%	0.73	0.90	+3.5%	0.73	0.90	+0.6%	0.73	0.90	-2.6%	0.73	0.90	-3.5%	0.73	0.90	+7.7%	0.73	0.90
MPT (full market)	7	+3.7%	0.50	0.95	-0.8%	0.44	0.95	+4.8%	0.42	0.90	+4.2%	0.43	0.90	-9.0%	0.43	0.95	-9.6%	0.50	0.95
	10	+4.6%	0.46	0.91	-0.2%	0.42	0.91	+4.4%	0.42	0.90	+3.8%	0.44	0.90	-9.4%	0.41	0.90	-5.9%	0.46	0.91
	15	+4.5%	0.46	0.91	-0.2%	0.42	0.91	+4.4%	0.42	0.90	+4.5%	0.43	0.90	-9.4%	0.41	0.90	-6.1%	0.46	0.91
MPT (40 assets)	7	+3.8%	0.41	0.94	+1.9%	0.40	0.91	-1.4%	0.36	0.92	-4.9%	0.39	0.88	+0.6%	0.39	0.90	-15.9%	0.41	0.94
	10	+4.6%	0.37	0.93	+1.2%	0.41	0.93	-1.6%	0.38	0.91	-4.9%	0.39	0.88	+0.6%	0.39	0.90	-8.1%	0.37	0.93
	15	+4.6%	0.37	0.93	-4.5%	0.38	0.93	-0.8%	0.40	0.91	-4.8%	0.39	0.88	+1.7%	0.38	0.90	-8.1%	0.37	0.93

Appendix 5. Models' Evaluation And Results Comparison With MPT

Gym Environment

Loading data

MPT