"""cPro evaluation script: a circular projection fitted by gradient descent is compared
against 1D and 2D MDS on a collection of synthetic and real datasets. For every dataset
the script renders the projections, logs the optimisation loss, and writes stress,
distance-correlation, silhouette, trustworthiness and average-distance metrics to
evaluation_output/."""

import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import skdim
import torch
from scipy.spatial import distance_matrix
from scipy.stats import pearsonr
from sklearn import datasets
from sklearn.datasets import make_blobs
from sklearn.manifold import MDS
from sklearn.metrics import silhouette_score, euclidean_distances
from sklearn.metrics.pairwise import cosine_distances, manhattan_distances
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler


def d_cos_distance_matrix(D):
    # High-dimensional reference distances: cosine distances rescaled to [0, 1].
    return cosine_distances(D) / 2


def d_circ_distance_matrix_torch(P):
    # Circular (wrap-around) distances between 1-D positions, rescaled to [0, 1].
    m = torch.cdist(P, P)
    return 2 * torch.fmin(m, 1 - m)


def loss_torch(M, P):
    # Mean absolute difference between the high-dimensional and circular distance matrices.
    return torch.sum(torch.abs(M - d_circ_distance_matrix_torch(P))) / P.shape[0] ** 2


def gradient_descent(D, y, n_iter=100, learning_rate=0.1):
    # Fit the circular projection (cPro) with Adam; y is passed in explicitly and only
    # attached to the result for plotting and evaluation.
    start = time.time()
    H = []
    M = torch.from_numpy(d_cos_distance_matrix(D))
    P = torch.reshape(torch.rand(D.shape[0]), (D.shape[0], 1))
    P.requires_grad_()
    optimizer = torch.optim.Adam([P], lr=learning_rate)
    for i in range(n_iter):
        optimizer.zero_grad()
        loss = loss_torch(M, P)
        H.append(loss.item())
        loss.backward(retain_graph=True)
        optimizer.step()
        # Plain gradient descent, for use without the optimizer
        # (requires a manual parameter update):
        # with torch.no_grad():
        #     P.sub_(P.grad * learning_rate)
        #     P.grad.zero_()
    d = pd.DataFrame(data=P.detach().numpy(), columns=['ori'])
    d['x'] = d['ori'].apply(lambda x: np.cos(x * 2 * np.pi))
    d['y'] = d['ori'].apply(lambda x: np.sin(x * 2 * np.pi))
    d['target'] = y
    return {
        "data_prepared": d,
        "points": P.detach().clone(),
        "loss": loss.detach().clone(),
        "loss_history": H,
        "time": time.time() - start,
    }
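# Illustrative usage sketch (X_demo / y_demo are placeholder names, not defined anywhere
# else in this script):
#   X_demo = np.random.rand(50, 4)                 # 50 points in 4 dimensions
#   y_demo = np.random.randint(0, 3, 50)           # arbitrary labels for colouring
#   res = gradient_descent(X_demo, y_demo, n_iter=50, learning_rate=0.05)
#   res["data_prepared"][["x", "y"]]               # positions on the unit circle
#   res["loss_history"]                            # one loss value per iteration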
def apply_custom_projection(X, y, n_iter=100, learning_rate=0.01):
    # Variant of the circular projection that works from a Euclidean (not cosine)
    # high-dimensional distance matrix and returns (DataFrame, loss_history).
    # It is not called in the evaluation loop below.
    def torch_loss(x, t_hd_dist_mat):
        m = torch.cdist(x, x)
        return torch.mean(torch.abs(t_hd_dist_mat - 2 * torch.min(m, 1 - m)))

    N, _ = X.shape
    # NOTE: Euclidean distances here, unlike gradient_descent, which uses cosine_distances / 2.
    hd_dist_mat = distance_matrix(X, X) / 2
    t_hd_dist_mat = torch.from_numpy(hd_dist_mat)
    init = torch.rand(N, 1, requires_grad=True)
    optimizer = torch.optim.Adam([init], lr=learning_rate)
    loss_history = []
    start = time.time()
    for i in range(n_iter):
        optimizer.zero_grad()
        loss = torch_loss(init, t_hd_dist_mat)
        loss.backward()
        optimizer.step()
        loss_history.append(loss.item())
    print(f"Final Loss: {loss.item()}")
    print(f"Optimization Time: {time.time() - start:.2f} seconds")
    d = pd.DataFrame(data=init.detach().numpy(), columns=['ori'])
    d['x'] = d['ori'].apply(lambda x: np.cos(x * 2 * np.pi))
    d['y'] = d['ori'].apply(lambda x: np.sin(x * 2 * np.pi))
    d['target'] = y
    return d, loss_history


def apply_2d_mds(X):
    scaler = StandardScaler()
    X_std = scaler.fit_transform(X)  # standardized features
    mds = MDS(n_components=2, random_state=777)
    return mds.fit_transform(X_std)


def apply_1d_mds(X):
    scaler = StandardScaler()
    X_std = scaler.fit_transform(X)  # standardized features
    mds = MDS(n_components=1, random_state=777)
    return mds.fit_transform(X_std)


def visualize_cPro(d, y):
    # d is the "data_prepared" DataFrame from gradient_descent; y is unused because the
    # DataFrame already carries a 'target' column.
    d['target'] = d['target'].astype('category')
    plt.figure(figsize=(8, 8))
    sns.scatterplot(data=d, x='x', y='y', hue='target', palette='husl',
                    edgecolor='white', linewidth=0.2, legend=False)
    plt.gca().set_aspect('equal', 'box')
    plt.gca().set_facecolor('white')
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim(-1.1, 1.1)
    plt.ylim(-1.1, 1.1)
    plt.show()


def visualize_2d_mds(X_2d, y):
    df_2d = pd.DataFrame(X_2d, columns=['Component 1', 'Component 2'])
    df_2d['Target'] = y
    # Use a categorical palette that supports a large number of categories.
    n_categories = df_2d['Target'].nunique()
    palette = sns.color_palette("husl", n_categories)
    plt.figure(figsize=(8, 8))
    sns.scatterplot(data=df_2d, x='Component 1', y='Component 2', hue='Target',
                    palette=palette, linewidth=0.2, edgecolor='white', legend=False)
    plt.title('2D MDS Projection')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    # plt.legend().set_title('Target')
    plt.show()


def visualize_1d_mds(X_1d, y):
    df_1d = pd.DataFrame(X_1d, columns=['Component 1'])
    df_1d['y'] = 0  # dummy second component for visualization
    df_1d['Target'] = y
    n_categories = df_1d['Target'].nunique()
    palette = sns.color_palette("husl", n_categories)
    plt.figure(figsize=(8, 2))
    sns.scatterplot(data=df_1d, x='Component 1', y='y', hue='Target',
                    palette=palette, linewidth=0.2, edgecolor='white', legend=False)
    plt.title('1D MDS Projection')
    plt.xlabel('Component 1')
    plt.yticks([])
    # plt.legend(title='Target', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()
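# The circular layout stores a single number per point ('ori') and maps it onto the unit
# circle via x = cos(2*pi*ori), y = sin(2*pi*ori). Worked values for illustration:
#   ori = 0.00 -> ( 1,  0)
#   ori = 0.25 -> ( 0,  1)
#   ori = 0.50 -> (-1,  0)
#   ori = 0.75 -> ( 0, -1)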
def evaluate(data_std, X_custom, X_2d, X_1d, y):
    # Compare the three projections (2D MDS, 1D MDS, cPro) against the high-dimensional
    # reference data_std; y holds the class labels used for the silhouette scores.
    custom_projection = X_custom[['x', 'y']]
    # The pairwise distance matrices used for stress/correlation are computed further
    # below, after the metric helpers.

    # NOTE: this first calculate_trustworthiness, and calculate_continuity, replace
    # X_high / X_low with distance matrices before fitting NearestNeighbors with the same
    # metric. calculate_continuity is not used by the metrics below, and
    # calculate_trustworthiness is redefined (and superseded) further down.
    def calculate_trustworthiness(X_high, X_low, n_neighbors=5, distance='euclidean'):
        if distance == 'cosine':
            X_high = cosine_distances(X_high)
            X_low = cosine_distances(X_low)
        elif distance == 'manhattan':
            X_high = manhattan_distances(X_high)
            X_low = manhattan_distances(X_low)
        N = X_high.shape[0]
        nn_orig = NearestNeighbors(n_neighbors=n_neighbors + 1, metric=distance).fit(X_high)
        _, indices_orig = nn_orig.kneighbors(X_high)
        nn_proj = NearestNeighbors(n_neighbors=N, metric=distance).fit(X_low)
        _, indices_proj = nn_proj.kneighbors(X_low)
        rank_matrix = np.full((N, n_neighbors), N)
        for i in range(N):
            for j in range(1, n_neighbors + 1):
                if indices_orig[i, j] in indices_proj[i, 1:]:
                    rank_matrix[i, j - 1] = np.where(indices_proj[i] == indices_orig[i, j])[0][0]
        rank_matrix -= (n_neighbors + 1)
        trustworthiness = 1 - (2.0 / (N * n_neighbors * (2 * N - 3 * n_neighbors - 1)) *
                               np.sum(rank_matrix[rank_matrix > n_neighbors] - n_neighbors))
        return trustworthiness

    def calculate_continuity(X_high, X_low, n_neighbors=5, distance='euclidean'):
        if distance == 'euclidean':
            X_high = euclidean_distances(X_high)
            X_low = euclidean_distances(X_low)
        elif distance == 'cosine':
            X_high = cosine_distances(X_high)
            X_low = cosine_distances(X_low)
        elif distance == 'manhattan':
            X_high = manhattan_distances(X_high)
            X_low = manhattan_distances(X_low)
        N = X_high.shape[0]
        nn_low = NearestNeighbors(n_neighbors=n_neighbors + 1, metric=distance).fit(X_low)
        _, indices_low = nn_low.kneighbors(X_low)
        nn_high = NearestNeighbors(n_neighbors=N, metric=distance).fit(X_high)
        _, indices_high = nn_high.kneighbors(X_high)
        rank_matrix = np.full((N, n_neighbors), N)
        for i in range(N):
            for j in range(1, n_neighbors + 1):
                if indices_low[i, j] in indices_high[i, 1:]:
                    rank_matrix[i, j - 1] = np.where(indices_high[i] == indices_low[i, j])[0][0]
        rank_matrix -= (n_neighbors + 1)
        continuity = 1 - (2.0 / (N * n_neighbors * (2 * N - 3 * n_neighbors - 1)) *
                          np.sum(rank_matrix[rank_matrix > n_neighbors] - n_neighbors))
        return continuity

    def calculate_stress(hd_distances, ld_distances):
        # Normalised stress: 0 means the low-dimensional distances match the
        # high-dimensional ones exactly.
        return np.sqrt(np.sum((hd_distances - ld_distances) ** 2) / np.sum(hd_distances ** 2))

    def calculate_correlation(hd_distances, ld_distances):
        # Pearson correlation between the flattened distance matrices.
        return pearsonr(hd_distances.flatten(), ld_distances.flatten())[0]

    # This second definition shadows the one above and is the version actually used below.
    def calculate_trustworthiness(X_high, X_low, n_neighbors=5, distance='euclidean'):
        N = X_high.shape[0]
        # Depending on the distance metric, calculate the distances
        # (computed here but not used further below).
        if distance == 'cosine':
            high_dist = cosine_distances(X_high)
            low_dist = cosine_distances(X_low)
        elif distance == 'manhattan':
            high_dist = manhattan_distances(X_high)
            low_dist = manhattan_distances(X_low)
        else:  # Euclidean
            high_dist = euclidean_distances(X_high)
            low_dist = euclidean_distances(X_low)
        # Nearest neighbours in the original high-dimensional space
        nn_orig = NearestNeighbors(n_neighbors=n_neighbors + 1, metric=distance).fit(X_high)
        _, indices_orig = nn_orig.kneighbors(X_high)
        # Full ranking of all points in the low-dimensional space
        nn_proj = NearestNeighbors(n_neighbors=N, metric=distance).fit(X_low)
        _, indices_proj = nn_proj.kneighbors(X_low)
        # Rank of each original-space neighbour within the projected ordering
        rank_matrix = np.full((N, n_neighbors), N)
        for i in range(N):
            for j in range(1, n_neighbors + 1):
                high_neighbor = indices_orig[i, j]
                if high_neighbor in indices_proj[i]:
                    low_neighbor_rank = np.where(indices_proj[i] == high_neighbor)[0][0]
                    rank_matrix[i, j - 1] = low_neighbor_rank
        # Subtract (n_neighbors + 1) so only neighbours pushed outside the
        # k-neighbourhood contribute to the sum
        rank_matrix -= (n_neighbors + 1)
        trustworthiness = 1 - (2.0 / (N * n_neighbors * (2 * N - 3 * n_neighbors - 1)) *
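    # Reference formulation (Venna & Kaski): with U_k(i) the points among the k nearest
    # neighbours of i in the projection that are not among its k nearest neighbours in the
    # original space, and r(i, j) the rank of j by distance from i in the original space,
    #   T(k) = 1 - 2 / (N * k * (2N - 3k - 1)) * sum_i sum_{j in U_k(i)} (r(i, j) - k).
    # The helpers above use the same normalisation but rank original-space neighbours
    # within the projected ordering (and calculate_continuity does the reverse), i.e. the
    # mirror image of the convention used by sklearn.manifold.trustworthiness; worth
    # keeping in mind if these numbers are compared against sklearn's implementation.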
                               np.sum(rank_matrix[rank_matrix > 0]))
        return trustworthiness

    # Average pairwise distance of a projection, used as a rough proxy for compactness.
    def calculate_average_distance(X, distance='euclidean'):
        if distance == 'cosine':
            dist = cosine_distances(X)
        elif distance == 'manhattan':
            dist = manhattan_distances(X)
        else:  # Euclidean
            dist = euclidean_distances(X)
        return np.mean(dist)

    # Average distance for each projection and distance metric
    avg_dist_euclidean_2d = calculate_average_distance(X_2d, distance='euclidean')
    avg_dist_euclidean_1d = calculate_average_distance(X_1d, distance='euclidean')
    avg_dist_euclidean_custom = calculate_average_distance(custom_projection, distance='euclidean')
    avg_dist_cosine_2d = calculate_average_distance(X_2d, distance='cosine')
    avg_dist_cosine_1d = calculate_average_distance(X_1d, distance='cosine')
    avg_dist_cosine_custom = calculate_average_distance(custom_projection, distance='cosine')
    avg_dist_manhattan_2d = calculate_average_distance(X_2d, distance='manhattan')
    avg_dist_manhattan_1d = calculate_average_distance(X_1d, distance='manhattan')
    avg_dist_manhattan_custom = calculate_average_distance(custom_projection, distance='manhattan')

    # Euclidean distances
    hd_euclidean_dist = euclidean_distances(data_std)
    ld_euclidean_dist_2d = euclidean_distances(X_2d)
    ld_euclidean_dist_1d = euclidean_distances(X_1d)
    ld_euclidean_dist_custom = euclidean_distances(custom_projection)
    # Cosine distances
    hd_cosine_dist = cosine_distances(data_std)
    ld_cosine_dist_2d = cosine_distances(X_2d)
    ld_cosine_dist_1d = cosine_distances(X_1d)
    ld_cosine_dist_custom = cosine_distances(custom_projection)
    # Manhattan distances
    hd_manhattan_dist = manhattan_distances(data_std)
    ld_manhattan_dist_2d = manhattan_distances(X_2d)
    ld_manhattan_dist_1d = manhattan_distances(X_1d)
    ld_manhattan_dist_custom = manhattan_distances(custom_projection)

    # Stress and correlation calculations for each metric
    # Euclidean
    stress_euclidean_2d = calculate_stress(hd_euclidean_dist, ld_euclidean_dist_2d)
    stress_euclidean_1d = calculate_stress(hd_euclidean_dist, ld_euclidean_dist_1d)
    stress_euclidean_custom = calculate_stress(hd_euclidean_dist, ld_euclidean_dist_custom)
    correlation_euclidean_2d = calculate_correlation(hd_euclidean_dist, ld_euclidean_dist_2d)
    correlation_euclidean_1d = calculate_correlation(hd_euclidean_dist, ld_euclidean_dist_1d)
    correlation_euclidean_custom = calculate_correlation(hd_euclidean_dist, ld_euclidean_dist_custom)
    # Cosine
    stress_cosine_2d = calculate_stress(hd_cosine_dist, ld_cosine_dist_2d)
    stress_cosine_1d = calculate_stress(hd_cosine_dist, ld_cosine_dist_1d)
    stress_cosine_custom = calculate_stress(hd_cosine_dist, ld_cosine_dist_custom)
    correlation_cosine_2d = calculate_correlation(hd_cosine_dist, ld_cosine_dist_2d)
    correlation_cosine_1d = calculate_correlation(hd_cosine_dist, ld_cosine_dist_1d)
    correlation_cosine_custom = calculate_correlation(hd_cosine_dist, ld_cosine_dist_custom)
    # Manhattan
    stress_manhattan_2d = calculate_stress(hd_manhattan_dist, ld_manhattan_dist_2d)
    stress_manhattan_1d = calculate_stress(hd_manhattan_dist, ld_manhattan_dist_1d)
    stress_manhattan_custom = calculate_stress(hd_manhattan_dist, ld_manhattan_dist_custom)
    correlation_manhattan_2d = calculate_correlation(hd_manhattan_dist, ld_manhattan_dist_2d)
    correlation_manhattan_1d = calculate_correlation(hd_manhattan_dist, ld_manhattan_dist_1d)
    correlation_manhattan_custom = calculate_correlation(hd_manhattan_dist, ld_manhattan_dist_custom)
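    # Illustrative check of the stress definition above (hand-picked numbers, not data
    # from the pipeline): for hd = [[0, 1], [1, 0]] and ld = [[0, 2], [2, 0]],
    # sum((hd - ld)**2) = 2 and sum(hd**2) = 2, so calculate_stress returns sqrt(2 / 2) = 1.0.
    # A stress of 0 means distances are preserved exactly; larger values mean more distortion.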
    # Silhouette scores for each metric
    silhouette_euclidean_2d = silhouette_score(X_2d, y)
    silhouette_euclidean_1d = silhouette_score(X_1d, y)
    silhouette_euclidean_custom = silhouette_score(custom_projection, y)
    silhouette_cosine_2d = silhouette_score(X_2d, y, metric='cosine')
    silhouette_cosine_1d = silhouette_score(X_1d, y, metric='cosine')
    silhouette_cosine_custom = silhouette_score(custom_projection, y, metric='cosine')
    silhouette_manhattan_2d = silhouette_score(X_2d, y, metric='manhattan')
    silhouette_manhattan_1d = silhouette_score(X_1d, y, metric='manhattan')
    silhouette_manhattan_custom = silhouette_score(custom_projection, y, metric='manhattan')

    # Trustworthiness
    # Euclidean
    trust_euclidean_2d = calculate_trustworthiness(data_std, X_2d, distance='euclidean')
    trust_euclidean_1d = calculate_trustworthiness(data_std, X_1d, distance='euclidean')
    trust_euclidean_custom = calculate_trustworthiness(data_std, custom_projection, distance='euclidean')
    # Cosine
    trust_cosine_2d = calculate_trustworthiness(data_std, X_2d, distance='cosine')
    trust_cosine_1d = calculate_trustworthiness(data_std, X_1d, distance='cosine')
    trust_cosine_custom = calculate_trustworthiness(data_std, custom_projection, distance='cosine')
    # Manhattan
    trust_manhattan_2d = calculate_trustworthiness(data_std, X_2d, distance='manhattan')
    trust_manhattan_1d = calculate_trustworthiness(data_std, X_1d, distance='manhattan')
    trust_manhattan_custom = calculate_trustworthiness(data_std, custom_projection, distance='manhattan')

    # Print a summary of the calculated metrics
    print("2D MDS Metrics:")
    print(f" Euclidean - Stress: {stress_euclidean_2d}, Correlation: {correlation_euclidean_2d}, Silhouette: {silhouette_euclidean_2d}, Trustworthiness: {trust_euclidean_2d}, Avg Dist: {avg_dist_euclidean_2d}")
    print(f" Cosine - Stress: {stress_cosine_2d}, Correlation: {correlation_cosine_2d}, Silhouette: {silhouette_cosine_2d}, Trustworthiness: {trust_cosine_2d}, Avg Dist: {avg_dist_cosine_2d}")
    print(f" Manhattan - Stress: {stress_manhattan_2d}, Correlation: {correlation_manhattan_2d}, Silhouette: {silhouette_manhattan_2d}, Trustworthiness: {trust_manhattan_2d}, Avg Dist: {avg_dist_manhattan_2d}")
    print("\n")
    print("1D MDS Metrics:")
    print(f" Euclidean - Stress: {stress_euclidean_1d}, Correlation: {correlation_euclidean_1d}, Silhouette: {silhouette_euclidean_1d}, Trustworthiness: {trust_euclidean_1d}, Avg Dist: {avg_dist_euclidean_1d}")
    print(f" Cosine - Stress: {stress_cosine_1d}, Correlation: {correlation_cosine_1d}, Silhouette: {silhouette_cosine_1d}, Trustworthiness: {trust_cosine_1d}, Avg Dist: {avg_dist_cosine_1d}")
    print(f" Manhattan - Stress: {stress_manhattan_1d}, Correlation: {correlation_manhattan_1d}, Silhouette: {silhouette_manhattan_1d}, Trustworthiness: {trust_manhattan_1d}, Avg Dist: {avg_dist_manhattan_1d}")
    print("\n")
    print("Custom Projection Metrics:")
    print(f" Euclidean - Stress: {stress_euclidean_custom}, Correlation: {correlation_euclidean_custom}, Silhouette: {silhouette_euclidean_custom}, Trustworthiness: {trust_euclidean_custom}, Avg Dist: {avg_dist_euclidean_custom}")
    print(f" Cosine - Stress: {stress_cosine_custom}, Correlation: {correlation_cosine_custom}, Silhouette: {silhouette_cosine_custom}, Trustworthiness: {trust_cosine_custom}, Avg Dist: {avg_dist_cosine_custom}")
    print(f" Manhattan - Stress: {stress_manhattan_custom}, Correlation: {correlation_manhattan_custom}, Silhouette: {silhouette_manhattan_custom}, Trustworthiness: {trust_manhattan_custom}, Avg Dist: {avg_dist_manhattan_custom}")
    print("\n")
    return {
        "2D MDS Metrics": {
            "Euclidean": {"Stress": stress_euclidean_2d, "Correlation": correlation_euclidean_2d,
                          "Silhouette": silhouette_euclidean_2d, "Trustworthiness": trust_euclidean_2d,
                          "Avg Dist": avg_dist_euclidean_2d},
            "Cosine": {"Stress": stress_cosine_2d, "Correlation": correlation_cosine_2d,
                       "Silhouette": silhouette_cosine_2d, "Trustworthiness": trust_cosine_2d,
                       "Avg Dist": avg_dist_cosine_2d},
            "Manhattan": {"Stress": stress_manhattan_2d, "Correlation": correlation_manhattan_2d,
                          "Silhouette": silhouette_manhattan_2d, "Trustworthiness": trust_manhattan_2d,
                          "Avg Dist": avg_dist_manhattan_2d},
        },
        "1D MDS Metrics": {
            "Euclidean": {"Stress": stress_euclidean_1d, "Correlation": correlation_euclidean_1d,
                          "Silhouette": silhouette_euclidean_1d, "Trustworthiness": trust_euclidean_1d,
                          "Avg Dist": avg_dist_euclidean_1d},
            "Cosine": {"Stress": stress_cosine_1d, "Correlation": correlation_cosine_1d,
                       "Silhouette": silhouette_cosine_1d, "Trustworthiness": trust_cosine_1d,
                       "Avg Dist": avg_dist_cosine_1d},
            "Manhattan": {"Stress": stress_manhattan_1d, "Correlation": correlation_manhattan_1d,
                          "Silhouette": silhouette_manhattan_1d, "Trustworthiness": trust_manhattan_1d,
                          "Avg Dist": avg_dist_manhattan_1d},
        },
        "Custom Projection Metrics": {
            "Euclidean": {"Stress": stress_euclidean_custom, "Correlation": correlation_euclidean_custom,
                          "Silhouette": silhouette_euclidean_custom, "Trustworthiness": trust_euclidean_custom,
                          "Avg Dist": avg_dist_euclidean_custom},
            "Cosine": {"Stress": stress_cosine_custom, "Correlation": correlation_cosine_custom,
                       "Silhouette": silhouette_cosine_custom, "Trustworthiness": trust_cosine_custom,
                       "Avg Dist": avg_dist_cosine_custom},
            "Manhattan": {"Stress": stress_manhattan_custom, "Correlation": correlation_manhattan_custom,
                          "Silhouette": silhouette_manhattan_custom, "Trustworthiness": trust_manhattan_custom,
                          "Avg Dist": avg_dist_manhattan_custom},
        },
    }


def preprocess_iris():
    iris = datasets.load_iris(as_frame=True)
    iris_std = iris.data - iris.data.mean()
    y = iris.target
    X = iris_std
    # hd_dist_mat = cosine_distances(iris_std.to_numpy()) / 2
    return X, y, iris_std


def preprocess_2dsphere(n_points=100, n_classes=2, d=2):
    data = skdim.datasets.hyperSphere(n=n_points, d=d)
    df = pd.DataFrame(data, columns=['x', 'y'])
    # Classify points based on the x coordinate being positive or negative
    y = np.where(df['x'] >= 0, 1, 0)
    scaler = StandardScaler()
    data_std = scaler.fit_transform(df)
    return df, y, data_std


def preprocess_3dsphere(n_points=100, n_classes=2, d=3):
    data = skdim.datasets.hyperSphere(n=n_points, d=d)
    df = pd.DataFrame(data, columns=['x', 'y', 'z'])
    y = np.where(df['x'] >= 0, 1, 0)
    scaler = StandardScaler()
    data_std = scaler.fit_transform(df)
    return df, y, data_std


def preprocess_4dsphere(n_points=100, n_classes=2, d=4):
    data = skdim.datasets.hyperSphere(n=n_points, d=d)
    df = pd.DataFrame(data, columns=['x', 'y', 'z', 'a'])
    y = np.where(df['x'] >= 0, 1, 0)
    scaler = StandardScaler()
    data_std = scaler.fit_transform(df)
    return df, y, data_std


def preprocess_5dsphere(n_points=100, n_classes=2, d=5):
    data = skdim.datasets.hyperSphere(n=n_points, d=d)
    df = pd.DataFrame(data, columns=['x', 'y', 'z', 'a', 'b'])
    y = np.where(df['x'] >= 0, 1, 0)
    scaler = StandardScaler()
    data_std = scaler.fit_transform(df)
    return df, y, data_std
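# Labelling convention for the hypersphere datasets above: class 1 if the first coordinate
# is non-negative, class 0 otherwise, so each sphere is split into two hemispheres.
# For example, a sampled point (0.6, -0.8) gets class 1 and (-0.6, 0.8) gets class 0.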
def preprocess_s_curve(n_points=1000):
    from sklearn.datasets import make_s_curve
    data, t = make_s_curve(n_points, random_state=777)
    data_mean_subtracted = data - np.mean(data, axis=0)
    labels = np.where(t > np.median(t), 1, 0)
    df = pd.DataFrame(data_mean_subtracted, columns=['x', 'y', 'z'])
    return data_mean_subtracted, labels, df


def preprocess_citations(sample_frac=1):
    import json
    json_path = "data/papers_with_keys.json"
    with open(json_path) as file:
        data = json.load(file)
    papers_df = pd.DataFrame(data['papers'])
    papers_df_sampled = papers_df.sample(frac=sample_frac, random_state=777)
    features = ['citationCount', 'referenceCount', 'year']
    papers_mean_subtracted = papers_df_sampled[features] - papers_df_sampled[features].mean()
    papers_df_sampled['layer'] = papers_df_sampled['layer'].astype('category')
    # papers_mean_subtracted serves both as D and data_std
    return papers_mean_subtracted, papers_df_sampled['layer'], papers_mean_subtracted


def preprocess_concentric_circles(num_circles=3, num_points=100):
    # Initialize arrays to store data
    all_x = []
    all_y = []
    classes = []
    # Generate points for each circle
    for i in range(num_circles):
        radius = 1 + i * 0.5  # incrementing radius for each circle
        theta = np.linspace(0, 2 * np.pi, num_points)
        x = radius * np.cos(theta)
        y = radius * np.sin(theta)
        all_x.extend(x)
        all_y.extend(y)
        classes.extend([i] * num_points)  # assign class 'i' for each circle
    # Convert to DataFrame
    df = pd.DataFrame({'x': all_x, 'y': all_y, 'class': classes})
    # Subtract the mean from each feature
    df[['x', 'y']] = df[['x', 'y']] - df[['x', 'y']].mean()
    # Extract X and y
    X = df[['x', 'y']].values  # features with mean subtracted
    y = df['class'].values  # classes
    return X, y, df


def preprocess_blobs(n_points=80, n_clusters=4):
    centers = [(-5, -5), (5, -5), (-5, 5), (5, 5)]
    data, labels = make_blobs(n_samples=n_points, centers=centers, n_features=2, random_state=777)
    df = pd.DataFrame(data, columns=['x', 'y'])
    df_shifted = df - df.mean()
    plt.figure(figsize=(6, 6))
    for i in range(n_clusters):
        plt.scatter(df_shifted['x'][labels == i], df_shifted['y'][labels == i],
                    label=f'Cluster {i}', edgecolor='k', s=50, alpha=0.7)
    plt.title("Blobs in Quadrants Visualization")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.grid(True)
    plt.show()
    return df_shifted, labels, df_shifted


def preprocess_penguins():
    penguins = sns.load_dataset('penguins').dropna().reset_index(drop=True)
    features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
    penguins_features = penguins[features]
    penguins_std = penguins_features - penguins_features.mean()
    hd_dist_mat = cosine_distances(penguins_std) / 2
    target = 'species'
    species_mapping = {species: idx for idx, species in enumerate(penguins[target].unique())}
    y = penguins[target].map(species_mapping).values
    sns.pairplot(penguins_std.join(penguins[target]), hue=target)
    plt.suptitle('Pairplot of Standardized Penguin Features', verticalalignment='top')
    return penguins_std, y, penguins_std
def preprocess_penguins_around_max():
    penguins = sns.load_dataset('penguins').dropna().reset_index(drop=True)
    features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
    penguins_features = penguins[features]
    # Step 1: Center the data by subtracting the mean
    penguins_centered = penguins_features - penguins_features.mean()
    # Find the maximum value point in the centered data
    max_point = penguins_centered.max()
    # specified_point is the per-feature target value the maximum should be moved to
    specified_point = [1, 1, 1, 1]  # shift each feature so that its maximum sits at 1
    # Calculate the difference between the specified point and the maximum value point
    shift = specified_point - max_point
    # Step 2: Shift the centered data to the specified point
    penguins_std = penguins_centered + shift
    hd_dist_mat = cosine_distances(penguins_std) / 2
    target = 'species'
    species_mapping = {species: idx for idx, species in enumerate(penguins[target].unique())}
    y = penguins[target].map(species_mapping).values
    sns.pairplot(penguins_std.join(penguins[target]), hue=target)
    plt.suptitle('Pairplot of Standardized Penguin Features', verticalalignment='top')
    return penguins_std, y, penguins_std


def preprocess_penguins_around_min_halfway():
    penguins = sns.load_dataset('penguins').dropna().reset_index(drop=True)
    features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
    penguins_features = penguins[features]
    # Step 1: Center the data by subtracting the mean
    penguins_centered = penguins_features - penguins_features.mean()
    # Find the minimum value point in the centered data
    min_point = penguins_centered.min()
    # specified_point is the per-feature target value used for the halfway shift
    specified_point = [1, 1, 1, 1]  # shift each feature so that its minimum moves halfway to 1
    # Calculate the halfway point between the specified point and the minimum value point
    halfway_shift = (specified_point + min_point) / 2
    # Calculate the final shift to apply to the centered data
    shift = halfway_shift - min_point
    # Step 2: Shift the centered data to the halfway point
    penguins_std = penguins_centered + shift
    # Calculate the pairwise cosine distances
    hd_dist_mat = cosine_distances(penguins_std) / 2
    target = 'species'
    species_mapping = {species: idx for idx, species in enumerate(penguins[target].unique())}
    y = penguins[target].map(species_mapping).values
    # Pairplot of the standardized features
    sns.pairplot(penguins_std.join(penguins[[target]]), hue=target)
    plt.suptitle('Pairplot of Standardized Penguin Features Shifted Halfway', verticalalignment='top')
    return penguins_std, y, penguins_std
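# Worked example of the shift logic in preprocess_penguins_around_min_halfway (numbers are
# illustrative, not taken from the data): if a centred feature has min_point = -2 and
# specified_point = 1, then halfway_shift = (1 + (-2)) / 2 = -0.5 and
# shift = -0.5 - (-2) = 1.5, so the feature's minimum moves from -2 to -0.5, i.e. halfway
# between its old minimum and the target value 1.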
def preprocess_unbalanced():
    np.random.seed(777)
    n_cluster_points = 100
    n_isolated_points = 3
    cluster = np.random.normal(loc=[-2, 0], scale=0.5, size=(n_cluster_points, 2))
    isolated_points = np.array([[5, 0], [5, 1], [5, -1]])
    data = np.vstack([cluster, isolated_points])
    df = pd.DataFrame(data, columns=['x', 'y'])
    df['target'] = [0] * n_cluster_points + list(range(1, n_isolated_points + 1))
    data_std = data - data.mean()
    hd_dist_mat = cosine_distances(data_std) / 2
    y = df['target'].values
    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=df, x='x', y='y', hue='target', palette='viridis', style='target', markers=True)
    plt.title('Unbalanced Dataset Visualization')
    plt.xlabel('Standardized X')
    plt.ylabel('Standardized Y')
    plt.legend(title='Target')
    plt.show()
    return data_std, y, data_std


# Shared implementation for the preprocess_3d_blobs_v* variants below.
def _preprocess_3d_blobs(n_points=400, n_clusters=8, cluster_std=0.5):
    # Define cluster centers for 8 clusters in 3D
    centers = [(-5, -5, -5), (5, -5, -5), (-5, 5, -5), (5, 5, -5),
               (-5, -5, 5), (5, -5, 5), (-5, 5, 5), (5, 5, 5)]
    X, y = make_blobs(n_samples=n_points, centers=centers, cluster_std=cluster_std,
                      n_features=3, random_state=42)
    # Convert to DataFrame for easier plotting and manipulation
    df = pd.DataFrame(X, columns=['x', 'y', 'z'])
    df['target'] = y
    # Normalize data to range [-1, 1] and center the mean to 0
    df[['x', 'y', 'z']] = (
        (df[['x', 'y', 'z']] - df[['x', 'y', 'z']].mean())
        / (df[['x', 'y', 'z']].max() - df[['x', 'y', 'z']].min()) * 2
    )
    # Plot
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d', facecolor='white')
    ax.grid(True, linestyle='-', color='whitesmoke', alpha=0.8)
    ax.set_xticks([-1, -0.5, 0, 0.5, 1])
    ax.set_yticks([-1, -0.5, 0, 0.5, 1])
    ax.set_zticks([-1, -0.5, 0, 0.5, 1])
    # Define color palette
    palette = sns.color_palette("husl", n_clusters)
    # Plot each cluster with a different color
    for i in range(n_clusters):
        ax.scatter(df.loc[df['target'] == i, 'x'],
                   df.loc[df['target'] == i, 'y'],
                   df.loc[df['target'] == i, 'z'],
                   color=palette[i], edgecolor='white', linewidth=0.5, marker='o', s=20)
    ax.set_title('3D Blob Dataset with 8 Clusters (Normalized)')
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_zlabel('Z axis')
    plt.show()
    print(df, y, df)
    return df.drop('target', axis=1), y, df


def preprocess_3d_blobs_v1(n_points=400, n_clusters=8, cluster_std=0.5):
    return _preprocess_3d_blobs(n_points, n_clusters, cluster_std)


def preprocess_3d_blobs_v2(n_points=400, n_clusters=8, cluster_std=1):
    return _preprocess_3d_blobs(n_points, n_clusters, cluster_std)


def preprocess_3d_blobs_v3(n_points=400, n_clusters=8, cluster_std=4):
    return _preprocess_3d_blobs(n_points, n_clusters, cluster_std)


def preprocess_3d_blobs_v4(n_points=400, n_clusters=8, cluster_std=8):
    return _preprocess_3d_blobs(n_points, n_clusters, cluster_std)
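# The four variants above share _preprocess_3d_blobs and differ only in cluster_std
# (0.5, 1, 4, 8): the centres sit 10 units apart along each axis, so the smaller values
# give well-separated clusters while the larger ones make the eight blobs overlap heavily.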
# A flatter torus with a dense point distribution, coloured by a continuous target.
def preprocess_3d_torus(n_points=30, R=3, r=0.6, n_targets=5, noise_level=0.05):
    theta = np.linspace(0, 2 * np.pi, n_points)
    phi = np.linspace(0, 2 * np.pi, n_points)
    theta, phi = np.meshgrid(theta, phi)
    theta = theta.flatten()
    phi = phi.flatten()
    # Torus parameterisation: theta runs around the tube, phi around the ring;
    # z is scaled by 0.1 to flatten the torus.
    x = (R + r * np.cos(theta)) * np.cos(phi)
    y = (R + r * np.cos(theta)) * np.sin(phi)
    z = r * np.sin(theta) * 0.1
    x += np.random.normal(0, noise_level, x.shape)
    y += np.random.normal(0, noise_level, y.shape)
    z += np.random.normal(0, noise_level, z.shape)
    df = pd.DataFrame({'x': x, 'y': y, 'z': z})
    df[['x', 'y']] = (df[['x', 'y']] - df[['x', 'y']].mean()) / (df[['x', 'y']].max() - df[['x', 'y']].min()) * 2
    # 'target' bins the ring angle phi into n_targets discrete classes;
    # 'continuous_target' gives every point its own value in [0, 1].
    df['target'] = np.floor(((phi / (2 * np.pi)) * n_targets)).astype(int)
    num_points = len(df)
    df['continuous_target'] = np.linspace(0, 1, num_points)
    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(111, projection='3d', facecolor='white')
    ax.grid(False)  # disable the grid
    from matplotlib.colors import ListedColormap
    num_colors = len(df['continuous_target'].unique())
    husl_colors = sns.color_palette("husl", num_colors)
    husl_cmap = ListedColormap(husl_colors)
    sc = ax.scatter(df['x'], df['y'], df['z'], c=df['continuous_target'], cmap=husl_cmap,
                    edgecolor='white', linewidth=0.5, s=20)
    ax.set_title('Varied 3D Torus Dataset with Continuous Color Scale')
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_zlabel('Z axis')
    ax.set_xlim(-1, 1)
    ax.set_ylim(-1, 1)
    ax.set_zlim(-0.5, 0.5)
    ax.set_axis_off()
    plt.show()
    # NOTE: the returned labels are the continuous, all-unique 'continuous_target' values;
    # silhouette_score in evaluate() expects discrete classes, for which the binned
    # 'target' column (still present in the returned df) would be the natural choice.
    return df.drop(['target', 'continuous_target'], axis=1), df['continuous_target'].values, df
# Output locations; remove stale result files from previous runs and make sure the
# output directories exist before anything is written to them.
evaluation_file_path = "evaluation_output/evaluation_data.txt"
loss_file_path = "evaluation_output/loss_history.txt"
os.makedirs("evaluation_output/loss_images", exist_ok=True)
os.makedirs("evaluation_output/projections", exist_ok=True)
if os.path.exists(evaluation_file_path):
    os.remove(evaluation_file_path)
if os.path.exists(loss_file_path):
    os.remove(loss_file_path)

n_evaluations = 5


def evaluate_n_times(data_std, X_custom, X_2d, X_1d, y, n=5):
    # Run evaluate() n times and report the average and variance of every metric.
    # evaluate() has no stochastic steps, so the variance mainly serves as a sanity check.
    all_metrics = []
    for _ in range(n):
        metrics = evaluate(data_std, X_custom, X_2d, X_1d, y)
        all_metrics.append(metrics)
    avg_var_metrics = {}
    for dimension in all_metrics[0]:
        avg_var_metrics[dimension] = {}
        for projection in all_metrics[0][dimension]:
            avg_var_metrics[dimension][projection] = {}
            for metric in all_metrics[0][dimension][projection]:
                values = [m[dimension][projection][metric] for m in all_metrics]
                avg_var_metrics[dimension][projection][metric] = {
                    "Average": np.mean(values),
                    "Variance": np.var(values),
                }
    return avg_var_metrics


# Datasets to evaluate. The three penguin variants get distinct keys so that all of them
# run (duplicate dictionary keys would silently overwrite each other).
data_preprocessing_funcs = {
    "iris": preprocess_iris,
    "2dsphere": preprocess_2dsphere,
    "3dsphere": preprocess_3dsphere,
    "4dsphere": preprocess_4dsphere,
    "5dsphere": preprocess_5dsphere,
    "blobs": preprocess_blobs,
    "blobs_3d_v1": preprocess_3d_blobs_v1,
    "blobs_3d_v2": preprocess_3d_blobs_v2,
    "blobs_3d_v3": preprocess_3d_blobs_v3,
    "blobs_3d_v4": preprocess_3d_blobs_v4,
    "torus": preprocess_3d_torus,
    "concentric_circles": preprocess_concentric_circles,
    "penguins": preprocess_penguins,
    "penguins_around_max": preprocess_penguins_around_max,
    "penguins_around_min_halfway": preprocess_penguins_around_min_halfway,
    "unbalanced": preprocess_unbalanced,
    "s_curve": preprocess_s_curve,
    # "three_circles": preprocess_concentric_circles,
    # "citations": preprocess_citations,  # will be possible when published
}

loss_histories = {}
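# Each preprocess_* function is a zero-argument callable returning (D, y, data_std):
# D is the feature table handed to the projections, y the per-sample labels used for
# colouring and silhouette scores, and data_std the high-dimensional reference for the
# distance-based metrics. To register an additional dataset, follow the same convention,
# e.g. (hypothetical name, not defined in this script):
# data_preprocessing_funcs["my_dataset"] = preprocess_my_dataset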
for label, func in data_preprocessing_funcs.items():
    print("||||||||||||||||||||" + label + "|||||||||||||||")
    D, y, data_std = func()

    # Project the dataset with 2D MDS, 1D MDS and the circular projection (cPro).
    X_2d = apply_2d_mds(D)
    X_1d = apply_1d_mds(D)
    gd_result = gradient_descent(D, y, n_iter=20, learning_rate=0.02)
    X_custom = gd_result["data_prepared"]
    loss = gd_result["loss"]
    loss_history = gd_result["loss_history"]
    gd_time = gd_result["time"]
    data_raw = gd_result["points"]

    # Save the per-dataset loss curve.
    plt.plot(loss_history)
    plt.title(f'Loss History for {label}')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig(f"evaluation_output/loss_images/{label}_loss_history.png")
    plt.clf()

    # visualize_cPro(X_custom, y)
    visualize_2d_mds(X_2d, y)
    visualize_1d_mds(X_1d, y)

    evaluation_results = evaluate_n_times(data_std, X_custom, X_2d, X_1d, y, n=n_evaluations)

    # Export the circular projection with a simple 1-based id column.
    if 'id' not in X_custom.columns:
        X_custom['id'] = range(1, len(D) + 1)
    export_path = f"evaluation_output/projections/{label}_cPro.csv"
    X_custom.to_csv(export_path, index=False)
    print(f"Data exported for {label} to {export_path}")

    # Append the averaged metrics for this dataset to the evaluation report.
    with open(evaluation_file_path, 'a') as file:
        file.write(f"\n--- Evaluation Metrics for {label} ---\n")
        for dimension, projections in evaluation_results.items():
            file.write(f"{dimension}:\n")
            for projection, metrics in projections.items():
                file.write(f" {projection}:\n")
                for metric, stats in metrics.items():
                    file.write(f" {metric}: Average = {stats['Average']}, Variance = {stats['Variance']}\n")

    loss_histories[label] = loss_history
    with open(loss_file_path, 'a') as file:
        file.write(f"\n--- Loss History for {label} ---\n")
        for iteration, loss in enumerate(loss_history, start=1):
            file.write(f"Iteration {iteration}: Loss {loss}\n")

# Combined loss curves across all datasets.
plt.figure(figsize=(10, 6))
for label, loss_history in loss_histories.items():
    if isinstance(loss_history, torch.Tensor):
        if loss_history.ndim > 0:
            loss_history = loss_history.tolist()
        else:
            loss_history = [loss_history.item()]
    elif isinstance(loss_history, float):
        loss_history = [loss_history]
    iterations = list(range(1, len(loss_history) + 1))
    plt.plot(iterations, loss_history, label=label)
plt.title('Loss History for Different Datasets')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.legend()
plt.savefig('evaluation_output/loss_images/combined_loss_images.png')
plt.show()
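# Files produced by a full run (paths as defined above):
#   evaluation_output/evaluation_data.txt                   - averaged metrics per dataset
#   evaluation_output/loss_history.txt                      - per-iteration cPro loss per dataset
#   evaluation_output/loss_images/<label>_loss_history.png  - individual loss curves
#   evaluation_output/loss_images/combined_loss_images.png  - combined loss curves
#   evaluation_output/projections/<label>_cPro.csv          - exported circular projections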