Skip to content

Commit

Permalink
formatting fix
Browse files Browse the repository at this point in the history
  • Loading branch information
rahulkhorana committed Mar 24, 2024
1 parent 0cf352a commit 6c0803f
Show file tree
Hide file tree
Showing 18 changed files with 1,263 additions and 651 deletions.
138 changes: 97 additions & 41 deletions experiments/esol_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import numpy as np
from load_process_data import LoadDatasetForTask

#botorch specific
# botorch specific
from botorch.models.gp_regression import ExactGP

#gpytorch specific
# gpytorch specific
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.distributions import MultivariateNormal
Expand All @@ -25,8 +25,8 @@
# Pick the compute device for the experiment once at module load: the first
# CUDA GPU when one is available (flushing the allocator cache so the run
# starts from a clean slate), otherwise the CPU.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    dev = "cuda:0"
else:
    dev = "cpu"
device = torch.device(dev)


Expand All @@ -41,6 +41,7 @@ def forward(self, x):
covar_x = self.covar_module(x)
return MultivariateNormal(mean_x, covar_x)


class GraphGP(SIGP):
def __init__(self, train_x, train_y, likelihood, kernel, **kernel_kwargs):
super().__init__(train_x, train_y, likelihood)
Expand All @@ -61,67 +62,122 @@ def forward(self, x):
return MultivariateNormal(mean, covariance)


def initialize_model(train_x: torch.Tensor, train_obj: torch.Tensor, likelihood):
    """Build an ExactGPModel on the given training data.

    The model is moved to the same device and dtype as ``train_x`` via
    ``Module.to(tensor)`` before being returned.
    """
    gp = ExactGPModel(train_x, train_obj, likelihood)
    return gp.to(train_x)


def initialize_graph_gp(train_x, train_obj, likelihood, kernel, **kernel_kwargs):
    """Construct a GraphGP over graph-structured inputs.

    Extra keyword arguments are forwarded verbatim to the kernel.
    """
    return GraphGP(train_x, train_obj, likelihood, kernel, **kernel_kwargs)


def one_experiment(target, encoding, n_trials, n_iters):
    """Run one ESOL regression experiment for a single target column.

    Loads the dataset in the requested molecular representation, evaluates a
    GP model (feature-based or graph-based) over ``n_trials`` random splits,
    and returns human-readable summary strings.

    Args:
        target: name of the y-column in ESOL.csv to regress on.
        encoding: molecular representation — one of 'complexes',
            'deep_complexes', 'fingerprints', 'SELFIES', 'GRAPHS'.
        n_trials: number of random train/test splits.
        n_iters: optimisation iterations per trial.

    Returns:
        Tuple of (mean_r2, mean_rmse, mean_mae) strings, each reporting
        mean +- standard error over the trials.

    Note:
        Reads the module-level ``holdout_set_size`` and ``EXPERIMENT_TYPE``
        globals set in the ``__main__`` block.
    """
    # BUG FIX: the original tested the module-level global ENCODING (instead
    # of the `encoding` parameter) in the fingerprints/SELFIES/GRAPHS
    # branches, in the evaluator dispatch, and in the figure path — so
    # calling this with an encoding different from the global silently
    # selected the wrong loader/evaluator. Use the parameter throughout.
    X, y = [], []
    if encoding == "complexes":
        X, y = LoadDatasetForTask(
            X="dataset/esol/fast_complex_lookup_repn.pkl",
            y="dataset/esol/ESOL.csv",
            repn=encoding,
            y_column=target,
        ).load_esol()
    elif encoding == "deep_complexes":
        X, y = LoadDatasetForTask(
            X="dataset/esol/deep_complex_lookup_repn.pkl",
            y="dataset/esol/ESOL.csv",
            repn=encoding,
            y_column=target,
        ).load_esol()
    elif encoding == "fingerprints":
        X, y = LoadDatasetForTask(
            X="gauche_ecfp", y="dataset/esol/ESOL.csv", repn=encoding, y_column=target
        ).load_esol()
    elif encoding == "SELFIES":
        X, y = LoadDatasetForTask(
            X="gauche_selfies",
            y="dataset/esol/ESOL.csv",
            repn=encoding,
            y_column=target,
        ).load_esol()
    elif encoding == "GRAPHS":
        X, y = LoadDatasetForTask(
            X="gauche_graphs", y="dataset/esol/ESOL.csv", repn=encoding, y_column=target
        ).load_esol()

    figure_path = (
        f"results/{EXPERIMENT_TYPE}/confidence_mae_model_{encoding}_{target}.png"
    )
    # Graph inputs need the graph-kernel evaluation path; everything else
    # goes through the standard feature-based evaluator.
    if encoding != "GRAPHS":
        r2_list, rmse_list, mae_list, confidence_percentiles, mae_mean, mae_std = (
            evaluate_model(
                initialize_model=initialize_model,
                n_trials=n_trials,
                n_iters=n_iters,
                test_set_size=holdout_set_size,
                X=X,
                y=y,
                figure_path=figure_path,
            )
        )
    else:
        r2_list, rmse_list, mae_list, confidence_percentiles, mae_mean, mae_std = (
            evaluate_graph_model(
                initialize_graph_gp,
                n_trials=n_trials,
                n_iters=n_iters,
                test_set_size=holdout_set_size,
                X=X,
                y=y,
                figure_path=figure_path,
            )
        )

    # Report mean +- standard error (std / sqrt(n)) across the trials.
    mean_r2 = "\nmean R^2: {:.4f} +- {:.4f}".format(
        np.mean(r2_list), np.std(r2_list) / np.sqrt(len(r2_list))
    )
    mean_rmse = "mean RMSE: {:.4f} +- {:.4f}".format(
        np.mean(rmse_list), np.std(rmse_list) / np.sqrt(len(rmse_list))
    )
    mean_mae = "mean MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list), np.std(mae_list) / np.sqrt(len(mae_list))
    )
    return mean_r2, mean_rmse, mean_mae


if __name__ == "__main__":
    EXPERIMENT_TYPE = "ESOL"
    ENCODING = "GRAPHS"
    N_TRIALS = 20
    N_ITERS = 5
    holdout_set_size = 0.33  # fraction of the data held out for testing

    # Every y-column present in ESOL.csv usable as a regression target.
    possible_target_cols = [
        "ESOL predicted log solubility in mols per litre",
        "Minimum Degree",
        "Molecular Weight",
        "Number of H-Bond Donors",
        "Number of Rings",
        "Number of Rotatable Bonds",
        "Polar Surface Area",
        "measured log solubility in mols per litre",
    ]

    results = []

    # One experiment per target column; collect the formatted summaries.
    for col in possible_target_cols:
        mean_r2, mean_rmse, mean_mae = one_experiment(col, ENCODING, N_TRIALS, N_ITERS)
        results.append([col, mean_r2, mean_rmse, mean_mae])

    if isinstance(EXPERIMENT_TYPE, str):
        # Timestamped filename so repeated runs never overwrite each other.
        # (The original also computed len(os.listdir(...)) into an unused
        # `trial_num`; dropped here as dead code.)
        results_path = f"results/{EXPERIMENT_TYPE}/{ENCODING}_{time.time()}.txt"

        # `with` closes the file on exit; the original's trailing f.close()
        # was redundant and has been removed.
        with open(results_path, "w") as f:
            f.write(EXPERIMENT_TYPE + ":")
            f.write("\n")
            f.write(ENCODING + ":")
            for col, mean_r2, mean_rmse, mean_mae in results:
                f.write(f"column: {col}, {mean_r2}, {mean_rmse}, {mean_mae}")
                f.write("\n")
    print("CONCLUDED")
135 changes: 94 additions & 41 deletions experiments/freesolv_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import numpy as np
from load_process_data import LoadDatasetForTask

#botorch specific
# botorch specific
from botorch.models.gp_regression import ExactGP

#gpytorch specific
# gpytorch specific
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.distributions import MultivariateNormal
Expand All @@ -25,8 +25,8 @@
# Choose the compute device for this experiment at module load time. A CUDA
# GPU is preferred when present (its allocator cache is emptied first);
# otherwise everything runs on the CPU.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    dev = "cuda:0"
else:
    dev = "cpu"
device = torch.device(dev)


Expand All @@ -41,6 +41,7 @@ def forward(self, x):
covar_x = self.covar_module(x)
return MultivariateNormal(mean_x, covar_x)


class GraphGP(SIGP):
def __init__(self, train_x, train_y, likelihood, kernel, **kernel_kwargs):
super().__init__(train_x, train_y, likelihood)
Expand All @@ -61,67 +62,119 @@ def forward(self, x):
return MultivariateNormal(mean, covariance)


def initialize_model(train_x: torch.Tensor, train_obj: torch.Tensor, likelihood):
    """Create an ExactGPModel for the training set, matching the device and
    dtype of ``train_x``."""
    gp = ExactGPModel(train_x, train_obj, likelihood)
    return gp.to(train_x)


def initialize_graph_gp(train_x, train_obj, likelihood, kernel, **kernel_kwargs):
    """Build a GraphGP for graph-structured molecules; kernel keyword
    arguments are passed straight through to the kernel constructor."""
    return GraphGP(train_x, train_obj, likelihood, kernel, **kernel_kwargs)


def one_experiment(target, encoding, n_trials, n_iters):
    """Run one FreeSolv regression experiment for a single target column.

    Loads the dataset in the requested molecular representation, evaluates a
    GP model (feature-based or graph-based) over ``n_trials`` random splits,
    and returns human-readable summary strings.

    Args:
        target: name of the y-column in FreeSolv.csv ('expt' or 'calc').
        encoding: molecular representation — one of 'complexes',
            'deep_complexes', 'fingerprints', 'SELFIES', 'GRAPHS'.
        n_trials: number of random train/test splits.
        n_iters: optimisation iterations per trial.

    Returns:
        Tuple of (mean_r2, mean_rmse, mean_mae) strings, each reporting
        mean +- standard error over the trials.

    Note:
        Reads the module-level ``holdout_set_size`` and ``EXPERIMENT_TYPE``
        globals set in the ``__main__`` block.
    """
    # BUG FIX: the original tested the module-level global ENCODING (instead
    # of the `encoding` parameter) in the fingerprints/SELFIES/GRAPHS
    # branches, in the evaluator dispatch, and in the figure path — so
    # calling this with an encoding different from the global silently
    # selected the wrong loader/evaluator. Use the parameter throughout.
    X, y = [], []
    if encoding == "complexes":
        X, y = LoadDatasetForTask(
            X="dataset/free_solv/fast_complex_lookup_repn.pkl",
            y="dataset/free_solv/FreeSolv.csv",
            repn=encoding,
            y_column=target,
        ).load_freesolv()
    elif encoding == "deep_complexes":
        X, y = LoadDatasetForTask(
            X="dataset/free_solv/deep_complex_lookup_repn.pkl",
            y="dataset/free_solv/FreeSolv.csv",
            repn=encoding,
            y_column=target,
        ).load_freesolv()
    elif encoding == "fingerprints":
        X, y = LoadDatasetForTask(
            X="gauche_ecfp",
            y="dataset/free_solv/FreeSolv.csv",
            repn=encoding,
            y_column=target,
        ).load_freesolv()
    elif encoding == "SELFIES":
        X, y = LoadDatasetForTask(
            X="gauche_selfies",
            y="dataset/free_solv/FreeSolv.csv",
            repn=encoding,
            y_column=target,
        ).load_freesolv()
    elif encoding == "GRAPHS":
        X, y = LoadDatasetForTask(
            X="gauche_graphs",
            y="dataset/free_solv/FreeSolv.csv",
            repn=encoding,
            y_column=target,
        ).load_freesolv()

    figure_path = (
        f"results/{EXPERIMENT_TYPE}/confidence_mae_model_{encoding}_{target}.png"
    )
    # Graph inputs need the graph-kernel evaluation path; everything else
    # goes through the standard feature-based evaluator.
    if encoding != "GRAPHS":
        r2_list, rmse_list, mae_list, confidence_percentiles, mae_mean, mae_std = (
            evaluate_model(
                initialize_model=initialize_model,
                n_trials=n_trials,
                n_iters=n_iters,
                test_set_size=holdout_set_size,
                X=X,
                y=y,
                figure_path=figure_path,
            )
        )
    else:
        r2_list, rmse_list, mae_list, confidence_percentiles, mae_mean, mae_std = (
            evaluate_graph_model(
                initialize_graph_gp,
                n_trials=n_trials,
                n_iters=n_iters,
                test_set_size=holdout_set_size,
                X=X,
                y=y,
                figure_path=figure_path,
            )
        )

    # Report mean +- standard error (std / sqrt(n)) across the trials.
    mean_r2 = "\nmean R^2: {:.4f} +- {:.4f}".format(
        np.mean(r2_list), np.std(r2_list) / np.sqrt(len(r2_list))
    )
    mean_rmse = "mean RMSE: {:.4f} +- {:.4f}".format(
        np.mean(rmse_list), np.std(rmse_list) / np.sqrt(len(rmse_list))
    )
    mean_mae = "mean MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list), np.std(mae_list) / np.sqrt(len(mae_list))
    )
    return mean_r2, mean_rmse, mean_mae


if __name__ == "__main__":
    EXPERIMENT_TYPE = "FreeSolv"
    ENCODING = "GRAPHS"
    N_TRIALS = 20
    N_ITERS = 5
    holdout_set_size = 0.33  # fraction of the data held out for testing

    # FreeSolv ships two y-columns: experimental and calculated hydration
    # free energy.
    possible_target_cols = ["expt", "calc"]

    results = []

    # One experiment per target column; collect the formatted summaries.
    for col in possible_target_cols:
        mean_r2, mean_rmse, mean_mae = one_experiment(col, ENCODING, N_TRIALS, N_ITERS)
        results.append([col, mean_r2, mean_rmse, mean_mae])

    if isinstance(EXPERIMENT_TYPE, str):
        # Timestamped filename so repeated runs never overwrite each other.
        # (The original also computed len(os.listdir(...)) into an unused
        # `trial_num`; dropped here as dead code.)
        results_path = f"results/{EXPERIMENT_TYPE}/{ENCODING}_{time.time()}.txt"

        # `with` closes the file on exit; the original's trailing f.close()
        # was redundant and has been removed.
        with open(results_path, "w") as f:
            f.write(EXPERIMENT_TYPE + ":")
            f.write("\n")
            f.write(ENCODING + ":")
            for col, mean_r2, mean_rmse, mean_mae in results:
                f.write(f"column: {col}, {mean_r2}, {mean_rmse}, {mean_mae}")
                f.write("\n")
    print("CONCLUDED")
Loading

0 comments on commit 6c0803f

Please sign in to comment.