Hot Fix for Reaction Center Prediction (#116)

mufeili · web-flow · commit 30aa61c8fd1a · 2020-12-15T01:47:17.000+08:00
* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update
diff --git a/examples/reaction_prediction/rexgen_direct/README.md b/examples/reaction_prediction/rexgen_direct/README.md
@@ -218,12 +218,12 @@ You can then train a model on new datasets with
 python find_reaction_center_train.py --train-path X --val-path Y
 ```
 
-where `X`, `Y` are paths to the new training/validation as described above.
+where `X` and `Y` are the paths to the new training and validation datasets as described above.
 
 For evaluation,
 
 ```bash
-python find_reaction_center_eval.py --eval-path Z
+python find_reaction_center_eval.py --test-path Z
 ```
 
 where `Z` is the path to the new test set as described above.
@@ -334,20 +334,22 @@ python candidate_ranking_eval.py
 You can train a model on new datasets with
 
 ```bash
-python candidate_ranking_train.py --train-path train_valid_reactions.proc --val-path val_valid_reactions.proc -cmp X
+python candidate_ranking_train.py --train-path X --val-path Y -cmp Z
 ```
 
-where `X` is the path to a trained model for reaction center prediction. You can use our 
-pre-trained model by not specifying `-cmp`.
+where `X` and `Y` are the paths to the new training and validation datasets as in reaction center prediction, and `Z` is
+the path to a trained model for reaction center prediction. You can use our pre-trained model for reaction center prediction by not specifying `-cmp`.
 
 For evaluation,
 
 ```bash
-python candidate_ranking_eval.py --model-path X -cmp Y --eval-path test_valid_reactions.proc
+python candidate_ranking_eval.py --model-path X -cmp Y --test-path Z
 ```
 
-where `X` is the path to a trained model for candidate ranking and `Y` is the path to a trained model 
-for reaction center prediction. As in training, you can use our pre-trained model by not specifying `-cmp`.
+where `X` is the path to a trained model for candidate ranking, `Y` is the path to a trained model 
+for reaction center prediction, and `Z` is the path to the new test dataset as in reaction center prediction.
+You can use the pre-trained model for reaction center prediction by not specifying `-cmp`, and the pre-trained
+model for candidate ranking by not specifying `--model-path`.
 
 ### Common Issues
 
diff --git a/examples/reaction_prediction/rexgen_direct/candidate_ranking_eval.py b/examples/reaction_prediction/rexgen_direct/candidate_ranking_eval.py
@@ -20,7 +20,7 @@ def main(args, path_to_candidate_bonds):
             num_processes=args['num_processes'])
     else:
         test_set = WLNRankDataset(
-            path_to_reaction_file=args['test_path'],
+            path_to_reaction_file='test_valid_reactions.proc',
             candidate_bond_path=path_to_candidate_bonds['test'], mode='test',
             max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_eval'],
             num_processes=args['num_processes'])
diff --git a/examples/reaction_prediction/rexgen_direct/candidate_ranking_train.py b/examples/reaction_prediction/rexgen_direct/candidate_ranking_train.py
@@ -24,7 +24,7 @@ def main(args, path_to_candidate_bonds):
             num_processes=args['num_processes'])
     else:
         train_set = WLNRankDataset(
-            path_to_reaction_file=args['train_path'],
+            path_to_reaction_file='train_valid_reactions.proc',
             candidate_bond_path=path_to_candidate_bonds['train'], mode='train',
             max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_train'],
             num_processes=args['num_processes'])
@@ -36,7 +36,7 @@ def main(args, path_to_candidate_bonds):
             num_processes=args['num_processes'])
     else:
         val_set = WLNRankDataset(
-            path_to_reaction_file=args['val_path'],
+            path_to_reaction_file='val_valid_reactions.proc',
             candidate_bond_path=path_to_candidate_bonds['val'], mode='val',
             max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_eval'],
             num_processes=args['num_processes'])
@@ -133,6 +133,14 @@ def main(args, path_to_candidate_bonds):
                 t0 = time.time()
                 model.train()
 
+    # Final results
+    torch.save({'model_state_dict': model.state_dict()},
+               args['result_path'] + '/model_final.pkl')
+    prediction_summary = 'final\n' + candidate_ranking_eval(args, model, val_loader)
+    print(prediction_summary)
+    with open(args['result_path'] + '/val_eval.txt', 'a') as f:
+        f.write(prediction_summary)
+
 if __name__ == '__main__':
     from argparse import ArgumentParser
 
diff --git a/examples/reaction_prediction/rexgen_direct/find_reaction_center_train.py b/examples/reaction_prediction/rexgen_direct/find_reaction_center_train.py
@@ -124,6 +124,16 @@ def main(rank, dev_id, args):
                 model.train()
         synchronize(args['num_devices'])
 
+    # Final results
+    if rank == 0:
+        prediction_summary = 'final result ' + \
+                             reaction_center_final_eval(args, args['top_ks_val'], model, val_loader, easy=True)
+        print(prediction_summary)
+        with open(args['result_path'] + '/val_eval.txt', 'a') as f:
+            f.write(prediction_summary)
+        torch.save({'model_state_dict': model.state_dict()},
+                   args['result_path'] + '/model_final.pkl')
+
 def run(rank, dev_id, args):
     dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
         master_ip=args['master_ip'], master_port=args['master_port'])
diff --git a/examples/reaction_prediction/rexgen_direct/utils.py b/examples/reaction_prediction/rexgen_direct/utils.py
@@ -549,7 +549,8 @@ def prepare_reaction_center(args, reaction_center_config):
         else:
             dataset = WLNCenterDataset(raw_file_path=args['{}_path'.format(subset)],
                                        mol_graph_path='{}.bin'.format(subset),
-                                       num_processes=args['num_processes'])
+                                       num_processes=args['num_processes'],
+                                       reaction_validity_result_prefix=subset)
 
         dataloader = DataLoader(dataset, batch_size=args['reaction_center_batch_size'],
                                 collate_fn=collate_center, shuffle=False)