Skip to content

Commit 52ba5ea

Browse files
d-vctdianevctehudkr
authored
Simulator dependency update (#71)
Update pandas>2 and networkx>3 for the simulation module and some unit tests #71 Add unit tests. GitHub Actions: continue on error for Code Climate failures, to avoid failing the entire workflow (even if tests are ok) just because it didn't have access to the CC token for uploading the coverage report. --------------------------------------------------------- * Allow networkx >3 dependency --------- Signed-off-by: Diane Vincent <[email protected]> * Allow pandas>2 dependency * Add tests Different link types: - test_affine_linking - test_poly_linking - test_exp_linking - test_log_linking Effect modifier: check that it behaves correctly with marginal structural model - test_effect_modifier * Dummy commit to engage CodeClimate? It seems forked-branch pull requests do not initiate CodeClimate properly, causing the entire PR to look like it failed. The problem is that the forked PR doesn't have access to the upstream's secret, so CodeClimate doesn't have its token: https://github.com/BiomedSciAI/causallib/actions/runs/10010566187/job/27714442507?pr=71#step:8:17 Before I contemplate whether to make that not-really-secret secret a hardcoded token instead, I want to test whether making a dummy commit by a permitted account could make it run properly. Signed-off-by: Ehud-Karavani <[email protected]> * Don't fail entire pipeline for failed coverage report upload Forked PRs have no access to secrets, so uploading a coverage report to Code Climate can fail as no token will be provided. To avoid that failing the entire workflow, try to make that step optional and see what happens on GitHub Actions. Signed-off-by: Ehud-Karavani <[email protected]> --------- Signed-off-by: Diane Vincent <[email protected]> Signed-off-by: Ehud-Karavani <[email protected]> Co-authored-by: Diane Vincent <[email protected]> Co-authored-by: ehudkr <[email protected]> Co-authored-by: Ehud Karavani <[email protected]> Co-authored-by: Ehud-Karavani <[email protected]>
1 parent bdf033c commit 52ba5ea

File tree

3 files changed

+103
-4
lines changed

3 files changed

+103
-4
lines changed

.github/workflows/build.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,8 @@ jobs:
4343
- name: Publish to CodeClimate
4444
uses: paambaati/[email protected]
4545
env:
46-
CC_TEST_REPORTER_ID: ${{ secrets.CODECLIMATE_REPORTER_ID }}
46+
CC_TEST_REPORTER_ID: ${{ secrets.CODECLIMATE_REPORTER_ID }}
47+
# Forked PRs have no access to secrets, so uploading a coverage report to Code Climate fails.
48+
# To avoid that failing the entire workflow, continue on error:
49+
continue-on-error: true
4750

causallib/simulation/CausalSimulator3.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def __init__(self, topology, var_types, prob_categories, link_types, snr, treatm
216216

217217
# check that effect modifier is independent of treatment and affects only the outcome:
218218
for i in self.effmod_indices:
219-
successors = self.graph_topology.successors(i)
219+
successors = list(self.graph_topology.successors(i))
220220
if len(successors) == 0 or self.outcome_indices.intersection(successors).size < 1:
221221
raise ValueError("Effect modifier variable {name} must affect an outcome variable".format(name=i))
222222
ancestors = nx.ancestors(self.graph_topology, i)
@@ -441,7 +441,7 @@ def generate_data(self, X_given=None, num_samples=None, random_seed=None):
441441

442442
# generate latent continuous covariates - every variable is guaranteed to have a population variance of 1.0
443443
# X_latent = pd.DataFrame(index=patients_index, columns=self.var_types.index)
444-
X = pd.DataFrame(index=patients_index, columns=self.var_types.index)
444+
X = pd.DataFrame(index=patients_index, columns=self.var_types.index, dtype=float)
445445
if X_given is not None: # if a dataset is given, integrate it to the current dataset being built.
446446
X.loc[:, X_given.columns] = X_given
447447
for col in X_given.columns:
@@ -1342,7 +1342,7 @@ def _poly_linking(X_parents, beta=None):
13421342
beta = pd.DataFrame(data=np.random.normal(loc=0.0, scale=4.0, size=(degree, X_parents.columns.size)),
13431343
columns=X_parents.columns, index=np.arange(degree))
13441344

1345-
result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns)
1345+
result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns, dtype=float)
13461346
degrees = beta.index.to_series()
13471347
# Apply a polynomial to every parent variable
13481348
for var_name, col in X_parents.items():

causallib/tests/test_causal_simulator3.py

+96
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,82 @@ def test_linear_linking(self):
357357
msg="discovered rank of matrix is {emp} instead of {des}."
358358
"so the linear linking does not work properly".format(emp=rank, des=2))
359359

360+
def test_affine_linking(self):
    """Check that data generated with "affine" links has a rank-3 data matrix."""
    # Mark entries [2, 0] and [2, 1]: the outcome (index 2) is linked to the
    # covariate (index 0) and the treatment (index 1).
    adjacency = np.zeros((3, 3), dtype=bool)
    adjacency[2, [0, 1]] = True
    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="affine",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank = number of singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = (spectrum > tolerance).sum()
    failure_msg = ("discovered rank of matrix is {emp} instead of {des}."
                   "so the affine linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_msg)
378+
379+
def test_poly_linking(self):
    """Check that data generated with "poly" links has a rank-3 data matrix."""
    # Mark entries [2, 0] and [2, 1]: the outcome (index 2) is linked to the
    # covariate (index 0) and the treatment (index 1).
    adjacency = np.zeros((3, 3), dtype=bool)
    adjacency[2, [0, 1]] = True
    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="poly",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank = number of singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = (spectrum > tolerance).sum()
    failure_msg = ("discovered rank of matrix is {emp} instead of {des}."
                   "so the poly linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_msg)
397+
398+
def test_exp_linking(self):
    """Check that data generated with "exp" links has a rank-3 data matrix."""
    # Mark entries [2, 0] and [2, 1]: the outcome (index 2) is linked to the
    # covariate (index 0) and the treatment (index 1).
    adjacency = np.zeros((3, 3), dtype=bool)
    adjacency[2, [0, 1]] = True
    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="exp",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank = number of singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = (spectrum > tolerance).sum()
    failure_msg = ("discovered rank of matrix is {emp} instead of {des}."
                   "so the exp linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_msg)
416+
417+
def test_log_linking(self):
    """Check that data generated with "log" links has a rank-3 data matrix."""
    # Mark entries [2, 0] and [2, 1]: the outcome (index 2) is linked to the
    # covariate (index 0) and the treatment (index 1).
    adjacency = np.zeros((3, 3), dtype=bool)
    adjacency[2, [0, 1]] = True
    simulator = CS3(
        topology=adjacency,
        var_types=["covariate", "treatment", "outcome"],
        prob_categories=[None, [0.5, 0.5], None],
        link_types="log",
        treatment_importances=None,
        outcome_types=self.no_X.outcome_types,
        snr=1,
        effect_sizes=self.no_X.effect_sizes,
    )
    X, _, _ = simulator.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank = number of singular values above a small tolerance.
    tolerance = 1e-10
    spectrum = np.linalg.svd(X.astype(float).values, compute_uv=False)
    rank = (spectrum > tolerance).sum()
    failure_msg = ("discovered rank of matrix is {emp} instead of {des}."
                   "so the log linking does not work properly").format(emp=rank, des=3)
    self.assertEqual(rank, 3, msg=failure_msg)
435+
360436
def test_treatment_logistic(self):
361437
topology = np.zeros((6, 6), dtype=bool)
362438
topology[2, 0] = topology[3, 0] = topology[2, 1] = topology[3, 1] = topology[4, 2] = topology[5, 3] = True
@@ -533,6 +609,26 @@ def test_censoring(self):
533609
# TODO: test different link types
534610
# TODO: test marginal structural model (both in continuous, dichotomous and probability settings)
535611

612+
def test_effect_modifier(self):
    """Check effect-modifier handling under a marginal structural model link.

    The outcome (index 2) uses the "marginal_structural_model" link. The
    test asserts that the outcome's linking coefficients for the effect
    modifier (row 0) differ between columns 0 and 1, while the coefficients
    for the plain covariate (row 3) are equal in both columns.
    """
    topology = np.zeros((4, 4), dtype=bool)
    # The outcome (index 2) is linked to the effect modifier (0), the
    # treatment (1) and the covariate (3).
    topology[2, 0] = topology[2, 1] = topology[2, 3] = True
    var_types = ["effect_modifier", "treatment", "outcome", "covariate"]
    snr = 1
    prob_cat = [None, [0.5, 0.5], None, None]
    treatment_importance = None
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types=["linear", "linear", "marginal_structural_model", "linear"],
              treatment_importances=treatment_importance,
              outcome_types="continuous", snr=snr, effect_sizes=None)
    # The generated data itself is not inspected; the call is kept from the
    # original test, which reads `linking_coefs` only after generating data.
    sim.generate_data(num_samples=self.NUM_SAMPLES)

    outcome_coefs = sim.linking_coefs[2]

    modifier_0, modifier_1 = outcome_coefs.loc[0, 0], outcome_coefs.loc[0, 1]
    self.assertNotEqual(modifier_0, modifier_1,
                        msg="coefficients for potential outcomes are the same: {beta_1} = {beta_0}."
                            "so the effect modifier does not behave properly".format(beta_0=modifier_0,
                                                                                     beta_1=modifier_1))

    # Report the covariate's own coefficients on failure (the original
    # message mistakenly printed the effect-modifier coefficients here).
    covariate_0, covariate_1 = outcome_coefs.loc[3, 0], outcome_coefs.loc[3, 1]
    self.assertEqual(covariate_0, covariate_1,
                     msg="coefficients for potential outcomes are not the same: {beta_1} != {beta_0}."
                         "so the covariate does not behave properly".format(beta_0=covariate_0,
                                                                            beta_1=covariate_1))
631+
536632

537633
if __name__ == "__main__":
538634
unittest.main()

0 commit comments

Comments
 (0)