Skip to content

Commit

Permalink
added column constraints for tidy, glance, augment
Browse files Browse the repository at this point in the history
  • Loading branch information
behrica committed Apr 27, 2024
1 parent a17c8b7 commit 3d6ddfe
Show file tree
Hide file tree
Showing 5 changed files with 376 additions and 4 deletions.
31 changes: 29 additions & 2 deletions build.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
(ns build
(:refer-clojure :exclude [test])
(:require [clojure.tools.build.api :as b] ; for b/git-count-revs
[org.corfield.build :as bb]))
(:require
[camel-snake-kebab.core :as csk]
[clj-yaml.core :as yaml]
[clojure.java.io :as io]
[clojure.pprint :as pp]
[clojure.tools.build.api :as b] ; for b/git-count-revs
[org.corfield.build :as bb]))

(def lib 'scicloj/metamorph.ml)
; alternatively, use MAJOR.MINOR.COMMITS:
Expand Down Expand Up @@ -66,3 +71,25 @@
(-> opts
(assoc :lib lib :version version)
(bb/deploy)))

(defn build-glance-columns
  "Regenerates resources/columms-glance.edn from the columns_glance.yaml
  file of the alexpghayes/modeltests repository.

  The YAML is downloaded, parsed with every key converted to a kebab-case
  keyword, and pretty-printed as EDN into the resource file.

  `opts` is accepted but not used.
  Returns nil. Requires network access; any download or parse failure
  propagates as an exception."
  [opts]
  ;; Download and parse BEFORE opening the writer: io/writer truncates the
  ;; target file as soon as it is opened, so doing the fetch inside with-open
  ;; would leave an empty resource behind whenever the download or the YAML
  ;; parsing fails.
  (let [columns (-> (slurp "https://raw.githubusercontent.com/alexpghayes/modeltests/main/data-raw/columns_glance.yaml")
                    (yaml/parse-string
                     :key-fn #(-> % :key csk/->kebab-case-keyword)))]
    ;; NOTE(review): the "columms" spelling is kept on purpose -- the committed
    ;; resource uses this name and it is presumably loaded by that name elsewhere.
    (with-open [w (io/writer "resources/columms-glance.edn")]
      (pp/pprint columns w))))


(defn build-tidy-columns
  "Regenerates resources/columms-tidy.edn from the columns_tidy.yaml
  file of the alexpghayes/modeltests repository.

  The YAML is downloaded, parsed with every key converted to a kebab-case
  keyword, and pretty-printed as EDN into the resource file.

  `opts` is accepted but not used.
  Returns nil. Requires network access; any download or parse failure
  propagates as an exception."
  [opts]
  ;; Download and parse BEFORE opening the writer: io/writer truncates the
  ;; target file as soon as it is opened, so doing the fetch inside with-open
  ;; would leave an empty resource behind whenever the download or the YAML
  ;; parsing fails.
  (let [columns (-> (slurp "https://raw.githubusercontent.com/alexpghayes/modeltests/main/data-raw/columns_tidy.yaml")
                    (yaml/parse-string
                     :key-fn #(-> % :key csk/->kebab-case-keyword)))]
    ;; NOTE(review): the "columms" spelling is kept on purpose -- it matches the
    ;; sibling resource names and is presumably loaded by that name elsewhere.
    (with-open [w (io/writer "resources/columms-tidy.edn")]
      (pp/pprint columns w))))

(defn build-augment-columns
  "Regenerates resources/columms-augment.edn from the columns_augment.yaml
  file of the alexpghayes/modeltests repository.

  The YAML is downloaded, parsed with every key converted to a kebab-case
  keyword, and pretty-printed as EDN into the resource file.

  `opts` is accepted but not used.
  Returns nil. Requires network access; any download or parse failure
  propagates as an exception."
  [opts]
  ;; Download and parse BEFORE opening the writer: io/writer truncates the
  ;; target file as soon as it is opened, so doing the fetch inside with-open
  ;; would leave an empty resource behind whenever the download or the YAML
  ;; parsing fails.
  (let [columns (-> (slurp "https://raw.githubusercontent.com/alexpghayes/modeltests/main/data-raw/columns_augment.yaml")
                    (yaml/parse-string
                     :key-fn #(-> % :key csk/->kebab-case-keyword)))]
    ;; NOTE(review): the "columms" spelling is kept on purpose -- the committed
    ;; resource uses this name and it is presumably loaded by that name elsewhere.
    (with-open [w (io/writer "resources/columms-augment.edn")]
      (pp/pprint columns w))))
6 changes: 4 additions & 2 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

:build {:deps {io.github.clojure/tools.build {:mvn/version "0.9.6"}
slipset/deps-deploy {:mvn/version "0.2.1"}
io.github.seancorfield/build-clj {:git/tag "v0.9.2" :git/sha "9c9f078"}}
io.github.seancorfield/build-clj {:git/tag "v0.9.2" :git/sha "9c9f078"}
clj-commons/clj-yaml {:mvn/version "1.0.27"}
camel-snake-kebab/camel-snake-kebab {:mvn/version "0.4.3"}}
:ns-default build}


Expand All @@ -29,7 +31,7 @@
{:jvm-opts ["-Djdk.attach.allowAttachSelf"]
:extra-paths ["test"]
:extra-deps {
lambdaisland/kaocha {:mvn/version "0.15.957"}
lambdaisland/kaocha {:mvn/version "1.88.1376"}
scicloj/scicloj.ml.smile {:mvn/version "7.3"}
datacraft-sciences/confuse {:mvn/version "0.1.1"}
ch.qos.logback/logback-classic {:mvn/version "1.5.6"}}}
Expand Down
50 changes: 50 additions & 0 deletions resources/columms-augment.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{:.class "Predicted class.",
:.cluster "Cluster assignment.",
:.cochran.qe.loo
"Leave-one-out residual heterogeneity test statistics.",
:.col.prop "Column proportion (2 dimensional table only).",
:.conf.high "Upper bound on confidence interval for fitted values.",
:.conf.low "Lower bound on confidence interval for fitted values.",
:.cooksd "Cooks distance.",
:.cov.ratio "The covariance ratio.",
:.cred.high "Upper bound on credible interval for fitted values.",
:.cred.low "Lower bound on credible interval for fitted values.",
:.dffits
"Estimated change in standard deviations for the predicted effect after excluding the study",
:.expected "Expected count under the null hypothesis.",
:.fitted "Fitted or predicted value.",
:.fitted-j-0
"Population-level fitted values for the j-th longitudinal process.",
:.fitted-j-1
"Individual-level fitted values for the j-th longitudinal process.",
:.hat "Diagonal of the hat matrix.",
:.lower "Lower bound on interval for fitted values.",
:.moderator
"In meta-analysis, the moderators used to calculate the predicted values.",
:.moderator.level
"In meta-analysis, the level of the moderators used to calculate the predicted values.",
:.observed "Observed count.",
:.probability "Class probability of modal class.",
:.prop "Proportion of the total.",
:.remainder
"The remainder, or random, component of the decomposition.",
:.resid "The difference between observed and fitted values.",
:.resid-j-0
"Population-level residuals for the j-th longitudinal process.",
:.resid-j-1
"Individual-level residuals for the j-th longitudinal process.",
:.row.prop "Row proportion (2 dimensions table only).",
:.rownames "Rownames from the original data, if present.",
:.se.fit "Standard errors of fitted values.",
:.seasadj "The seasonally adjusted (or *deseasonalised*) series.",
:.seasonal "The seasonal component of the decomposition.",
:.sigma
"Estimated residual standard deviation when corresponding observation is dropped from model.",
:.std.resid "Standardised residuals.",
:.tau "Quantile.",
:.tau.squared.loo "leave-one-out tau-squared estimates",
:.trend "The trend component of the decomposition.",
:.uncertainty
"The uncertainty associated with the classification. Equal to one minus the model class probability.",
:.upper "Upper bound on interval for fitted values.",
:.weight "The final robust weights."}
153 changes: 153 additions & 0 deletions resources/columms-glance.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
{:adj.r.squared
"Adjusted R squared statistic, which is like the R squared statistic except taking degrees of freedom into account.",
:agfi "Adjusted goodness of fit.",
:aic "Akaike's Information Criterion for the model.",
:ai-cc
"Small sample corrected Akaike's Information Criterion for the model.",
:alpha "Estimated correlation parameter for geepack::geeglm.",
:alternative "The alternative hypothesis. Usually character.",
:autocorrelation "Autocorrelation.",
:avg.silhouette.width "The average silhouette width for the dataset.",
:betweenss "The total between-cluster sum of squares.",
:bic "Bayesian Information Criterion for the model.",
:cfi "Comparative fit index.",
:chi.squared
"The Pearson Chi-Square goodness of fit statistic for multiway tables.",
:chisq "Model chi squared.",
:cochran.qe
"In meta-analysis, test statistic for the Cochran's Q_e test of residual heterogeneity.",
:cochran.qm
"In meta-analysis, test statistic for the Cochran's Q_m omnibus test of coefficients.",
:conf.high "Upper bound on confidence interval.",
:conf.low "Lower bound on confidence interval.",
:converged
"Logical indicating if the model fitting procedure was succesful and converged.",
:convergence "Convergence code.",
:crit "Minimized criterion",
:cv.crit "Cross-validation score",
:den.df "Degrees of freedom of the denominator",
:deviance "Deviance of the model.",
:df "Degrees of freedom used by the model.",
:df.null "Degrees of freedom used by the null model.",
:df.residual "Residual degrees of freedom.",
:dw.original "Durbin-Watson statistic of original fit.",
:dw.transformed "Durbin-Watson statistic of transformed fit.",
:edf "The effective degrees of freedom.",
:estimator "Estimator used.",
:events "Number of events.",
:fin-tol "The achieved convergence tolerance.",
:function.count "Number of calls to `fn`.",
:g "The optimal number of mixture components.",
:g.squared "The likelihood ratio/deviance statistic.",
:gamma "Estimated scale parameter for geepack::geeglm.",
:gradient.count "Number of calls to `gr`.",
:h
"H statistic for computing confidence interval of major axis slope.",
:h.squared "Value of the H-Squared statistic.",
:hypvol
"If the other model contains a noise component, the value of the hypervolume parameter. Otherwise `NA`.",
:i.squared "Value of the I-Squared statistic.",
:independence "Whether the model assumed dyadic independence.",
:is-conv "Whether the fit successfully converged.",
:iter "Iterations of algorithm/fitting procedure completed.",
:iterations "The number of iterations performed before convergence.",
:k-hkb "Modified HKB estimate of the ridge constant.",
:k-lw "Modified L-W estimate of the ridge constant.",
:lag.order "Lag order.",
:lambda "Choice of lambda corresponding to `spar`.",
:lambda.-1se
"The value of the penalization parameter lambda that results in the sparsest model while remaining within one standard error of the minimum loss.",
:lambda.min
"The value of the penalization parameter lambda that achieved minimum loss as estimated by cross validation.",
:lambda-gcv "choice of lambda that minimizes GCV.",
:log-lik
"The log-likelihood of the model. [stats::logLik()] may be a useful reference.",
:max.cluster.size "Max number of elements in clusters.",
:max.hazard "Maximal estimated hazard.",
:max.time "The maximum observed event or censoring time.",
:maxit "Number of iterations performed.",
:mcmc.burnin "The burn-in period of the MCMC estimation.",
:mcmc.interval "The interval used during MCMC estimation.",
:mcmc.samplesize "The sample size used during MCMC estimation.",
:measure "The measure used in the meta-analysis.",
:median "median survival.",
:method "Which method was used.",
:min.hazard "Minimal estimated hazard.",
:min.time "The minimum observed event or censoring time.",
:missing-method "Method for eliminating missing data.",
:model
"A character string denoting the model at which the optimal BIC occurs.",
:n "The total number of observations.",
:n.clusters "Number of clusters.",
:n.factors "The number of fitted factors.",
:n.max "Maximum number of subjects at risk.",
:n.start "Initial number of subjects at risk.",
:nevent "Number of events.",
:nexcluded "Number of excluded observations.",
:ngroups "Number of groups in model.",
:nobs "Number of observations used.",
:norig "Number of observation in the original dataset.",
:npar "Number of parameters in the model.",
:npasses "Total passes over the data across all lambda values.",
:null.deviance "Deviance of the null model.",
:nulldev "Null deviance.",
:num.df "Degrees of freedom of the numerator.",
:number.interaction "Number of interactions.",
:offtable "Total number of person-years off table.",
:p.value "P-value corresponding to the test statistic.",
:p.value.cochran.qe
"In meta-analysis, p-value for the Cochran's Q_e test of residual heterogeneity.",
:p.value.cochran.qm
"In meta-analysis, p-value for the Cochran's Q_m omnibus test of coefficients.",
:p.value.original "P-value of original Durbin-Watson statistic.",
:p.value.-sargan "P-value for Sargan test.",
:p.value.transformed
"P-value of autocorrelation after transformation.",
:p.value.weak.instr "P-value for weak instrument F-test.",
:p.value.-wu.-hausman "P-value for Wu-Hausman test.",
:parameter
"Parameter field in the htest, typically degrees of freedom.",
:pen.crit "Penalized criterion.",
:power "Power achieved by the analysis.",
:power.reached "Whether the desired power was reached.",
:pseudo.r.squared
"Like the R squared statistic, but for situations when the R squared statistic isn't defined.",
:r.squared
"R squared statistic, or the percent of variation explained by the model. Also known as the coefficient of determination.",
:records "Number of observations",
:residual.deviance "The residual deviance of the model",
:rho "Spearman's rho autocorrelation",
:rho-2
"McFadden's rho squared with respect to a market shares (constants-only) model.",
:rho-20
"McFadden's rho squared with respect to an equal shares (no information) model.",
:rmean "Restricted mean (see [survival::print.survfit()]).",
:rmean.std.error "Restricted mean standard error.",
:rmsea "Root mean square error of approximation.",
:rmsea.conf.high "95 percent upper bound on RMSEA.",
:rscore "Robust log-rank statistic",
:score "Score.",
:sigma "Estimated standard error of the residuals.",
:sigma-2-j
"The square root of the estimated residual variance for the j-th longitudinal process.",
:spar "Smoothing parameter.",
:srmr "Standardised root mean residual.",
:statistic "Test statistic.",
:statistic.-sargan "Statistic for Sargan test.",
:statistic.weak.instr "Statistic for weak instrument F-test.",
:statistic.-wu.-hausman "Statistic for Wu-Hausman test.",
:tau "Quantile.",
:tau.squared
"In meta-analysis, estimated amount of residual heterogeneity.",
:tau.squared.se
"In meta-analysis, standard error of residual heterogeneity.",
:theta "Angle between OLS lines `lm(y ~ x)` and `lm(x ~ y)`.",
:timepoints "Number of timepoints.",
:tli "Tucker Lewis index.",
:tot.withinss "The total within-cluster sum of squares.",
:total "Total number of person-years tabulated.",
:total.variance
"Total cumulative proportion of variance accounted for by all factors.",
:totss "The total sum of squares.",
:value "Minimized or maximized output value.",
:within.r.squared "R squared within fixed-effect groups."}
Loading

0 comments on commit 3d6ddfe

Please sign in to comment.