Skip to content

Commit

Permalink
Merge branch 'main' of github.com:scicloj/noj
Browse files Browse the repository at this point in the history
  • Loading branch information
daslu committed Dec 23, 2024
2 parents 3388ec3 + 354127b commit 624c522
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 45 deletions.
5 changes: 1 addition & 4 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
org.scicloj/scicloj.ml.xgboost {:mvn/version "6.2.0"}

org.scicloj/scicloj.ml.tribuo {:mvn/version "0.1.4"}
techascent/tech.ml.dataset {:mvn/version "7.032"} ;; no JVM crash
;;techascent/tech.ml.dataset {:mvn/version "7.033"} ;; JVM crash
;;techascent/tech.ml.dataset {:mvn/version "7.034"} ;; JVM crash

org.tribuo/tribuo-regression-liblinear {:mvn/version "4.3.1"}
org.tribuo/tribuo-regression-libsvm {:mvn/version "4.3.1"}
Expand All @@ -30,7 +27,7 @@
org.tribuo/tribuo-classification-xgboost {:mvn/version "4.3.1"}


clj-python/libpython-clj {:mvn/version "2.025"}
clj-python/libpython-clj {:mvn/version "2.026"}
org.scicloj/kind-pyplot {:mvn/version "1-beta2.1"}
scicloj/clojisr {:mvn/version "1.0.0"}

Expand Down
83 changes: 53 additions & 30 deletions notebooks/noj_book/automl.clj
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
(ns noj-book.automl
(:require [noj-book.ml-basic :as ml-basic]
[scicloj.kindly.v4.kind :as kind]
[scicloj.kindly.v4.api :as kindly]
[scicloj.metamorph.ml :as ml]))
[scicloj.metamorph.ml :as ml]
[tablecloth.api :as tc]
[scicloj.metamorph.ml.loss :as loss]
[scicloj.metamorph.core :as mm]
[scicloj.metamorph.ml.gridsearch :as gs]
[tech.v3.dataset.modelling :as ds-mod]))

;; ## The metamorph pipeline abstraction
;; When doing automl, it is very useful to be able to manage
Expand Down Expand Up @@ -278,6 +282,7 @@ ctx-after-train
'[scicloj.metamorph.core :as mm]
'[scicloj.ml.tribuo]
'[scicloj.ml.xgboost]
'[scicloj.ml.smile.classification]
'[scicloj.sklearn-clj.ml])


Expand All @@ -289,7 +294,7 @@ ctx-after-train

;; the following will find the best model across:
;;
;; * 4 different model classes
;; * 4 different model classes with different hyper params
;;
;; * 6 different selections of used features
;;
Expand All @@ -308,29 +313,47 @@ ctx-after-train
(def titanic-k-fold
  ;; Cross-validation splits of the numeric Titanic dataset, produced with
  ;; tablecloth's :kfold split type and a fixed :seed.
  (-> ml-basic/numeric-titanic-data
      (tc/split->seq :kfold {:seed 12345})))

(count titanic-k-fold)

;; We also add 10 hyper-parameter variants for logistic regression,
;; obtained via a Sobol search over the model's hyper-parameter space.
(def hyper-params
  ;; Take the hyper-parameter definition that metamorph.ml reports for the
  ;; Smile logistic-regression model, run it through the Sobol grid search,
  ;; and keep only the first 10 resulting candidates.
  (let [search-space (ml/hyperparameters :smile.classification/logistic-regression)
        candidates   (gs/sobol-gridsearch search-space)]
    (take 10 candidates)))
hyper-params

(def logistic-regression-specs
  ;; One model spec per entry of `hyper-params`: each entry gets the Smile
  ;; logistic-regression :model-type added to it.
  (for [params hyper-params]
    (assoc params :model-type :smile.classification/logistic-regression)))
logistic-regression-specs


;; The list of the model types we want to try:
;; Each map below is one model spec; :model-type selects the model and any
;; remaining keys are options for that model.
(def models [{ :model-type :xgboost/classification
;; presumably the number of boosting rounds -- TODO confirm against scicloj.ml.xgboost
:round 10}
{:model-type :sklearn.classification/decision-tree-classifier}
{:model-type :sklearn.classification/logistic-regression}
{:model-type :sklearn.classification/random-forest-classifier}
{:model-type :metamorph.ml/dummy-classifier}
;; Tribuo spec: declares one component named "logistic" and selects it
;; via :tribuo-trainer-name.
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "logistic"
:type "org.tribuo.classification.sgd.linear.LinearSGDTrainer"}]
:tribuo-trainer-name "logistic"}
;; NOTE(review): this component is named "random-forest" but its :type is
;; CARTClassificationTrainer -- confirm whether a forest/ensemble trainer
;; was intended. Property values are passed as strings.
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "random-forest"
:type "org.tribuo.classification.dtree.CARTClassificationTrainer"
:properties {:maxDepth "8"
:useRandomSplitPoints "false"
:fractionFeaturesInSplit "0.5"}}]
:tribuo-trainer-name "random-forest"}])


;; This uses models from Smile and Tribuo, but could be any
;; metamorph.ml compliant model ( library `sklearn-clj` wraps all python sklearn
;; models, for example)
;; All model specs to evaluate: the logistic-regression hyper-parameter
;; variants followed by a fixed list of seven further specs.
(def models-specs
(concat logistic-regression-specs
;; presumably :round is the number of boosting rounds -- TODO confirm
[{:model-type :xgboost/classification :round 10}
{:model-type :sklearn.classification/decision-tree-classifier}
{:model-type :sklearn.classification/logistic-regression}
{:model-type :sklearn.classification/random-forest-classifier}
{:model-type :metamorph.ml/dummy-classifier}
;; Tribuo spec: declares one component named "logistic" and selects it
;; via :tribuo-trainer-name.
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "logistic"
:type "org.tribuo.classification.sgd.linear.LinearSGDTrainer"}]
:tribuo-trainer-name "logistic"}
;; NOTE(review): this component is named "random-forest" but its :type is
;; CARTClassificationTrainer -- confirm whether a forest/ensemble trainer
;; was intended. Property values are passed as strings.
{:model-type :scicloj.ml.tribuo/classification
:tribuo-components [{:name "random-forest"
:type "org.tribuo.classification.dtree.CARTClassificationTrainer"
:properties {:maxDepth "8"
:useRandomSplitPoints "false"
:fractionFeaturesInSplit "0.5"}}]
:tribuo-trainer-name "random-forest"}]))


;; This uses models from Smile, Tribuo, XGBoost and sklearn, but it could be
;; any metamorph.ml-compliant model.

;; The list of feature combinations to try for each model:
(def feature-combinations
Expand All @@ -341,11 +364,11 @@ ctx-after-train
[:sex :embarked]
[:sex :pclass]])

;; generate 42 pipeline functions:
;; generate 102 pipeline functions:
(def pipe-fns
(for [model models
(for [model-spec models-specs
feature-combination feature-combinations]
(make-pipe-fn model feature-combination)))
(make-pipe-fn model-spec feature-combination)))

(count pipe-fns)
;; Execute all pipelines for all splits in the cross-validations
Expand Down Expand Up @@ -383,7 +406,7 @@ ctx-after-train


;; In total it creates and evaluates
;; 7 models * 6 feature configurations * 5 CV = 210 models
;; 17 model specs (incl. hyper-parameter variations) * 6 feature configurations * 5 CV = 510 models
(-> evaluation-results-all flatten count)

;; We can find the best as well by hand, it's the first from the list,
Expand All @@ -392,7 +415,7 @@ ctx-after-train
(tc/unique-by)
(tc/order-by [:mean-accuracy] :desc)
(tc/head 20)
(kind/table))
(kind/dataset))


;; ## Best practices for data transformation steps in or outside pipeline
Expand Down
16 changes: 5 additions & 11 deletions notebooks/noj_book/smile_regression.clj
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
;; # Smile regression models reference - DRAFT 🛠

^:kindly/hide-code
(require '[scicloj.ml.smile.regression]
'[scicloj.ml.tribuo])

(ns noj-book.smile-regression
(:require
[noj-book.utils.render-tools :refer [render-key-info]]
[scicloj.kindly.v4.kind :as kind]
[scicloj.metamorph.core :as mm]
[scicloj.metamorph.ml :as ml]
[scicloj.metamorph.ml.toydata :as datasets]
[tablecloth.api :as tc]
[tech.v3.dataset :as ds]
[tech.v3.dataset.metamorph :as ds-mm]
[tech.v3.datatype.functional :as dtf]))

^:kindly/hide-code
(require '[scicloj.ml.smile.regression])
[scicloj.ml.tribuo]))


^:kindly/hide-code
Expand Down

0 comments on commit 624c522

Please sign in to comment.