Skip to content

Commit

Permalink
Init t-SNE with a given data set (#103)
Browse files Browse the repository at this point in the history
* Add init tsne settings and init randomly

* Set init embedding from other dataset

* static random generator for settings

* Update seed functions for settings

* Add settings to plugin

* Update UI

* Same CI as core

* Allow 0 iterations
  • Loading branch information
alxvth authored Feb 27, 2024
1 parent 4c480ba commit 5b6c87f
Show file tree
Hide file tree
Showing 8 changed files with 350 additions and 30 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,23 @@ jobs:
build-runtime: MD
build-config: Release

- name: Linux_gcc10
- name: Linux_gcc11
os: ubuntu-22.04
build-cc: gcc
build-cxx: g++
build-compiler: gcc
build-cversion: 10
build-cversion: 11
build-config: Release
build-os: Linux
build-libcxx: libstdc++

- name: Macos_xcode12.4
os: macos-11
- name: Macos_xcode13.4
os: macos-12
build-compiler: apple-clang
build-cversion: "12.0"
build-cversion: 13
build-config: Release
build-os: Macos
build-xcode-version: 12.4
build-xcode-version: 13.4
build-libcxx: libc++

steps:
Expand Down
2 changes: 1 addition & 1 deletion src/Common/TsneComputationAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ using namespace mv::gui;

TsneComputationAction::TsneComputationAction(GroupAction* parent, TsneParameters* tsneParameters) :
WidgetAction(parent, "TsneComputationAction"),
_numIterationsAction(this, "New iterations", 1, 10000, 1000),
_numIterationsAction(this, "New iterations", 0, 10000, 1000),
_numberOfComputatedIterationsAction(this, "Computed iterations", 0, std::numeric_limits<int>::max(), 0),
_updateIterationsAction(this, "Core update every", 0, 10000, 10),
_startComputationAction(this, "Start"),
Expand Down
2 changes: 2 additions & 0 deletions src/tSNE/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,7 @@ set(TSNE_ACTIONS_SOURCES
${DIR}/TsneSettingsAction.cpp
${DIR}/GeneralTsneSettingsAction.h
${DIR}/GeneralTsneSettingsAction.cpp
${DIR}/InitTsneSettings.h
${DIR}/InitTsneSettings.cpp
PARENT_SCOPE
)
219 changes: 219 additions & 0 deletions src/tSNE/InitTsneSettings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
#include "InitTsneSettings.h"

#include "TsneSettingsAction.h"

#include <cmath>
#include <numeric>
#include <random>
#include <utility>

namespace {

// Inclusive bounds of the seed range, used for the seed action and for NewRandomSeed()
constexpr int SEEDMIN = -1000;
constexpr int SEEDMAX = 1000;

// File-local generator state: the engine is seeded once from std::random_device
std::random_device seedEntropy;
std::default_random_engine seedEngine(seedEntropy());
std::uniform_int_distribution<int> seedDistribution(SEEDMIN, SEEDMAX);

/** Draw a new seed uniformly from [SEEDMIN, SEEDMAX] */
inline int NewRandomSeed()
{
    return seedDistribution(seedEngine);
}

} // namespace

using namespace mv::gui;

/**
 * Set up the "Initialization" group: create the child actions, add them to the group,
 * assign tooltips and connect the handlers that keep their enabled state in sync.
 *
 * @param tsneSettingsAction Parent t-SNE settings action, also used as parent of this group
 */
InitTsneSettings::InitTsneSettings(TsneSettingsAction& tsneSettingsAction) :
    GroupAction(&tsneSettingsAction, "Initialization", true),
    _tsneSettingsAction(tsneSettingsAction),
    // NOTE(review): action titles are kept byte-identical (typos included) since they may be
    // used as identifiers elsewhere, e.g. for serialization - confirm before correcting them
    _randomInitAction(this, "Random inital embedding", true),
    _newRandomSeedAction(this, "New seed on re-initialization", true),
    _randomSeedAction(this, "Random seed"),
    _datasetInitAction(this, "Init dataset"),
    _dataDimensionActionX(this, "Init dim X"),
    _dataDimensionActionY(this, "Init dim Y"),
    _rescaleInitAction(this, "Rescale to small std dev", true),
    _numPointsInputData(0)
{
    addAction(&_randomInitAction);
    addAction(&_randomSeedAction);
    addAction(&_newRandomSeedAction);
    addAction(&_datasetInitAction);
    addAction(&_dataDimensionActionX);
    addAction(&_dataDimensionActionY);
    addAction(&_rescaleInitAction);

    _randomInitAction.setToolTip("Init t-SNE randomly.");
    _newRandomSeedAction.setToolTip("Use a new random seed when re-initializing the embedding.");
    _randomSeedAction.setToolTip("Seed for random init.");
    _datasetInitAction.setToolTip("Dataset to use for init.");
    _dataDimensionActionX.setToolTip("Dimensions of dataset to use for inititial embedding X dimension.");
    _dataDimensionActionY.setToolTip("Dimensions of dataset to use for inititial embedding Y dimension.");
    _rescaleInitAction.setToolTip("Whether to rescale the init embedding such that the standard deviation of \nthe first embedding dimension is 0.0001.");

    // Random init is checked by default, so the dataset-based controls start disabled
    _datasetInitAction.setEnabled(false);
    _dataDimensionActionX.setEnabled(false);
    _dataDimensionActionY.setEnabled(false);

    // always start with a random seed
    _randomSeedAction.initialize(SEEDMIN, SEEDMAX, NewRandomSeed());

    updateDataPicker(0);

    // A newly picked dataset feeds both dimension pickers; default to its first two dimensions
    connect(&_datasetInitAction, &DatasetPickerAction::datasetPicked, this, [this](mv::Dataset<mv::DatasetImpl> pickedDataset) {
        _dataDimensionActionX.setPointsDataset(pickedDataset);
        _dataDimensionActionY.setPointsDataset(pickedDataset);

        _dataDimensionActionX.setCurrentDimensionIndex(0);
        _dataDimensionActionY.setCurrentDimensionIndex(1);
    });

    // Single place that decides which controls are enabled. Previously the toggle handler and the
    // read-only handler each set the states on their own and disagreed: leaving read-only mode
    // re-enabled the seed controls even when random init was switched off, and toggling ignored
    // the read-only state altogether.
    const auto updateEnabledStates = [this]() -> void {
        const bool editable   = !isReadOnly();
        const bool randomInit = _randomInitAction.isChecked();

        _randomInitAction.setEnabled(editable);
        _rescaleInitAction.setEnabled(editable);

        // Seed controls only apply to random init
        _randomSeedAction.setEnabled(editable && randomInit);
        _newRandomSeedAction.setEnabled(editable && randomInit);

        // Dataset controls only apply to dataset-based init
        _datasetInitAction.setEnabled(editable && !randomInit);
        _dataDimensionActionX.setEnabled(editable && !randomInit);
        _dataDimensionActionY.setEnabled(editable && !randomInit);
    };

    connect(&_randomInitAction, &ToggleAction::toggled, this, [this, updateEnabledStates](bool) {
        const auto checked = _randomInitAction.isChecked();

        _newRandomSeedAction.setCheckable(checked);
        _randomSeedAction.setCheckable(checked);

        updateEnabledStates();
    });

    connect(this, &GroupAction::readOnlyChanged, this, [updateEnabledStates](const bool&) {
        updateEnabledStates();
    });
}

/**
 * Store the number of input points and install a matching filter on the init dataset picker.
 *
 * The filter keeps only point datasets that have at least two dimensions and exactly
 * numPointsInputData points.
 */
void InitTsneSettings::updateDataPicker(size_t numPointsInputData)
{
    _numPointsInputData = numPointsInputData;

    // Copy the count so that the filter closure holds its own value
    const auto requiredNumPoints = _numPointsInputData;

    _datasetInitAction.setDatasetsFilterFunction([requiredNumPoints](const mv::Datasets& datasets) -> Datasets {
        Datasets candidates;

        for (const auto& dataset : datasets)
        {
            // Only point data can serve as an initial embedding
            if (dataset->getDataType() != PointType)
                continue;

            const auto points = Dataset<Points>(dataset);

            // Two dimensions are needed, one for each embedding axis
            if (points->getNumDimensions() < 2)
                continue;

            // One init position per input point
            if (points->getNumPoints() != requiredNumPoints)
                continue;

            candidates << dataset;
        }

        return candidates;
    });
}

/**
 * Rescale an interleaved 2D embedding (x0, y0, x1, y1, ...) in place such that the standard
 * deviation of its first dimension becomes stdevDesired.
 *
 * Both dimensions share the scale factor derived from the first dimension. Each dimension is
 * scaled about its own mean, so the centroid of the embedding does not move.
 *
 * @param positions Interleaved positions, must hold at least 2 * numPoints values
 * @param numPoints Number of points stored in positions
 * @param stdevDesired Target standard deviation of the first dimension
 * @return false if nothing was rescaled (no points, or the first dimension has no spread)
 */
static bool rescaleInitEmbedding(std::vector<float>& positions, size_t numPoints, float stdevDesired)
{
    if (numPoints == 0)
        return false;

    // Mean of each embedding dimension
    float sumX = 0.f;
    float sumY = 0.f;
    for (size_t i = 0; i < numPoints; ++i) {
        sumX += positions[i * 2];
        sumY += positions[i * 2 + 1];
    }

    const float meanX = sumX / numPoints;
    const float meanY = sumY / numPoints;

    // Standard deviation of the first embedding dimension
    float sumSquaredDeviations = 0.f;
    for (size_t i = 0; i < numPoints; ++i) {
        const float deviation = positions[i * 2] - meanX;
        sumSquaredDeviations += deviation * deviation;
    }

    const float stdevCurrent = std::sqrt(sumSquaredDeviations / numPoints);

    // Without spread the scale factor would be infinite and turn every position into NaN.
    // The negated comparison also rejects a NaN standard deviation.
    if (!(stdevCurrent > 0.f))
        return false;

    const float scaleFactor = stdevDesired / stdevCurrent;
    for (size_t i = 0; i < numPoints; ++i) {
        positions[i * 2]     = (positions[i * 2]     - meanX) * scaleFactor + meanX;
        positions[i * 2 + 1] = (positions[i * 2 + 1] - meanY) * scaleFactor + meanY;
    }

    return true;
}

/**
 * Create the initial t-SNE embedding according to the current settings.
 *
 * With random init checked, points are sampled uniformly from the unit disk using the current
 * random seed. Otherwise the two selected dimensions of the picked dataset are used. If the
 * rescale option is checked, the result is rescaled to a standard deviation of 0.0001 in its
 * first dimension.
 *
 * @param numPoints Number of points to create positions for, must be larger than zero
 * @return Vector with two values per point (2 * numPoints entries)
 */
std::vector<float> InitTsneSettings::getInitEmbedding(size_t numPoints)
{
    assert(numPoints > 0);

    std::vector<float> initPositions(numPoints * 2, -1.f);

    if (_randomInitAction.isChecked())
    {
        qDebug() << "Initialize t-SNE embedding randomly";

        std::default_random_engine gen(_randomSeedAction.getValue());
        std::uniform_real_distribution<float> dis(0, 1);

        // The radius is drawn before the angle for every point; keep this order so that a
        // given seed keeps producing the same embedding
        for (size_t i = 0; i < numPoints; ++i) {
            const float r = std::sqrt(dis(gen));            // random radius: uniformly sample from [0, 1], sqrt (important!)
            const float t = 2.0f * 3.141592f * dis(gen);    // random angle: uniformly sample from [0, 1] and scale to [0, 2pi]

            // conversion to cartesian coordinates
            initPositions[i * 2]     = r * std::cos(t);
            initPositions[i * 2 + 1] = r * std::sin(t);
        }
    }
    else
    {
        // NOTE(review): the picked dataset is dereferenced without a validity check; presumably
        // callers only get here after a dataset was picked - confirm
        auto initData = _datasetInitAction.getCurrentDataset<Points>();
        auto xDim = _dataDimensionActionX.getCurrentDimensionIndex();
        auto yDim = _dataDimensionActionY.getCurrentDimensionIndex();

        qDebug() << "Initialize t-SNE embedding with " << initData->getGuiName() << " using dimensions " << xDim << " and " << yDim;

        initData->populateDataForDimensions(initPositions, std::vector<int32_t>{ xDim , yDim });
    }

    if (_rescaleInitAction.isChecked())
    {
        const float stdevDesired = 0.0001f;

        qDebug() << "Rescale initial embedding such that the standard deviation of its first dimension is " << stdevDesired;

        if (!rescaleInitEmbedding(initPositions, numPoints, stdevDesired))
            qDebug() << "Initial embedding has no spread in its first dimension, skip rescaling";
    }

    return initPositions;
}

/** Draw a new random seed and assign it to the random seed action */
void InitTsneSettings::updateSeed()
{
    const int freshSeed = NewRandomSeed();

    _randomSeedAction.setValue(freshSeed);
}


/**
 * Load this group and all of its child actions from a variant map
 * @param variantMap Variant map representation of this action
 */
void InitTsneSettings::fromVariantMap(const QVariantMap& variantMap)
{
    GroupAction::fromVariantMap(variantMap);

    const auto restore = [&variantMap](auto& action) -> void {
        action.fromParentVariantMap(variantMap);
    };

    // NOTE(review): the call order matches the original; presumably the dataset picker has
    // to be restored before the dimension pickers - confirm before reordering
    restore(_randomInitAction);
    restore(_newRandomSeedAction);
    restore(_randomSeedAction);
    restore(_datasetInitAction);
    restore(_dataDimensionActionX);
    restore(_dataDimensionActionY);
    restore(_rescaleInitAction);
}

/**
 * Save this group and all of its child actions to a variant map
 * @return Variant map representation of this action
 */
QVariantMap InitTsneSettings::toVariantMap() const
{
    QVariantMap serialized = GroupAction::toVariantMap();

    const auto store = [&serialized](const auto& action) -> void {
        action.insertIntoVariantMap(serialized);
    };

    store(_randomInitAction);
    store(_newRandomSeedAction);
    store(_randomSeedAction);
    store(_datasetInitAction);
    store(_dataDimensionActionX);
    store(_dataDimensionActionY);
    store(_rescaleInitAction);

    return serialized;
}
78 changes: 78 additions & 0 deletions src/tSNE/InitTsneSettings.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#pragma once

#include "actions/DatasetPickerAction.h"
#include "actions/IntegralAction.h"
#include "actions/ToggleAction.h"
#include "PointData/DimensionPickerAction.h"

#include <vector>

using namespace mv::gui;

class TsneSettingsAction;

/**
 * Init TSNE setting action class
 *
 * Setup of t-SNE embedding initialization: either random positions created with a
 * configurable seed or two dimensions of another point dataset, optionally rescaled.
 *
 * @author Alexander Vieth
 */
class InitTsneSettings : public GroupAction
{
public:

    /**
     * Constructor
     * @param tsneSettingsAction Reference to TSNE settings action
     */
    explicit InitTsneSettings(TsneSettingsAction& tsneSettingsAction);

    /**
     * Create the initial embedding according to the current settings
     * @param numPoints Number of points to create init positions for, must be larger than zero
     * @return Init positions, two values per point (2 * numPoints entries)
     */
    std::vector<float> getInitEmbedding(size_t numPoints);

    /** Assign a newly drawn random value to the random seed action */
    void updateSeed();

    /**
     * only list point datasets with at least 2 dimensions
     * and the same number of points as the input data
     * @param numPointsInputData Number of points of the input dataset
     */
    void updateDataPicker(size_t numPointsInputData);

public: // Action getters

    TsneSettingsAction& getTsneSettingsAction() { return _tsneSettingsAction; }
    ToggleAction& getRandomInitAction() { return _randomInitAction; }
    ToggleAction& getNewRandomSeedAction() { return _newRandomSeedAction; }
    IntegralAction& getRandomSeedAction() { return _randomSeedAction; }
    DatasetPickerAction& getDatasetInitAction() { return _datasetInitAction; }
    DimensionPickerAction& getDataDimensionXAction() { return _dataDimensionActionX; }
    DimensionPickerAction& getDataDimensionYAction() { return _dataDimensionActionY; }
    ToggleAction& getRescaleInitAction() { return _rescaleInitAction; }

public: // Serialization

    /**
     * Load this action from variant map
     * @param variantMap Variant map representation of this action
     */
    void fromVariantMap(const QVariantMap& variantMap) override;

    /**
     * Save this action to variant map
     * @return Variant map representation of this action
     */
    QVariantMap toVariantMap() const override;

protected:
    // Keep the declaration order: members are initialized in this order by the constructor
    TsneSettingsAction&     _tsneSettingsAction;            /** Reference to parent tSNE settings action */
    ToggleAction            _randomInitAction;              /** Init t-SNE randomly */
    ToggleAction            _newRandomSeedAction;           /** New random seed on re-init */
    IntegralAction          _randomSeedAction;              /** Random seed for init */
    DatasetPickerAction     _datasetInitAction;             /** Data set to use for init */
    DimensionPickerAction   _dataDimensionActionX;          /** Dimension of dataset to use for init X dim */
    DimensionPickerAction   _dataDimensionActionY;          /** Dimension of dataset to use for init Y dim */
    ToggleAction            _rescaleInitAction;             /** Whether to rescale the init embedding */

private:
    size_t                  _numPointsInputData;            /** Number of points of the input dataset */
};
Loading

0 comments on commit 5b6c87f

Please sign in to comment.