Skip to content

Commit

Permalink
Add Topk samples visualisation (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
danbraunai authored Jan 9, 2023
1 parent ddd138e commit 87e9550
Show file tree
Hide file tree
Showing 18 changed files with 8,013 additions and 2,706 deletions.
10,294 changes: 7,608 additions & 2,686 deletions python/Demonstration.ipynb

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion python/circuitsvis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,17 @@
import circuitsvis.attention
import circuitsvis.examples
import circuitsvis.tokens
import circuitsvis.topk_samples
import circuitsvis.topk_tokens
import circuitsvis.logits

__version__ = version("circuitsvis")

__all__ = ["activations", "attention", "examples", "tokens"]
# Public submodules exported by ``from circuitsvis import *``. Every submodule
# imported in this file is listed, including ``logits``, which was imported
# but previously missing from this list.
__all__ = [
    "activations",
    "attention",
    "examples",
    "tokens",
    "topk_samples",
    "topk_tokens",
    "logits",
]
2 changes: 1 addition & 1 deletion python/circuitsvis/tests/snapshots/snap_test_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

snapshots = Snapshot()

snapshots['TestAttention.test_matches_snapshot 1'] = '''<div id="circuits-vis-mock" style="margin: 15px 0;"/>
snapshots['TestTokens.test_matches_snapshot 1'] = '''<div id="circuits-vis-mock" style="margin: 15px 0;"/>
<script crossorigin type="module">
import { render, ColoredTokens } from "https://unpkg.com/circuitsvis@1.0.0/dist/cdn/esm.js";
render(
Expand Down
18 changes: 18 additions & 0 deletions python/circuitsvis/tests/snapshots/snap_test_topk_samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# snapshottest: v1 - https://goo.gl/zC4yUc
from __future__ import unicode_literals

from snapshottest import Snapshot


snapshots = Snapshot()

snapshots['TestTopkSamples.test_matches_snapshot 1'] = '''<div id="circuits-vis-mock" style="margin: 15px 0;"/>
<script crossorigin type="module">
import { render, TopkSamples } from "https://unpkg.com/circuitsvis@1.0.0/dist/cdn/esm.js";
render(
"circuits-vis-mock",
TopkSamples,
{"tokens": [[[["And", " here"], ["This", " is", " another"]], [["Another", " example"], ["Weee", " is", " another"]]]], "activations": [[[[0.2, 1], [1, 0.0, 0]], [[0, 1], [0.5, 1, 1]]]], "zerothDimensionName": "Layer", "firstDimensionName": "Neuron"}
)
</script>'''
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

snapshots = Snapshot()

snapshots['TestTopk.test_matches_snapshot 1'] = '''<div id="circuits-vis-mock" style="margin: 15px 0;"/>
snapshots['TestTopkTokens.test_matches_snapshot 1'] = '''<div id="circuits-vis-mock" style="margin: 15px 0;"/>
<script crossorigin type="module">
import { render, TopkTokens } from "https://unpkg.com/circuitsvis@1.0.0/dist/cdn/esm.js";
render(
Expand Down
7 changes: 2 additions & 5 deletions python/circuitsvis/tests/test_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
from circuitsvis.tokens import colored_tokens


class TestAttention:
class TestTokens:
def test_matches_snapshot(self, snapshot, monkeypatch):
monkeypatch.setattr(circuitsvis.utils.render, "uuid4", lambda: "mock")
monkeypatch.setattr(circuitsvis, "__version__", "1.0.0")

res = colored_tokens(
tokens=["a", "b"],
values=[1, 2]
)
res = colored_tokens(tokens=["a", "b"], values=[1, 2])
snapshot.assert_match(str(res))
32 changes: 32 additions & 0 deletions python/circuitsvis/tests/test_topk_samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from circuitsvis.topk_samples import topk_samples
import circuitsvis.utils.render
import numpy as np


class TestTopkSamples:
    """Snapshot regression test for the ``topk_samples`` renderer."""

    def test_matches_snapshot(self, snapshot, monkeypatch):
        """Render one layer with two neurons and compare against the snapshot."""
        # Make the output deterministic: uuid4 always yields "mock" and the
        # package version is pinned to "1.0.0".
        monkeypatch.setattr(circuitsvis.utils.render, "uuid4", lambda: "mock")
        monkeypatch.setattr(circuitsvis, "__version__", "1.0.0")

        # Per-neuron data: two samples each, with a varying number of tokens.
        first_neuron_tokens = [["And", " here"], ["This", " is", " another"]]
        second_neuron_tokens = [["Another", " example"], ["Weee", " is", " another"]]
        first_neuron_activations = [[0.2, 1], [1, 0.0, 0]]
        second_neuron_activations = [[0, 1], [0.5, 1, 1]]

        # One layer: (n_neurons (2), samples (2), tokens (varied))
        layer_tokens = [first_neuron_tokens, second_neuron_tokens]
        layer_activations = [first_neuron_activations, second_neuron_activations]

        # Wrap in an outer list to add the leading layer dimension.
        rendered = topk_samples(
            tokens=[layer_tokens],
            activations=[layer_activations],
        )
        snapshot.assert_match(str(rendered))
2 changes: 1 addition & 1 deletion python/circuitsvis/tests/test_topk_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np


class TestTopk:
class TestTopkTokens:
def test_matches_snapshot(self, snapshot, monkeypatch):
# Monkeypatch uuid4 to always return the same uuid
monkeypatch.setattr(circuitsvis.utils.render, "uuid4", lambda: "mock")
Expand Down
35 changes: 35 additions & 0 deletions python/circuitsvis/topk_samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Activations visualizations"""
from typing import List, Optional

from circuitsvis.utils.render import RenderedHTML, render


def topk_samples(
    tokens: List[List[List[List[str]]]],
    activations: List[List[List[List[float]]]],
    zeroth_dimension_name: Optional[str] = "Layer",
    first_dimension_name: Optional[str] = "Neuron",
    zeroth_dimension_labels: Optional[List[str]] = None,
    first_dimension_labels: Optional[List[str]] = None,
) -> RenderedHTML:
    """Render the TopkSamples visualization.

    Shows a list of samples in descending order of max token activation value
    for the selected layer and neuron (or whatever other dimension names are
    specified). This function does not sort or validate anything itself; it
    forwards its arguments to the "TopkSamples" component.

    Args:
        tokens: List of tokens of shape [layers x neurons x samples x tokens]
        activations: Activations of shape [layers x neurons x samples x tokens]
        zeroth_dimension_name: Zeroth dimension to display (e.g. "Layer")
        first_dimension_name: First dimension to display (e.g. "Neuron")
        zeroth_dimension_labels: Optional labels for the zeroth dimension
        first_dimension_labels: Optional labels for the first dimension

    Returns:
        Html: TopkSamples visualization
    """
    # Map the snake_case Python arguments onto the camelCase prop names that
    # are passed to the component.
    component_props = {
        "tokens": tokens,
        "activations": activations,
        "zerothDimensionName": zeroth_dimension_name,
        "firstDimensionName": first_dimension_name,
        "zerothDimensionLabels": zeroth_dimension_labels,
        "firstDimensionLabels": first_dimension_labels,
    }
    return render("TopkSamples", **component_props)
3 changes: 2 additions & 1 deletion react/src/activations/TextNeuronActivations.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ export function getSelectedActivations(
/**
* Show activations (colored by intensity) for each token.
*
* Includes drop-downs for layer and neuron numbers.
* Includes drop-downs for e.g. showing the activations for the selected layer
* and neuron for the given samples.
*/
export function TextNeuronActivations({
tokens,
Expand Down
6 changes: 3 additions & 3 deletions react/src/activations/mocks/textNeuronActivations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ const text: string = `
Preparation
`;

function chunkText(textArr: string[]) {
// Split textArr into chunks of random size between 50 and 100 words
function chunkText(textArr: string[]): string[][] {
const chunks: string[][] = [];
let i = 0;
// Split textArr into 12 chunks of 75 tokens
const chunkSize = 75;
while (i < textArr.length) {
const chunkSize = Math.floor(Math.random() * 50) + 50;
chunks.push(textArr.slice(i, i + chunkSize));
i += chunkSize;
}
Expand Down
3 changes: 2 additions & 1 deletion react/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ export { ColoredTokens } from "./tokens/ColoredTokens";
export { Hello } from "./examples/Hello";
export { render } from "./render-helper";
export { TextNeuronActivations } from "./activations/TextNeuronActivations";
export { TopkTokens } from "./topk/TopkTokens";
export { TopkSamples } from "./topk/TopkSamples";
export { TokenLogProbs } from "./logits/TokenLogProbs";
export { TopkTokens } from "./topkTokens/TopkTokens";
21 changes: 21 additions & 0 deletions react/src/topk/TopkSamples.stories.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { ComponentStory, ComponentMeta } from "@storybook/react";
import React from "react";
import { mockActivations, mockTokens, neuronLabels } from "./mocks/topkSamples";
import { TopkSamples } from "./TopkSamples";

// Storybook metadata: registers TopkSamples as the component under test.
export default {
  component: TopkSamples
} as ComponentMeta<typeof TopkSamples>;

// Shared story type so each story declaration stays short.
type TopkSamplesStory = ComponentStory<typeof TopkSamples>;

// Base template that forwards the story args straight to the component.
const Template: TopkSamplesStory = (storyArgs) => {
  return <TopkSamples {...storyArgs} />;
};

// Example story driven by the mock tokens, activations and neuron labels.
export const ExampleSamples: TopkSamplesStory = Template.bind({});
ExampleSamples.args = {
  tokens: mockTokens,
  activations: mockActivations,
  firstDimensionLabels: neuronLabels
};
177 changes: 177 additions & 0 deletions react/src/topk/TopkSamples.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import React, { useState, useEffect } from "react";
import { Container, Row, Col } from "react-grid-system";
import { SampleItems } from "../shared/SampleItems";
import { RangeSelector } from "../shared/RangeSelector";
import { NumberSelector } from "../shared/NumberSelector";
import { minMaxInNestedArray } from "../utils/arrayOps";

/**
 * List of samples in descending order of max token activation value for the
 * selected layer and neuron (or whatever other dimension names are specified).
 *
 * Renders selectors for the zeroth dimension, the first dimension, the range
 * of samples shown and the number of samples per page, followed by the
 * selected samples. All samples share one color scale derived from the
 * min/max over the whole `activations` prop.
 */
export function TopkSamples({
  tokens,
  activations,
  zerothDimensionName = "Layer",
  firstDimensionName = "Neuron",
  zerothDimensionLabels,
  firstDimensionLabels
}: TopkSamplesProps) {
  // Obtain min and max activations for a consistent color scale across all
  // samples. Memoized so the nested array is only re-scanned when the
  // `activations` prop changes, not on every selector interaction.
  const [minValue, maxValue] = React.useMemo(
    () => minMaxInNestedArray(activations),
    [activations]
  );

  // NOTE(review): the neuron and sample counts are read from the first
  // layer / first neuron only, so this assumes every layer has the same number
  // of neurons and every neuron the same number of samples - confirm callers.
  const numberOfLayers = activations.length;
  const numberOfNeurons = activations[0].length;
  const numberOfSamples = activations[0][0].length;

  // Show at most 5 samples per page initially.
  const [samplesPerPage, setSamplesPerPage] = useState<number>(
    Math.min(5, numberOfSamples)
  );
  // Indices of the samples currently displayed; starts at the first page.
  const [sampleNumbers, setSampleNumbers] = useState<number[]>([
    ...Array(samplesPerPage).keys()
  ]);
  const [layerNumber, setLayerNumber] = useState<number>(0);
  const [neuronNumber, setNeuronNumber] = useState<number>(0);

  useEffect(() => {
    // When the user changes the samplesPerPage, update the sampleNumbers
    // (jumping back to the first page of the new size).
    setSampleNumbers([...Array(samplesPerPage).keys()]);
  }, [samplesPerPage]);

  // Get the relevant activations and tokens for the selected layer and neuron.
  const selectedActivations: number[][] = sampleNumbers.map(
    (sampleNumber) => activations[layerNumber][neuronNumber][sampleNumber]
  );
  const selectedTokens: string[][] = sampleNumbers.map(
    (sampleNumber) => tokens[layerNumber][neuronNumber][sampleNumber]
  );

  const selectRowStyle = {
    paddingTop: 5,
    paddingBottom: 5
  };

  return (
    <Container fluid>
      <Row>
        <Col>
          <Row style={selectRowStyle}>
            <Col>
              <label htmlFor="layer-selector" style={{ marginRight: 15 }}>
                {zerothDimensionName}:
              </label>
              <NumberSelector
                id="layer-selector"
                largestNumber={numberOfLayers - 1}
                currentValue={layerNumber}
                setCurrentValue={setLayerNumber}
                labels={zerothDimensionLabels}
              />
            </Col>
          </Row>
          <Row style={selectRowStyle}>
            <Col>
              <label htmlFor="neuron-selector" style={{ marginRight: 15 }}>
                {firstDimensionName}:
              </label>
              <NumberSelector
                id="neuron-selector"
                largestNumber={numberOfNeurons - 1}
                currentValue={neuronNumber}
                setCurrentValue={setNeuronNumber}
                labels={firstDimensionLabels}
              />
            </Col>
          </Row>
          {/* Only show the sample selector if there is more than one sample */}
          {numberOfSamples > 1 && (
            <Row style={selectRowStyle}>
              <Col>
                <label htmlFor="sample-selector" style={{ marginRight: 15 }}>
                  Samples (descending):
                </label>
                <RangeSelector
                  id="sample-selector"
                  largestNumber={numberOfSamples - 1}
                  currentRangeArr={sampleNumbers}
                  setCurrentValue={setSampleNumbers}
                  numValsInRange={samplesPerPage}
                />
              </Col>
            </Row>
          )}
        </Col>
        <Col>
          {/* Only show the sample per page selector if there is more than one sample */}
          {numberOfSamples > 1 && (
            <Row style={selectRowStyle}>
              <Col>
                <label
                  htmlFor="samples-per-page-selector"
                  style={{ marginRight: 15 }}
                >
                  Samples per page:
                </label>
                <NumberSelector
                  id="samples-per-page-selector"
                  smallestNumber={1}
                  largestNumber={numberOfSamples}
                  currentValue={samplesPerPage}
                  setCurrentValue={setSamplesPerPage}
                />
              </Col>
            </Row>
          )}
        </Col>
      </Row>
      <Row>
        <Col>
          <SampleItems
            activationsList={selectedActivations}
            tokensList={selectedTokens}
            minValue={minValue}
            maxValue={maxValue}
          />
        </Col>
      </Row>
    </Container>
  );
}

/**
 * Props accepted by the TopkSamples component.
 */
export interface TopkSamplesProps {
  /**
   * Nested list of tokens of shape [layers x neurons x samples x tokens]
   *
   * The innermost dimension must be the same size as the innermost dimension
   * of activations.
   *
   * For example, the zeroth and first dimensions (0-indexed) may correspond to
   * layers and neurons.
   */
  tokens: string[][][][];

  /**
   * Activations for the tokens with shape [layers x neurons x samples x tokens]
   *
   * The component indexes this array in parallel with `tokens`, using the same
   * layer, neuron and sample indices.
   */
  activations: number[][][][];

  /**
   * Name of the zeroth dimension, shown as the label of its selector.
   * The component defaults this to "Layer".
   */
  zerothDimensionName?: string;

  /**
   * Name of the first dimension, shown as the label of its selector.
   * The component defaults this to "Neuron".
   */
  firstDimensionName?: string;

  /**
   * Labels for the zeroth dimension (passed to the zeroth-dimension selector)
   */
  zerothDimensionLabels?: string[];

  /**
   * Labels for the first dimension (passed to the first-dimension selector)
   */
  firstDimensionLabels?: string[];
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ const Template: ComponentStory<typeof TopkTokens> = (args) => (
<TopkTokens {...args} />
);

export const SmallModelExample = Template.bind({});
SmallModelExample.args = {
export const ExampleTokens: ComponentStory<typeof TopkTokens> = Template.bind(
{}
);
ExampleTokens.args = {
tokens: mockTokens,
topkVals,
topkIdxs,
Expand Down
File renamed without changes.
Loading

0 comments on commit 87e9550

Please sign in to comment.