This method computes the gradients (also known as 'Vanilla Gradients') of
the outputs with respect to the input variables, i.e., for each input
variable \(i\) and output class \(j\)
$$d f(x)_j / d x_i.$$
If the argument times_input is TRUE, the gradients are multiplied by the
respective input value ('Gradient x Input'), i.e.
$$x_i * d f(x)_j / d x_i.$$
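As a rough illustration of these formulas, the following minimal sketch
computes the same quantities for a single data point with plain torch
autograd (independent of innsight's own implementation):

library(torch)
# Minimal sketch: d f(x)_j / d x_i for one data point via autograd
x <- torch_randn(1, 5, requires_grad = TRUE)
f <- nn_sequential(nn_linear(5, 12), nn_relu(), nn_linear(12, 2))
f(x)[1, 1]$backward()           # gradient of output class j = 1
vanilla_gradient <- x$grad      # d f(x)_1 / d x_i for all inputs i
# 'Gradient x Input': multiply element-wise by the input values
gradient_x_input <- x$detach() * vanilla_gradient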
innsight::InterpretingMethod -> innsight::GradientBased -> Gradient
new()
Create a new instance of the Vanilla Gradient method.
Gradient$new(
  converter,
  data,
  channels_first = TRUE,
  output_idx = NULL,
  ignore_last_act = TRUE,
  times_input = FALSE,
  dtype = "float"
)
converter
An instance of the R6 class Converter.
data
The data for which the gradients are to be calculated. It has to be given in an array or array-like format of size (batch_size, dim_in).
channels_first
The format of the given data, i.e. channels on the last
dimension (FALSE) or after the batch dimension (TRUE). If the
data has no channels, use the default value TRUE.
output_idx
This vector determines for which outputs the method
will be applied. By default (NULL), all outputs (but limited to the
first 10) are considered.
ignore_last_act
A logical value specifying whether the last activation
function should be ignored in the calculations (default: TRUE). In
some cases, the last activation leads to a saturation problem.
times_input
Multiplies the gradients with the input features.
This method is called 'Gradient x Input'. Default: FALSE.
dtype
The data type for the calculations. Use
either 'float' for torch::torch_float or 'double' for
torch::torch_double.
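Taken together, a call that sets all of these arguments explicitly might
look like the following sketch (assuming a converter object and suitable
data, as created in the examples below):

grad <- Gradient$new(
  converter, data,
  channels_first = TRUE,    # channels come right after the batch dimension
  output_idx = c(1, 2),     # only compute gradients for outputs 1 and 2
  ignore_last_act = TRUE,   # ignore the last activation (avoids saturation)
  times_input = FALSE,      # plain 'Vanilla Gradient', not 'Gradient x Input'
  dtype = "float"           # calculations in torch_float precision
)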
#----------------------- Example 1: Torch ----------------------------------
library(torch)
# Create nn_sequential model and data
model <- nn_sequential(
  nn_linear(5, 12),
  nn_relu(),
  nn_linear(12, 2),
  nn_softmax(dim = 2)
)
data <- torch_randn(25, 5)
# Create Converter with input and output names
converter <- Converter$new(model,
  input_dim = c(5),
  input_names = list(c("Car", "Cat", "Dog", "Plane", "Horse")),
  output_names = list(c("Buy it!", "Don't buy it!"))
)
# Calculate the Gradients
grad <- Gradient$new(converter, data)
#> Backward pass 'Gradient':
# Print the result as a data.frame for first 5 rows
grad$get_result("data.frame")[1:5,]
#> data feature class value
#> 1 data_1 Car Buy it! -0.002846509
#> 2 data_2 Car Buy it! -0.036616135
#> 3 data_3 Car Buy it! -0.103906184
#> 4 data_4 Car Buy it! -0.172386572
#> 5 data_5 Car Buy it! -0.115882114
# Plot the result for both classes
plot(grad, output_idx = 1:2)
# Plot the boxplot of all datapoints
boxplot(grad, output_idx = 1:2)
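# The data.frame from get_result() can be post-processed with plain R.
# As a rough global importance summary (a sketch on top of the output above,
# not part of the innsight API), average the absolute gradients per feature
# and output class:
res <- grad$get_result("data.frame")
agg <- aggregate(abs(res$value),
  by = list(feature = res$feature, class = res$class),
  FUN = mean
)
names(agg)[3] <- "mean_abs_gradient"
agg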
# ------------------------- Example 2: Neuralnet ---------------------------
library(neuralnet)
data(iris)
# Train a neural network
nn <- neuralnet(Species ~ ., iris,
  linear.output = FALSE,
  hidden = c(10, 5),
  act.fct = "logistic",
  rep = 1
)
# Convert the trained model
converter <- Converter$new(nn)
# Calculate 'Gradient x Input'
gradient <- Gradient$new(converter, iris[, -5], times_input = TRUE)
#> Backward pass 'Gradient':
# Plot the result for the first and 60th data point and all classes
plot(gradient, data_idx = c(1, 60), output_idx = 1:3)
# Calculate the gradients without ignoring the last activation
gradient <- Gradient$new(converter, iris[, -5], ignore_last_act = FALSE)
#> Backward pass 'Gradient':
# Plot the result again
plot(gradient, data_idx = c(1, 60), output_idx = 1:3)
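# The data.frame output can also be used to rank the features for a single
# flower, e.g. the first data point and the first output class (a plain-R
# sketch on top of the result above):
res <- gradient$get_result("data.frame")
first <- res[res$data == "data_1" & res$class == res$class[1], ]
first[order(abs(first$value), decreasing = TRUE), ]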
# ------------------------- Example 3: Keras -------------------------------
library(keras)
if (is_keras_available()) {
  data <- array(rnorm(64 * 60 * 3), dim = c(64, 60, 3))
  model <- keras_model_sequential()
  model %>%
    layer_conv_1d(
      input_shape = c(60, 3), kernel_size = 8, filters = 8,
      activation = "softplus", padding = "valid"
    ) %>%
    layer_conv_1d(
      kernel_size = 8, filters = 4, activation = "tanh",
      padding = "same"
    ) %>%
    layer_conv_1d(
      kernel_size = 4, filters = 2, activation = "relu",
      padding = "valid"
    ) %>%
    layer_flatten() %>%
    layer_dense(units = 64, activation = "relu") %>%
    layer_dense(units = 16, activation = "relu") %>%
    layer_dense(units = 3, activation = "softmax")
  # Convert the model
  converter <- Converter$new(model)
  # Apply the Gradient method
  gradient <- Gradient$new(converter, data, channels_first = FALSE)
  # Plot the result for the first datapoint and all classes
  plot(gradient, output_idx = 1:3)
  # Plot the result as boxplots for first two classes
  boxplot(gradient, output_idx = 1:2)
  # You can also create an interactive plot with plotly.
  # This is a suggested package, so make sure that it is installed
  library(plotly)
  # Result as boxplots
  boxplot(gradient, as_plotly = TRUE)
  # Result of the second data point
  plot(gradient, data_idx = 2, as_plotly = TRUE)
}
#> Backward pass 'Gradient':
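# Because the Keras data above stores the channels in the last axis,
# channels_first = FALSE was required. If only one class is of interest, the
# backward pass can be restricted to it via output_idx (a sketch reusing the
# converter and data from Example 3, if Keras is available):
grad_cls1 <- Gradient$new(converter, data,
  channels_first = FALSE,
  output_idx = c(1)
)
plot(grad_cls1, output_idx = 1)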
# ------------------------- Advanced: Plotly -------------------------------
# If you want to create an interactive plot of your results with custom
# changes, you can make use of the function plotly::ggplotly
library(ggplot2)
library(plotly)
library(neuralnet)
data(iris)
nn <- neuralnet(Species ~ ., iris,
  linear.output = FALSE,
  hidden = c(10, 8), act.fct = "tanh", rep = 1, threshold = 0.5
)
# Create a converter for this model
converter <- Converter$new(nn)
# Create a new instance of 'Gradient'
gradient <- Gradient$new(converter, iris[, -5])
#> Backward pass 'Gradient':
# Get the ggplot and add your changes
p <- plot(gradient, output_idx = 1, data_idx = 1:2) +
theme_bw() +
scale_fill_gradient2(low = "green", mid = "black", high = "blue")
#> Scale for 'fill' is already present. Adding another scale for 'fill', which
#> will replace the existing scale.
# Now apply the method plotly::ggplotly with argument tooltip = "text"
plotly::ggplotly(p, tooltip = "text")
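# If the interactive figure should be kept, one option (not specific to
# innsight; assumes the htmlwidgets package is installed) is to save the
# plotly object as a standalone HTML file:
widget <- plotly::ggplotly(p, tooltip = "text")
htmlwidgets::saveWidget(widget, "gradient_plot.html", selfcontained = TRUE)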