Skip to content

Commit

Permalink
GLU fusion
Browse files Browse the repository at this point in the history
  • Loading branch information
AleHD committed Jun 27, 2024
1 parent bcf405d commit ed1ca7d
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions src/nanotron/models/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ def __init__(
bias=False,
async_communication=tp_linear_async_communication and tp_mode is TensorParallelLinearMode.REDUCE_SCATTER,
)
- # TODO @nouamane: why can't we torch.jit.script GLUActivation?
- self.split_silu_mul = GLUActivation(config.hidden_act)
+ self.split_silu_mul = torch.compile(GLUActivation(config.hidden_act))

def forward(self, hidden_states): # [seq_length, batch_size, hidden_dim]
merged_states = self.gate_up_proj(hidden_states)
Expand Down

0 comments on commit ed1ca7d

Please sign in to comment.