SoftmaxOutput with temperature

I want to use temperature while training my multi-task network.
I can see that mx.symbol.softmax(data=None, axis=_Null, temperature=_Null, name=None, attr=None, out=None, **kwargs)
supports temperature, but when we get to SoftmaxOutput it is not there.

How do I use SoftmaxOutput with temperature?

I’m afraid you’d have to implement the loss yourself. For the symbolic API:

loss = sym.MakeLoss(-sym.pick(sym.log_softmax(data, temperature=temperature), label, axis=axis, keepdims=True))
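
For example, a minimal end-to-end sketch of the symbolic version (the single fully connected layer, the temperature value T, and the variable names are hypothetical placeholders):

import mxnet as mx

T = 2.0  # hypothetical temperature
data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
net = mx.sym.FullyConnected(data, num_hidden=10, name='fc')

# log_softmax accepts a temperature; pick selects the log-probability
# of the ground-truth class for each sample.
log_prob = mx.sym.log_softmax(net, axis=-1, temperature=T)
ce = -mx.sym.pick(log_prob, label, axis=-1, keepdims=True)
loss = mx.sym.MakeLoss(ce, name='softmax_ce_loss')

# Keep the temperature-scaled probabilities available for prediction,
# with gradients blocked so they do not contribute to training.
out = mx.sym.Group([loss, mx.sym.BlockGrad(mx.sym.softmax(net, temperature=T))])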

In Gluon, modify this implementation of SoftmaxCrossEntropyLoss to add temperature:

from mxnet import gluon
from mxnet.gluon.loss import _apply_weighting, _reshape_like


class SoftmaxCrossEntropyLoss(gluon.loss.Loss):
    def __init__(self, axis=-1, sparse_label=True, from_logits=False, weight=None,
                 batch_axis=0, temperature=1.0, **kwargs):
        super(SoftmaxCrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
        self._axis = axis
        self._sparse_label = sparse_label
        self._from_logits = from_logits
        self._temperature = temperature

    def hybrid_forward(self, F, pred, label, sample_weight=None):
        if not self._from_logits:
            # The only change from the stock implementation: pass the
            # temperature through to log_softmax.
            pred = F.log_softmax(pred, self._axis, temperature=self._temperature)
        if self._sparse_label:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        else:
            label = _reshape_like(F, label, pred)
            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
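
A quick usage sketch (the Dense layer, batch size, and random data below are hypothetical, just to show how the temperature argument is passed):

from mxnet import autograd, gluon, nd

net = gluon.nn.Dense(10)
net.initialize()
loss_fn = SoftmaxCrossEntropyLoss(temperature=2.0)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

data = nd.random.uniform(shape=(32, 20))
label = nd.random.randint(0, 10, shape=(32,)).astype('float32')

with autograd.record():
    loss = loss_fn(net(data), label)
loss.backward()
trainer.step(32)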

I should probably submit a PR for the above change.
