Constrained sparsemax and softmax (jankrepl#37)
New allocators implemented via convex optimization
jankrepl authored Jun 3, 2020
1 parent a815fb8 commit 54e7dcf
Showing 5 changed files with 328 additions and 18 deletions.
7 changes: 5 additions & 2 deletions deepdow/layers/__init__.py
@@ -1,7 +1,9 @@
"""Collection of layers."""

from .collapse import AttentionCollapse, AverageCollapse, ElementCollapse, ExponentialCollapse, MaxCollapse, SumCollapse
from .allocate import AnalyticalMarkowitz, NCO, NumericalMarkowitz, Resample, SoftmaxAllocator
from .collapse import (AttentionCollapse, AverageCollapse, ElementCollapse, ExponentialCollapse,
MaxCollapse, SumCollapse)
from .allocate import (AnalyticalMarkowitz, NCO, NumericalMarkowitz, Resample, SoftmaxAllocator,
SparsemaxAllocator)
from .misc import Cov2Corr, CovarianceMatrix, KMeans, MultiplyByConstant
from .transform import Conv, RNN

@@ -21,4 +23,5 @@
'Resample',
'RNN',
'SoftmaxAllocator',
'SparsemaxAllocator',
'SumCollapse']
123 changes: 119 additions & 4 deletions deepdow/layers/allocate.py
@@ -357,16 +357,52 @@ class SoftmaxAllocator(torch.nn.Module):
Parameters
----------
temperature : None or float
If None, then needs to be provided per sample during forward pass. If ``float`` then assumed to be always
the same.
If None, then needs to be provided per sample during forward pass. If ``float`` then assumed
to be always the same.
formulation : str, {'analytical', 'variational'}
Controls how the problem is solved. If 'analytical' an explicit formula is used; however,
one cannot set a `max_weight` different from 1. If 'variational' the problem is solved via
convex optimization and any `max_weight` can be set.
n_assets : None or int
Only required and used if `formulation='variational'`.
max_weight : float
A float between (0, 1] representing the maximum weight per asset.
"""

def __init__(self, temperature=1):
def __init__(self, temperature=1, formulation='analytical', n_assets=None, max_weight=1):
super().__init__()

self.temperature = temperature

if formulation not in {'analytical', 'variational'}:
raise ValueError('Unrecognized formulation {}'.format(formulation))

if formulation == 'variational' and n_assets is None:
raise ValueError('One needs to provide n_assets for the variational formulation.')

if formulation == 'analytical' and max_weight != 1:
raise ValueError('Cannot constrain weights via max_weight for the analytical formulation')

if formulation == 'variational' and n_assets * max_weight < 1:
raise ValueError('One cannot create a fully invested portfolio with the given max_weight')

self.formulation = formulation

if formulation == 'analytical':
self.layer = torch.nn.Softmax(dim=1)
else:
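# Entropy-regularized objective: minimizing -x^T w - H(w) over the simplex
# recovers softmax(x); the upper bound additionally caps each weight at max_weight.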
x = cp.Parameter(n_assets)
w = cp.Variable(n_assets)
obj = -x * w - cp.sum(cp.entr(w))
cons = [cp.sum(w) == 1.,
w <= max_weight]
prob = cp.Problem(cp.Minimize(obj), cons)
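# CvxpyLayer wraps the problem as a differentiable module; gradients flow back to the parameter x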
self.layer = CvxpyLayer(prob, [x], [w])

def forward(self, x, temperature=None):
"""Perform forward pass.
@@ -398,4 +434,83 @@ def forward(self, x, temperature=None):

inp = x / temperature_[..., None]

return nn.functional.softmax(inp, dim=1)
return self.layer(inp) if self.formulation == 'analytical' else self.layer(inp)[0]


class SparsemaxAllocator(torch.nn.Module):
"""Portfolio creation by computing a sparsemax over the asset dimension with temperature.
Parameters
----------
n_assets : int
Number of assets. Note that we require this quantity at construction to make sure
the underlying cvxpylayer does not need to be reinitialized every forward pass.
temperature : None or float
If None, then needs to be provided per sample during forward pass. If ``float`` then
assumed to be always the same.
max_weight : float
A float between (0, 1] representing the maximum weight per asset.

References
----------
[1] Martins, Andre, and Ramon Astudillo. "From softmax to sparsemax: A sparse model of attention
and multi-label classification." International Conference on Machine Learning. 2016.
[2] Malaviya, Chaitanya, Pedro Ferreira, and André FT Martins. "Sparse and constrained attention
for neural machine translation." arXiv preprint arXiv:1805.08241 (2018).
"""

def __init__(self, n_assets, temperature=1, max_weight=1):
super().__init__()

if n_assets * max_weight < 1:
raise ValueError('One cannot create a fully invested portfolio with the given max_weight')

self.n_assets = n_assets
self.temperature = temperature

# Construct convex optimization problem
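# Sparsemax is the Euclidean projection of the (scaled) logits onto the capped probability simplex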
x = cp.Parameter(n_assets)
w = cp.Variable(n_assets)
obj = cp.sum_squares(x - w)
cons = [cp.sum(w) == 1,
0. <= w,
w <= max_weight]
prob = cp.Problem(cp.Minimize(obj), cons)

self.layer = CvxpyLayer(prob, parameters=[x], variables=[w])

def forward(self, x, temperature=None):
"""Perform forward pass.
Parameters
----------
x : torch.Tensor
Tensor of shape `(n_samples, n_assets)`.
temperature : None or torch.Tensor
If None, then using the `temperature` provided at construction time. Otherwise a
`torch.Tensor` of shape `(n_samples,)` representing a per sample temperature.

Returns
-------
weights : torch.Tensor
Tensor of shape `(n_samples, n_assets)`.
"""
n_samples, _ = x.shape
device, dtype = x.device, x.dtype

if not ((temperature is None) ^ (self.temperature is None)):
raise ValueError('Not clear which temperature to use')

if temperature is not None:
temperature_ = temperature # (n_samples,)
else:
temperature_ = self.temperature * torch.ones(n_samples, dtype=dtype, device=device)

inp = x / temperature_[..., None]

return self.layer(inp)[0]
59 changes: 59 additions & 0 deletions docs/source/layers.rst
@@ -224,6 +224,24 @@ performs a softmax over the input. Additionally, one can also provide custom :code:`temperature`.
Note that one can provide a single :code:`temperature` at construction that is shared across all samples. Alternatively,
one can provide per sample temperature when performing the forward pass.
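
A minimal sketch of the per sample variant (the inputs below are illustrative):

.. testcode::

    import torch

    from deepdow.layers import SoftmaxAllocator

    layer = SoftmaxAllocator(temperature=None)  # temperature must then be passed in the forward pass
    x = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
    per_sample_temperature = torch.tensor([0.5, 2.])

    w = layer(x, per_sample_temperature)

    assert w.shape == (2, 3)
    assert torch.allclose(w.sum(1), torch.ones(2))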

The above formulation (:code:`formulation`) is **analytical**. One can also obtain the same weights
by solving a convex optimization problem (**variational** formulation). See [Agrawal2019]_ and
[Martins2017]_ for more details.

.. math::

    \begin{aligned}
    \min_{\textbf{w}} \quad & - \textbf{x}^T \textbf{w} - H(\textbf{w}) \\
    \textrm{s.t.} \quad & \sum_{i=1}^{N}w_i = 1 \\
    \quad & w_i \geq 0, i \in \{1,...,N\}\\
    \quad & w_i \leq w_{\text{max}}, i \in \{1,...,N\}\\
    \end{aligned}

where :math:`H(\textbf{w})=-\sum_{i=1}^{N} w_i \log(w_i)` is the entropy. Note that if
:code:`max_weight` is set to 1 then one recovers the unconstrained (analytical) softmax. The benefit
of the variational formulation is that the user can choose any :code:`max_weight` in :code:`(0, 1]`.

.. testcode::

from deepdow.layers import SoftmaxAllocator
@@ -237,6 +255,41 @@
assert w.shape == (2, 2)
assert torch.allclose(w.sum(1), torch.ones(2))
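
The variational formulation with a weight cap is used in the same way; a minimal sketch with
illustrative inputs (the :code:`cvxpylayers` dependency is required):

.. testcode::

    import torch

    from deepdow.layers import SoftmaxAllocator

    n_assets = 3
    layer = SoftmaxAllocator(temperature=1,
                             formulation='variational',
                             n_assets=n_assets,
                             max_weight=0.6)
    x = torch.tensor([[1., 2., 3.], [2., -0.5, 1.]])

    w = layer(x)

    assert w.shape == (2, 3)
    assert torch.allclose(w.sum(1), torch.ones(2), atol=1e-4)
    assert (w <= 0.6 + 1e-4).all()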

SparsemaxAllocator
******************
Suggested in [Martins2016]_. It is similar to the softmax but enforces sparsity. It currently uses
:code:`cvxpylayers` as a backend. See below for the mathematical formulation; note that **x**
represents the logits.

.. math::

    \begin{aligned}
    \min_{\textbf{w}} \quad & {\vert \vert \textbf{w} - \textbf{x} \vert \vert}^2_{2} \\
    \textrm{s.t.} \quad & \sum_{i=1}^{N}w_i = 1 \\
    \quad & w_i \geq 0, i \in \{1,...,N\}\\
    \quad & w_i \leq w_{\text{max}}, i \in \{1,...,N\}\\
    \end{aligned}

Similarly to :code:`SoftmaxAllocator`, one can provide the temperature either per sample or as a
single value at construction. Additionally, one can control the maximum weight via the
:code:`max_weight` parameter.

.. testcode::

    from deepdow.layers import SparsemaxAllocator

    n_assets = 3
    layer = SparsemaxAllocator(n_assets, temperature=1)
    x = torch.tensor([[1, 2.3, 2.1], [2, 4.2, -1.1]])

    w = layer(x)
    w_true = torch.tensor([[-1.2650e-10, 6.0000e-01, 4.0000e-01],
                           [-2.9905e-10, 1.0000e+00, 4.2659e-10]])

    assert w.shape == (2, 3)
    assert torch.allclose(w.sum(1), torch.ones(2))
    assert torch.allclose(w, w_true, atol=1e-5)
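
Per sample temperatures and the :code:`max_weight` cap can be combined; a minimal sketch with
illustrative inputs:

.. testcode::

    import torch

    from deepdow.layers import SparsemaxAllocator

    n_assets = 3
    layer = SparsemaxAllocator(n_assets, temperature=None, max_weight=0.6)
    x = torch.tensor([[1., 2.3, 2.1], [2., 4.2, -1.1]])
    temperature = torch.tensor([0.2, 1.])

    w = layer(x, temperature=temperature)

    assert w.shape == (2, 3)
    assert torch.allclose(w.sum(1), torch.ones(2), atol=1e-4)
    assert (w <= 0.6 + 1e-4).all()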


Misc layers
-----------
@@ -342,11 +395,17 @@ References
.. [Michaud2007]
Michaud, Richard O., and Robert Michaud. "Estimation error and portfolio optimization: a resampling solution." Available at SSRN 2658657 (2007).
.. [Martins2016]
Martins, Andre, and Ramon Astudillo. "From softmax to sparsemax: A sparse model of attention and multi-label classification." International Conference on Machine Learning. 2016.
.. [Ledoit2004]
Ledoit, Olivier, and Michael Wolf. "Honey, I shrunk the sample covariance matrix." The Journal of Portfolio Management 30.4 (2004): 110-119.
.. [sklearnkmeans]
https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
.. [Martins2017]
Martins, André FT, and Julia Kreutzer. "Learning what’s easy: Fully differentiable neural easy-first taggers." Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing. 2017.
.. [Bodnar2013]
Bodnar, Taras, Nestor Parolya, and Wolfgang Schmid. "On the equivalence of quadratic optimization problems commonly used in portfolio theory." European Journal of Operational Research 229.3 (2013): 637-644.
8 changes: 6 additions & 2 deletions docs/source/networks.rst
@@ -149,7 +149,9 @@ The activations have the following shape (omitting the sample dimension).
(norm_layer_2): GroupNorm(4, 32, eps=1e-05, affine=True)
(time_collapse_layer): AverageCollapse()
(channel_collapse_layer): AverageCollapse()
(portfolio_opt_layer): SoftmaxAllocator()
(portfolio_opt_layer): SoftmaxAllocator(
(layer): Softmax(dim=1)
)
)


@@ -184,7 +186,9 @@ The activations have the following shape (omitting the sample dimension).
(norm_layer): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(dropout_layer): Dropout(p=0.5, inplace=False)
(linear): Linear(in_features=600, out_features=10, bias=True)
(allocate_layer): SoftmaxAllocator()
(allocate_layer): SoftmaxAllocator(
(layer): Softmax(dim=1)
)
)

