proofs_new.tex

\appendix
\section{Proofs}
\label{sec:proofs}

First we state some technical lemmas that will be used in the following proofs.

Define the Lambert function $W:\R_+\rightarrow\R$ as the one that satisfies the equality\footnote{For $x<0$ the Lambert function is multivalued. Hence, to avoid complications and because we only need nonnegative arguments, we define it only for nonnegative values of $x$.}
\begin{equation}
\label{eq:lambert}
x=W(x) \exp \left(W(x)\right), \ \forall x\geq0.
\end{equation}
It satisfies the following properties.
%
\begin{lemma}
The Lambert function satisfies $0.6321 \log(x+1) \leq W(x) \leq \log(x+1), \forall x\geq0$.
\end{lemma}
%
\begin{proof}
We first prove the lower bound. From \eqref{eq:lambert} we have
\begin{align}
W(x) &= \log\left(\frac{x}{W(x)}\right) \label{eq:lm_lambert_1} \\
&= \log\left(\frac{x}{\log(x/W(x))}\right). \label{eq:lm_lambert_1b}
\end{align}
From the first equality and the inequality $\log(z) \leq \frac{z^a}{a\, e}$, valid for any $z>0$ and $a>0$, we get
\[
W(x) \leq \frac{1}{a\, e}\left(\frac{x}{W(x)}\right)^a
\]
that is
\begin{equation}
\label{eq:lm_lambert_2}
W(x) \leq \left(\frac{1}{a\, e}\right)^\frac{1}{1+a} x^\frac{a}{1+a}.
\end{equation}
Using \eqref{eq:lm_lambert_2} in \eqref{eq:lm_lambert_1}, we have
\begin{align*}
W(x) 
\geq \log\left(\frac{x}{\left(\frac{1}{a\, e}\right)^\frac{1}{1+a} x^\frac{a}{1+a}}\right) 
= \frac{1}{1+a}\log\left(a \, e\, x\right)~.
\end{align*}
Consider now the function $g(x)=\frac{x}{x+1} - \frac{b}{\log(1+b) (b+1)} \log(x+1)$ on $0\leq x\leq b$. This function has a maximum at $x^*=(1+\frac{1}{b}) \log(1+b)-1$: its derivative is positive in $[0,x^*]$ and negative in $[x^*,b]$. Hence its minimum is attained at $x=0$ and at $x=b$, where it is equal to $0$.
Using the property just proved on $g$ and setting $a=\frac{1}{x}$, for $0<x\leq b$ we have
\begin{align*}
W(x) 
\geq \frac{x}{x+1} \geq \frac{b}{\log(1+b) (b+1)} \log(x+1)~.
\end{align*}
For $x>b$, setting $a=\frac{x+1}{e x}$, we have
\begin{align}
W(x) 
&\geq \frac{e\,x}{(e+1) x + 1} \log(x+1) \geq \frac{e\,b}{(e+1) b + 1} \log(x+1)
\end{align}
Hence, we set $b$ such that 
\[
\frac{e\, b}{(e+1)b + 1} = \frac{b}{\log(1+b) (b+1)}
\]
This equality is satisfied by $b=e-1=1.71828\ldots$, for which both sides are equal to $1-\frac{1}{e}\geq 0.6321$, so
\[
W(x) \geq 0.6321 \log(x+1)~.
\]

For the upper bound, we use Theorem~2.3 in \cite{hoorfar2008inequalities}, that says that
\[
W(x) \leq \log\frac{x+C}{1+\log(C)}, \quad \forall x> -\frac{1}{e}, \ C>\frac{1}{e}.
\]
Setting $C=1$, we obtain the stated bound.
\end{proof}

\begin{lemma}
Define $f(x)= \beta \exp\frac{x^2}{2 \alpha}$, for $\alpha,\beta>0$, $x\geq0$. Then
\[
f^*(y)=y \sqrt{\alpha W\left(\frac{\alpha y^2}{\beta^2}\right)} - \beta \exp\left(\frac{W\left(\frac{\alpha y^2}{\beta^2}\right)}{2}\right).
\]
Moreover
\[
f^*(y) \leq y \sqrt{\alpha \log \left(\frac{\alpha y^2}{\beta^2} +1 \right)} - \beta.
\]
\end{lemma}
\begin{proof}
From the definition of Fenchel dual, we have
\begin{align*}
f^*(y)= \max_{x} \  x\, y - f(x) = \max_{x} \  x\, y - \beta \exp\frac{x^2}{2 \alpha} \leq x^*\,y -\beta
\end{align*}
where $x^*= \argmax_{x} x\, y - f(x)$. We now use the fact that $x^*$ satisfies $y = f'(x^*)$, to have
\begin{align*}
x^*=\sqrt{\alpha W\left(\frac{\alpha y^2}{\beta^2}\right)},
\end{align*}
where the function $W:\R_+ \rightarrow \R$ is the Lambert function that satisfies
\[
x=W(x) \exp \left(W(x)\right).
\]
Hence, to obtain an upper bound we need an upper bound to the Lambert function.
We use Theorem~2.3 in \cite{hoorfar2008inequalities}, that says that
\[
W(x) \leq \log\frac{x+C}{1+\log(C)}, \quad \forall x> -\frac{1}{e}, \ C>\frac{1}{e}.
\]
Setting $C=1$, we obtain the stated bound.
\end{proof}

\begin{lemma}[ {\citep[Example 13.7]{BauschkeC2011}} ]
Let $\phi:\R \rightarrow (-\infty, +\infty]$ be even. Then $(\phi \circ \norm{\cdot})^*=\phi^* \circ \norm{\cdot}$.
\end{lemma}

\begin{cor}
\label{cor:dual_exp_square}
Define $f(\theta)= \beta \exp\frac{\norm{\theta}^2}{2 \alpha}$, for $\alpha,\beta>0$. Then
\[
f^*(y) \leq  \norm{y} \sqrt{\alpha \log \left(\frac{\alpha \norm{y}^2}{\beta^2} +1 \right)} - \beta.
\]
\end{cor}


\subsection{Proof of Theorem~\ref{theo:hilbert_reward}}
\begin{proof}
  For simplicity denote by $f_t(\cdot)=f\left(\cdot, \{\norm{g_1}, \ldots, \norm{g_t}\}\right)$.
  We will prove the thesis by induction. The base case is verified from the first point of Assumption~\ref{assumption:1-d_algo}. Then, we assume that 
  \[
  \epsilon + \sum_{t=1}^{n-1} \langle g_t, w_t \rangle \geq f_{n-1}\left( \norm{\sum_{t=1}^{n-1} g_t}\right),
  \]
  and we want to prove that 
  \[
  \epsilon + \sum_{t=1}^{n} \langle g_t, w_t \rangle \geq f_{n}\left( \norm{\sum_{t=1}^{n} g_t}\right)~.
  \]
  We have that
  \begin{align*}
  \epsilon + \sum_{t=1}^{n} &\langle g_t, w_t \rangle - f_n\left( \norm{\sum_{t=1}^{n} g_t}\right) \\
  &= \langle g_n, w_n \rangle + \epsilon + \sum_{t=1}^{n-1} \langle g_t, w_t \rangle - f_n\left( \norm{\sum_{t=1}^{n} g_t}\right)\\
  &= \left(1+\frac{b_n}{\norm{\theta_{n-1}}}\langle \theta_{n-1},g_n \rangle \right)\left(\sum_{t=1}^{n-1} \langle g_t, w_t \rangle +\epsilon \right) - f_n\left( \norm{\sum_{t=1}^{n} g_t}\right)\\
  &\geq \left(1+\frac{b_n}{\norm{\theta_{n-1}}}\langle \theta_{n-1},g_n \rangle \right) f_{n-1}\left( \norm{\sum_{t=1}^{n-1} g_t}\right) - f_n\left( \norm{\sum_{t=1}^{n} g_t}\right)\\
  &= \left(1+\frac{b_n}{\norm{\theta_{n-1}}}\langle \theta_{n-1},g_n \rangle \right) f_{n-1}\left( \norm{\sum_{t=1}^{n-1} g_t}\right) - f_n\left( \norm{g_n + \sum_{t=1}^{n-1} g_t}\right)\\
  %&= \left(1+\frac{b_n}{\norm{\theta_{n-1}}}\langle \theta_{n-1},g_n \rangle \right) f_{n-1}\left( \norm{\theta_{n-1}}\right) - f_n\left( \sqrt{\norm{\theta_{n-1}}^2 + \norm{g_n}^2 + 2 \langle \theta_{n-1}, g_n \rangle} \right)\\
  &\geq \min_{r\in \{-1,1\}} \left(1+ r\, b_n \norm{g_n} \right) f_{n-1}\left( \norm{\theta_{n-1}}\right) - f_n\left(\left| \norm{\theta_{n-1}} + r \norm{g_n}\right|\right)\\
  &= \min_{r\in \{-1,1\}} \left(1+ r\, b_n \norm{g_n} \right) f_{n-1}\left( \norm{\theta_{n-1}}\right) - f_n\left( \norm{\theta_{n-1}} + r \norm{g_n}\right)\\
  &\geq 0,
  \end{align*}
  where the first inequality comes from the induction hypothesis, the second one using Lemma~8 in \citet{McMahanO14} and the last one by the hypothesis on the \ac{MBA}.
\end{proof}


\subsection{Proof of Theorem~\ref{theo:self_tune}}
\begin{proof}
The statement is readily proved using the inequality $a\,b \leq \frac{a^2}{\lambda} + \lambda b^2, \ \forall a,b\geq0, \lambda>0$ and Theorem~\ref{theo:online_to_batch}.
\end{proof}

\subsection{Proof of Theorem~\ref{theo:regret_pistol}}
\begin{proof}
From Theorem~\ref{theo:hilbert_reward} we have that
\[
\gain_n = \wealth_n - \epsilon  \geq B n^{-C} \exp\left(\frac{\norm{\sum_{t=1}^n g_t}^2}{A n}\right)-D - \epsilon~.
\]
We now apply Theorem~\ref{thm:rrdual} and Corollary~\ref{cor:dual_exp_square} to have that 
\[
\operatorname{Regret}_n(u) \leq \norm{u} \sqrt{\frac{A n}{2} \log \left(\frac{\frac{A}{2} n^{2C+1} \norm{u}^2}{B^2} +1 \right)} - \frac{B}{n^{C}} + \epsilon+D~. \qedhere
\]
\end{proof}


\subsection{Proofs of Theorem~\ref{thm:oracle_fraction} and Theorem~\ref{thm:oracle_fraction_changing}}

We first state a couple of useful identities.
For any $ 0\leq p < 1$
\[
D\left(\frac{1}{2}+\frac{p}{2}\middle\|\frac{1}{2}\right) = D\left(\frac{1}{2}-\frac{p}{2}\middle\|\frac{1}{2}\right)= \frac{1+p}{2} \log(1+p) + \frac{1-p}{2} \log(1-p).
\]
The extension by continuity of $D\left(\frac{1}{2}+\frac{p}{2}\middle\|\frac{1}{2}\right)$ to $p=1$ is $\log(2)$.
Also,
\[
\left(\frac{n}{n-q}\right)^{n-q} \left(\frac{n}{q}\right)^{q} = 2^n \exp\left(-n D\left(\frac{q}{n}\middle\|\frac{1}{2}\right)\right),
\]
and
\begin{equation}
\label{eq:div_2}
\left(1+x\right)^\frac{1+x}{2} \left(1-x\right)^\frac{1-x}{2}= \exp\left( D\left(\frac{1}{2}+\frac{x}{2}\middle\|\frac{1}{2}\right) \right)
\end{equation}

Also, for any $-\frac{1}{2} \leq x\leq \frac{1}{2}$ we have
\[
\frac{x^2}{2} +\frac{x^4}{12}\leq D\left(\frac{1}{2}+\frac{x}{2}\middle\|\frac{1}{2}\right) \leq \frac{x^2}{2} + \frac{x^4}{5}.
\]

\begin{proof}[Proof of Theorem~\ref{thm:oracle_fraction}]
From the betting strategy we have
\[
\wealth_t=\wealth_{t-1} + w_t \, g_t = \wealth_{t-1} + \beta \, \wealth_{t-1} \, g_t = \wealth_{t-1} (1+\beta \, g_t)~.
\]
Hence
\[
\wealth_n=\epsilon \prod_{t=1}^n (1+\beta g_t) = \epsilon (1+\beta)^\frac{n+G}{2} (1-\beta)^\frac{n-G}{2},
\]
where $G=\sum_{t=1}^n g_t$.
It is easy to show that the maximum value of $\wealth_n$ w.r.t. $\beta$ is in $\beta=\frac{G}{n}$. 
Hence, we have
\[
\wealth_n = \epsilon \left(1+\frac{G}{n}\right)^\frac{n+G}{2} \left(1-\frac{G}{n}\right)^\frac{n-G}{2} 
= \epsilon \left[\left(1+\frac{G}{n}\right)^\frac{1+\frac{G}{n}}{2} \left(1-\frac{G}{n}\right)^\frac{1-\frac{G}{n}}{2}\right]^n 
= \epsilon \exp\left( n \, D\left(\frac{1}{2}+\frac{G}{2n}\middle\|\frac{1}{2}\right) \right),
%\leq \epsilon \exp \left(\frac{Z^2}{2 n} + \frac{Z^4}{5 n^3}\right),
\]
where in the last equality we used \eqref{eq:div_2}.
%or 
%\[
%\frac{x^2}{2} +\frac{x^4}{12}\leq \frac{1+x}{2} \log(1+x) + \frac{1-x}{2}\log(1-x) \leq \frac{x^2}{2} + \log(2)-.5
%\]
%where the lhs is given by Taylor expansion.
\end{proof}

The following tail bound for Binomial variables is from~\cite{Orabona13} and we report here the proof for completeness.
\begin{theorem}
\label{lemma:bin}
Let $n\geq2$ be an even number and let $b_1,\dots,b_n$ be independent Bernoulli random variables with parameter $\frac{1}{2}$. Then for any $k \in \Nat_0$ such that $k\leq \frac{1}{2}n-1$, we have
\[
P\left( \sum_{i=1}^n b_i \geq \frac{1}{2} n + k\right) 
\geq  \frac{\exp\left(-n\, D\left(\frac{1}{2}+\frac{k}{n}\middle\|\frac{1}{2}\right)\right)}{2 \exp\left(\frac{1}{6}\right)} \frac{\sqrt{2 \pi}}{(\pi-1)y+\sqrt{y^2+2 \pi}},
\]
where $y=\frac{2 k}{\sqrt{n}}$.
\end{theorem}
\begin{proof}
We use Theorem~2 in \cite{McKay1989}, that specialized to our case says that
\begin{equation}
\label{eq:bin_1}
P\left( \sum_{i=1}^n b_i \geq  \frac{1}{2} n + k  \right) 
\geq \sqrt{n} \binom{n-1}{ \frac{1}{2} n + k -1} 2^{-n} \frac{Q(y)}{\phi(y)},
\end{equation}
where $\phi(x)=\frac{1}{\sqrt{2 \pi}} \exp(-\frac{x^2}{2})$ is the density of the zero-mean, unit-variance Gaussian and $Q(x)=\int_{x}^{+\infty} \phi(u) du$ is its complementary CDF (upper tail probability).

We start by lower bounding the ratio $\frac{Q(y)}{\phi(y)}$. Using the inequality in \cite{Boyd59}, we have
\[
\frac{Q(y)}{\phi(y)} 
= \exp\left(\frac{y^2}{2}\right) \int_{y}^{+\infty} \exp\left(-\frac{t^2}{2}\right) dt
\geq \frac{\pi}{(\pi-1)y+\sqrt{y^2+2 \pi}}.
\]

To bound the binomial coefficient we make use of the following Stirling approximation, for any $n\geq 1$,
\[
\sqrt{2 \pi n} n^n \exp(-n) < n! < \exp\left(\frac{1}{12}\right)\sqrt{2 \pi n} n^n \exp(-n)~.
\]
Hence, for any $n \geq 2$ and $1\leq q \leq n-1$, after some algebra we obtain
\begin{align*}
\binom{n}{q} 
&\geq \frac{1}{\exp\left(\frac{1}{6}\right) \sqrt{2 \pi}} \left(\frac{n}{n-q}\right)^{n-q} \left(\frac{n}{q}\right)^{q} \sqrt{\frac{n}{q(n-q)}} \\
&= \frac{1}{\exp\left(\frac{1}{6}\right) \sqrt{2 \pi}} 2^n \exp\left(-n D\left(\frac{q}{n}\middle\|\frac{1}{2}\right)\right) \sqrt{\frac{n}{q(n-q)}},
\end{align*}
where in the equality we used the definition of $D\left(\cdot\middle\|\cdot\right)$.
Also, we have
\begin{equation}
\label{eq:bin_3}
\binom{n-1}{\frac{1}{2} n + k - 1} = \binom{n}{\frac{1}{2} n + k} \left(\frac{1}{2} + \frac{k}{n}\right) .
\end{equation}
Putting together \eqref{eq:bin_1}-\eqref{eq:bin_3}, and using the definition of $y$ we have
\begin{align*}
P\left( \sum_{i=1}^n b_i \geq \frac{1}{2} n + k \right) 
&\geq \frac{1}{\exp\left(\frac{1}{6}\right) \sqrt{2 \pi}} \exp\left(-n\, D\left(\frac{1}{2}+\frac{k}{n}\middle\|\frac{1}{2}\right)\right) \sqrt{\frac{\frac{1}{2} + \frac{k}{n}}{\frac{1}{2}-\frac{k}{n}}}  \frac{Q(y)}{\phi(y)} \\
&\geq \frac{1}{\exp\left(\frac{1}{6}\right) \sqrt{2 \pi}} \exp\left(-n\, D\left(\frac{1}{2}+\frac{k}{n}\middle\|\frac{1}{2}\right)\right) \frac{\pi}{(\pi-1)y+\sqrt{y^2+2 \pi}}. \qedhere
\end{align*}
\end{proof}

We can now prove Theorem~\ref{thm:oracle_fraction_changing}.

\begin{proof}[Proof of Theorem~\ref{thm:oracle_fraction_changing}]
First observe that, even knowing all the outcomes of $g_t$, we cannot gain more than $\epsilon 2^n$, simply betting on each round all the money on the correct outcome.

Then, for a specific function $g(\cdot,\cdot)$ that grows in the first argument, we will show that
\begin{align*}
\min_{|\sum_{t=1}^n g_t| \geq 2 k} \sum_{t=1}^n w_t g_t 
\leq \epsilon \, g(k,n), \ \forall 0 \leq k \leq \frac{n}{2}.
\end{align*}
When $k=n/2$, we cannot gain more than $\epsilon 2^n$, this is why we can safely consider only $0\leq k\leq n/2-1$.
From this we will infer that
\begin{align*}
\min_{g_t} \sum_{t=1}^n w_t g_t - \epsilon\min\left(g\left(\frac{|\sum_{t=1}^n g_t|}{2},n\right),2^n\right) \leq 0,
\end{align*}
that implies the stated inequality.

Let $r_1,\dots,r_n$ be independent random variables, each taking the value $1$ with probability $0.5$ and $-1$ with probability $0.5$.
Hence, we have that $\E[ \sum_{t=1}^n w_t r_t ]=0$, and also $\sum_{t=1}^n w_t r_t \geq -\epsilon$ because we never lose more than the initial amount of money.

Hence we have
\begin{align*}
\min_{|\sum_{t=1}^n g_t| \geq 2 k} \sum_{t=1}^n w_t g_t 
\leq \E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t\right| \geq 2 k\right], \ \forall 0 \leq k \leq \frac{n}{2}.
\end{align*}

For any $k\geq 0$, it follows that
\begin{align*}
0&=\E\left[\sum_{t=1}^n w_t r_t \right] 
= \E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t \right|< 2 k\right] P\left(\left|\sum_{t=1}^n r_t\right|< 2k \right)\\
&\qquad+\E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t \right| \geq 2 k\right] P\left(\left|\sum_{t=1}^n r_t\right|\geq 2 k\right) \\
&=\E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t \right|< 2 k\right] \left(1-P\left(\left|\sum_{t=1}^n r_t\right|\geq 2 k\right)\right)\\
&\qquad+\E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t \right| \geq 2 k\right] P\left(\left|\sum_{t=1}^n r_t\right|\geq 2 k\right) \\
&\geq -\epsilon \left(1-P\left(\left|\sum_{t=1}^n r_t\right|\geq 2 k\right)\right)+\E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t\right| \geq 2 k\right] P\left(\left|\sum_{t=1}^n r_t \right|\geq 2 k\right),
\end{align*}
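hence, for $k\geq1$ (for $k=0$ the conditioning event has probability one and the conditional expectation is simply $\E\left[\sum_{t=1}^n w_t r_t\right]=0$), using the symmetry of the distribution of $\sum_{t=1}^n r_t$ in the first equality below, we obtain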
\begin{align*}
\E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t\right| \geq 2 k\right]
&\leq \frac{\epsilon}{P\left(\left|\sum_{t=1}^n r_t\right|\geq 2 k\right)} -\epsilon
= \frac{\epsilon}{2 P\left(\sum_{t=1}^n r_t \geq 2 k\right)} -\epsilon\\
&= \frac{\epsilon}{2 P\left(\sum_{t=1}^n \frac{r_t + 1}{2} \geq \frac{1}{2}n+ k \right)}-\epsilon.
\end{align*}
Notice that $\frac{r_t + 1}{2}$ are Bernoulli random variables.
Using Theorem~\ref{lemma:bin}, we have
\begin{align*}
\E\left[\sum_{t=1}^n w_t r_t \bigg| \left|\sum_{t=1}^n r_t\right| \geq 2 k\right]
&\leq \epsilon \left(\exp\left(\frac{1}{6}\right)\sqrt{2 \pi}\frac{2k}{\sqrt{n}} +2\exp\left(\frac{1}{6}\right)-1\right)\exp\left(n \, D\left(\frac{1}{2}+\frac{k}{n}\middle\|\frac{1}{2}\right)\right)~. \qedhere
\end{align*}
\end{proof}


\subsection{Proof of Theorem~\ref{theo:cocob}}
\begin{proof}
We want to prove that, for any $t$
\[
(1+b_t z_t) \epsilon \exp\left( F(x,\delta+\sum_{i=1}^{t-1} z_i) - \sum_{i=1}^{t-1} \frac{z_i}{a (\delta+\sum_{j=1}^{i-1} z_j)}\right)
\geq \epsilon \exp\left(F(x+z_t,\delta+\sum_{i=1}^{t} z_i) - \sum_{i=1}^{t} \frac{z_i}{a (\delta+\sum_{j=1}^{i-1} z_j)}\right),
\]
where $F(x,G)= G \, D(\frac{1}{2}+\frac{x}{2 G}||\frac{1}{2})$.
The above is equivalent to prove
\[
\log(1+b_t z_t) + F(x,\delta+\sum_{i=1}^{t-1} z_i)- \sum_{i=1}^{t-1} \frac{z_i}{a (\delta+\sum_{j=1}^{i-1} z_j)}
\geq F(x+z_t,\delta+\sum_{i=1}^{t} z_i)- \sum_{i=1}^{t} \frac{z_i}{a (\delta+\sum_{j=1}^{i-1} z_j)}~.
\]

Denote by $\sg_{t-1}=\sum_{i=1}^{t-1} z_i$ and consider the function 
\[
\phi(z)=-\log(1+b_t z) + F(x+z,\sg_{t-1} + |z|)~.
\]
Assume that $\phi(z)$ is piece-wise convex on $[-\infty,0]$ and $[0,\infty]$. Hence, we have that
\begin{align*}
&\phi(z) \leq \phi(0)+z (\phi(1)-\phi(0)), \forall 0 \leq z\leq 1\\
&\phi(z) \leq \phi(0)+z (\phi(0)-\phi(-1)), \forall -1 \leq z\leq 0~.
\end{align*}
Also, we set $b_t$ such that $\phi(1)=\phi(-1)$, that is
\[
b_t = \frac{\exp(A_{t-1})-1}{\exp(A_{t-1})+1},
%= 2 \, S\left(\frac{4 x}{a (\sg_{t-1} + 1)}\right)-1
\]
where $A_{t-1}=F(x+1,\sg_{t-1} + 1)-F(x-1,\sg_{t-1} + 1)$.
Hence we have
\[
\phi(z) \leq \phi(0)+ |z| (\phi(1)-\phi(0)), \forall -1 \leq z\leq 1,
\]
that is
\begin{align*}
F(x,\sg_{t-1})-F(x+z,\sg_{t-1}+|z|) + \log(1+b_t z) 
& = \phi(0) - \phi(z) \\
& \geq |z| \left(\phi(0) - \phi(1)\right) \\
&= |z| \left(F(x,\sg_{t-1}) - F(x+1,\sg_{t-1}+1) + \log(1+b_t)\right), \forall -1 \leq z\leq 1~.
\end{align*}

Using this relation we have that
\begin{align*}
&-F(x+z_t,\sg_{t-1}+|z_t|) + F(x,\sg_{t-1})+\log(1+b_t z_t)-\sum_{i=1}^{t-1} \frac{|z_i|}{a (\sg_{i-1} + 1)} \\
&\qquad \geq |z_t| \left(F(x,\sg_{t-1}) - F(x+1,\sg_{t-1}+1) + \log(1+b_t)\right) - \sum_{i=1}^{t-1} \frac{|z_i|}{a (\sg_{i-1} + 1)} \\
&\qquad = |z_t| \left(F(x,\sg_{t-1}) - F(x+1,\sg_{t-1}+1) + \log(1+\frac{\exp(A_{t-1})-1}{\exp(A_{t-1})+1})\right) - \sum_{i=1}^{t-1} \frac{|z_i|}{a (\sg_{i-1} + 1)}~.
%&\qquad = |z_t| \left(\frac{(\sg_{t-1} + 1) x^2 - (x^2+2 x )\sg_{t-1} }{a \sg_{t-1}(\sg_{t-1} + 1)} + \log(1+b_t)\right) - \sum_{i=1}^{t} \frac{|z_i|}{a (\sg_{i-1} + 1)}\\
%&\qquad = |z_t| \left(\frac{x^2}{a \sg_{t-1}(\sg_{t-1} + 1)}-\frac{2 x}{a (\sg_{t-1} + 1)} + \log(1+b_t)\right) - \sum_{i=1}^{t} \frac{|z_i|}{a (\sg_{i-1} + 1)}~.
\end{align*}

If $A_{t-1}<0$, that is $F(x+1,\sg_{t-1}+1) < F(x-1,\sg_{t-1}+1)$, we have that 
\begin{align*}
&F(x,\sg_{t-1}) - F(x+1,\sg_{t-1}+1) + \log(1+\frac{\exp(A_{t-1})-1}{\exp(A_{t-1})+1})\\
&\qquad = F(x,\sg_{t-1}) - F(x-1,\sg_{t-1}+1) + F(x-1,\sg_{t-1}+1) - F(x+1,\sg_{t-1}+1) + \log(1+\frac{\exp(A_{t-1})-1}{\exp(A_{t-1})+1})\\
&\qquad = F(x,\sg_{t-1}) - F(x-1,\sg_{t-1}+1) -A_{t-1} + \log(1+\frac{\exp(A_{t-1})-1}{\exp(A_{t-1})+1})~.
\end{align*}

Consider the function $\psi(x)=\log(1+\frac{\exp(x)-1}{\exp(x)+1})$. We have $\psi(-x)=\psi(x)-x$.


\end{proof}