- EDF: changed how semi-analytical gain is calculated so that it's ac…

…curate even when power is zero - Included gradient calculation in script 'capacity_nonlinear_regime_relaxed'. SNR gradient was validated numerically and it's accurate. Gain gradient is still inaccurate or incorrect. - Optimize power load and EDF length in the nonlinear regime now includes hybrid optimization with fmincon once particle swarm ends. - C++ function to compute gradient of nonlinear noise power - updated documentation
CY2R · Jan 29, 2018 · 2a91371 · 2a91371
1 parent fcfaf69
commit 2a91371
Show file tree

Hide file tree

Showing 9 changed files with 524 additions and 41 deletions.
diff --git a/edfa/Channels.m b/edfa/Channels.m
@@ -20,7 +20,8 @@
 
     methods
         function obj = Channels(varargin)
-            %% Constructor
+            %% Constructor:
+            % Channels(wavelength, P, direction)
             if length(varargin) == 3 % Passed P, lamb, and direction
                 obj.wavelength = varargin{1};
                 obj.P = varargin{2};
@@ -60,6 +61,11 @@
             E = self.h*self.c./self.wavelength;
         end
 
+        function S = sample(self, idx)
+            %% Create another class Channels with a subset of channels from the orignal class
+            S = Channels(self.wavelength(idx), self.P(idx), self.direction);
+        end
+
         function plot(self, h)
             %% Plot channels power in dBm
             if exist('h', 'var') % handle was passed

diff --git a/edfa/EDF.m b/edfa/EDF.m
@@ -102,12 +102,13 @@
 
             % Calculate the output flux for each individual signal & pump
             % using equation (24) of [1]
-            Qout_k = Qin_k.*exp(a*(Qin - Qout) - b);
+            Gain = exp(a*(Qin - Qout) - b);
+            Qout_k = Qin_k.*Gain;
 
             Ppump_out = Qout_k(1:Pump.N).*self.Ephoton(Pump.wavelength);
             Psignal_out = Qout_k(Pump.N+1:end).*self.Ephoton(Signal.wavelength);
 
-            GaindB = 10*log10(Psignal_out./Signal.P);
+            GaindB = 10*log10(Gain(Pump.N+1:end));
         end 
 
         %% Analytical model

diff --git a/edfa/doc/main.tex b/edfa/doc/main.tex
@@ -334,39 +334,184 @@ \section{Gaussian noise model}
 
 \noindent where $\alpha$ is the span attenuation, $\gamma$ is the nonlinear coefficient, $\beta_2$ is the dispersion propagation constant, $L_s$ is the span length, and $N_s$ is the number of spans.
 
-Using the discretization presented in \cite{Roberts2016}, the nonlinear power in each of the $N$ channels can be expressed as a function of the signal power at channel $n$, $P_n$, is given by
+Using the discretization presented in \cite{Roberts2016}, the nonlinear power in each of the $N$ channels can be expressed as a function of the signal power at channel $n$, $P_n$:
 
 \begin{equation}
-	NL_n = \sum_{i = 1}^{N}\sum_{j = 1}^NP_iP_jP_{i+j-n}D(i, j, n), \quad n = 1, \ldots, N
+	\mathrm{NL}_n = \sum_{n_1 = 1}^{N}\sum_{n_1 = 1}^N\sum_{l=-1}^{1}P_{n_1}P_{n_2}P_{i+j-n+l}D_l(n_1, n_2, n), \quad n = 1, \ldots, N
 \end{equation}
-for $1 \leq i+j-n \leq N$. This equation only includes the dominant nonlinear terms and disregards the corner contributions ($l = \pm$ in \cite{Roberts2016}).
+for $1 \leq i+j-n+l \leq N$. In this equation, $D_l(i, j, n)$ is an integral that defines a set of system-specific coefficients that determine the strength of the four-wave mixing component that falls on channel $n$, generated by channels $n_1$, $n_2$, and $n_1 + n_2 - n+l$ \cite{Roberts2016}.
 
-The coefficient $D(i, j, n)$ is defined as
+The coefficient $D_l(i, j, n)$ is defined as
 \begin{align} \label{eq:Dcoeff-original}\nonumber 
-	D(i, j, n) &= \frac{16}{27}\int_{-\Delta f/2}^{\Delta f/2}\int_{-\Delta f/2}^{\Delta f/2}\int_{-\Delta f/2}^{\Delta f/2} \\ \nonumber
-	&\cdot\rho(x + i\Delta f, y + j\Delta f, z + n\Delta f)\chi(x + i\Delta f, y + j\Delta f, z + n\Delta f) \\
-	&\cdot g(x)g(y)g(z)g(x+y-z)R_s\partial x\partial y\partial z,
+D(n_1, n_2, n) &= \gamma^2\frac{16}{27}\int_{-\Delta f/2}^{\Delta f/2}\int_{-\Delta f/2}^{\Delta f/2}\int_{-\Delta f/2}^{\Delta f/2} \\ \nonumber
+&\cdot\rho(x + n_1\Delta f, y + n_2\Delta f, z + n\Delta f)\chi(x + n_1\Delta f, y + n_2\Delta f, z + n\Delta f) \\
+&\cdot g(x)g(y)g(z)g(x+y-z+l\Delta f)\partial x\partial y\partial z,
 \end{align}
-where $g(f)$ is the spectral shape of each channel, which has been normalized so that $\int g(f) = 1$. Assuming that the spectral shape is rectangular with width $\Delta f$, we would have
+where $R_s$ is the symbol rate, $\Delta f$ is the channel spacing, and $g(f)$ is the spectral shape of each channel, which has been normalized so that $\int g(f) = 1$. Assuming that the spectral shape is rectangular with width $\Delta f = R_s$, we would have
 \begin{equation}
 	g(f) = \begin{cases}
 	\frac{1}{\Delta f}, & |f| \leq \frac{\Delta f}{2} \\
 	0, & \text{otherwise}
 	\end{cases}
 \end{equation} 
 
-This assumption also implies that $\Delta f = R_s$.
+Following this assumption, \eqref{eq:Dcoeff-original} reduces to 
 
-To avoid the nonlinearity introduced by the condition on $g(f)$, it is necessary to modified the integration limits so that the function $g(x+y-z)$ in the integrand of \eqref{eq:Dcoeff-original} is always nonzero. Therefore, \eqref{eq:Dcoeff-original} becomes
+\begin{align} \label{eq:Dcoeff-original}\nonumber 
+D(n_1, n_2, n) &= \gamma^2\frac{16}{27}\frac{1}{\Delta f^3}\int_{-\Delta f/2}^{\Delta f/2}\int_{-\Delta f/2}^{\Delta f/2}\int_{-\Delta f/2}^{\Delta f/2} \\ \nonumber
+&\cdot\rho(x + n_1\Delta f, y + n_2\Delta f, z + n\Delta f)\chi(x + n_1\Delta f, y + n_2\Delta f, z + n\Delta f) \\
+&\cdot g(x+y-z+l\Delta f)\partial x\partial y\partial z,
+\end{align}
+
+To avoid the nonlinearity in the integrand introduced by $g(x+y-z+l\Delta f)$, we could modify the integration boundaries, but numerical methods don't seem to have a problem integrating with the nonlinearity in the integrad.
+
+The oscillatory behavior of $\chi(f_1, f_2, f)$ makes the integration above difficult. To circumvent this problem, we can use the $\epsilon$ power law for scaling the non-linear noise power $NL_n$ for 1 span to $N_s$ spans \cite{Poggiolini2012}:
+
+\begin{equation}
+	NL_n\Big|_{N_s} = NL_n\Big|_{N_s = 1}\cdot N_s^{1+\epsilon},
+\end{equation}
+where $\epsilon$ can be computed numerically, but it's usually $\epsilon \approx 0.05$ for a sufficiently large optical bandwidth. Note that for $N_s = 1$, $\chi(f_1, f_2, f) = 1$, which simplifies the numerical integration.
+
+The functions $\rho(f_1, f_2, f)$ and $\chi(f_1, f_2, f)$ in the integrand of $D(i, j, n)$ depend on the frequencies $f_1, f_2, f$ only as $(f_1-f)(f_2-f)$. As a result, we can represent $D(n_1, n_2, n)$ as a symmetric $2N-1\times 2N-1$ matrix, which reduces the number of total computed entries.
+\begin{equation}
+	D_{ij} = D(n_1, n_2, n), \quad i = n_1 - n, j = n_2 - n
+\end{equation}
+
+\section{Gradient calculation}
+
+When the particle swarm optimization algorithm ends, an interior-point local optimization algorithm starts. In Matlab that's realized by the function \texttt{fmincon}, which minimizes a multivariate constrained function. The objective does not necessarily need to be convex.
+
+For the local optimization, it is necessary to compute the gradient analytically otherwise the solutions will exhibit an oscillatory behavior due to gradient estimation errors.
+
+Including nonlinearity, the objective function is given by
+
+\begin{equation} \label{eq:grad:SE}
+	SE = 2\sum_{k = 1}^N s(G_k - A)\log_2(1 +  SNR_k),
+\end{equation}
+where $s(x) = 0.5(\mathrm{tanh(Dx)}-1)$ is a function that approximates a step function and $D$ determines the sharpness of the step function approximation, $G_k$ is the gain of the $k$-th channel in dB, $A_k$ is the span attenuation in the $k$-th channel in dB, and $SNR_k$ is the SNR at the $k$-th channel, which is given by
+\begin{equation}
+	SNR_k = \frac{P_k}{P_{ASE, k} + \mathrm{NL}_k(P)}
+\end{equation}
 
-\begin{align} \label{eq:Dcoeff}\nonumber 
-D(i, j, n) &= \frac{16}{27}\frac{1}{\Delta f^3}\int_{-\Delta f/2}^{\Delta f/2}\int_{\max\{-\Delta f/2, -x-\Delta f/2\}}^{\min\{\Delta f/2, -x+\Delta f/2\}}\int_{\max\{-\Delta f/2, x+y-\Delta f/2\}}^{\min\{\Delta f/2, x+y+\Delta f/2\}} \\
-&\cdot\rho(x + i\Delta f, y + j\Delta f, z + n\Delta f)\chi(x + i\Delta f, y + j\Delta f, z + n\Delta f)\partial z\partial y\partial x,
+The ASE power $P_{ASE, k} = 2aNn_{sp}(\lambda_k)h\nu_k\Delta f$ does not depend on the signal power $P_k$, but the nonlinear noise power is a function of the power vector $\tilde{P}$ i.e., it depends on the launch power of all channels.
+
+Deriving SE with respect to $P_m$ yields
+
+\begin{equation} \label{eq:partial_SE}
+	\frac{\partial SE}{\partial P_m} = \frac{2}{\log(2)}\sum_{k = 1}^N \bigg [
+	\frac{\partial G_k}{\partial P_m} \log(1 + SNR_k) s^{\prime}(G_k - A) + \frac{\partial SNR_k}{\partial P_m}\frac{ s(G_k - A)}{1 + SNR_k}
+	\bigg]
+\end{equation}
+
+The equation above depends on the gradient of the SNR and on the gradient of the gain. These gradients will be calculated in the next subsections.
+
+\subsection{Gain Gradient}
+
+From the semi-analytical derivation the gain is given by the transcendental equation:
+
+\begin{align} 
+G_k &= \exp\Big(a_k(Q^{in} - Q^{out}) - b_k\Big) \\ \label{eq:partial_Gk}
+& \implies \frac{\partial G_k}{\partial P_m} = a_k\Big(\frac{\partial Q^{in}}{\partial P_m} - \frac{\partial Q^{out}}{\partial P_m}\Big)G_k
+\end{align}
+where
+\begin{equation}
+\frac{\partial Q^{in}}{\partial P_m} = \frac{1}{h\nu_m}
+\end{equation}
+and 
+\begin{align}
+Q^{out} &= \sum_k\frac{P_k}{h\nu_k}G_k \\ \label{eq:partial_Qout}
+& \implies \frac{\partial Q^{out}}{\partial P_m} = \frac{1}{h\nu_m}G_m + \sum_k  \frac{P_k}{h\nu_k}\frac{\partial G_k}{\partial P_m}
 \end{align}
 
-Given the oscillatory behavior of $\chi(f_1, f_2, f)$ it is also use the iterative integration method in \texttt{integral3}.
+Substituting \eqref{eq:partial_Gk} in \eqref{eq:partial_Qout} and solving for $\frac{\partial Q^{out}}{\partial P_m}$ yields
+\begin{align} \nonumber
+\frac{\partial Q^{out}}{\partial P_m} = \frac{1}{h\nu_m}G_m + \sum_k  \frac{P_k}{h\nu_k}a_k\Big(\frac{1}{h\nu_m} - \frac{\partial Q^{out}}{\partial P_m}\Big)G_k \\ \nonumber
+\frac{\partial Q^{out}}{\partial P_m}\bigg(1 + \sum_k\frac{P_k}{h\nu_k}a_kG_k \bigg) = \frac{1}{h\nu_m}\bigg(G_m + \sum_k\frac{P_k}{h\nu_k}a_kG_k\bigg) \\
+\frac{\partial Q^{out}}{\partial P_m} = \frac{1}{h\nu_m}\frac{G_m + \sum_k\frac{P_k}{h\nu_k}a_kG_k}{1 + \sum_k\frac{P_k}{h\nu_k}a_kG_k}
+\end{align}
+
+Now substituting back in \eqref{eq:partial_Gk}:
+
+\begin{align} \nonumber
+\frac{\partial G_k}{\partial P_m} &= \frac{a_k}{h\nu_m}\Bigg(1 - \frac{G_m + \sum_k\frac{P_k}{h\nu_k}a_kG_k}{1 + \sum_k\frac{P_k}{h\nu_k}a_kG_k}\Bigg)G_k \\
+& = \frac{a_k}{h\nu_m}\Bigg(\frac{1 - G_m}{1 + \sum_k\frac{P_k}{h\nu_k}a_kG_k }\Bigg)G_k 
+\end{align}
+
+\subsection{SNR gradient}
+The SNR depends on the $m$th channel power through the power itself and through the nonlinear noise power. 
+
+Assuming that $\frac{\partial G_k}{\partial P_m} \approx 0$ i.e., near the optimal solution the gain does not vary with the power, we have
+
+\begin{equation} \label{eq:dSE_dP}
+\frac{\partial SE}{\partial P_m} \approx \frac{2}{\log(2)}\sum_{k = 1}^N \frac{\partial SNR_k}{\partial P_m}\bigg [
+\frac{s(G_k - A)}{1 + SNR_k}
+\bigg]
+\end{equation}
+
+The derivative of the SNR is given by
+
+\begin{align}
+	\frac{\partial SNR_k}{\partial P_m} &= \begin{cases}
+	-\frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{P_k}{(P_{ASE, k} + \mathrm{NL}_k)^2}, & k \neq m \\
+	-\frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{P_k}{(P_{ASE, k} + \mathrm{NL}_k)^2} + \frac{1}{P_{ASE, k} + \mathrm{NL}_k}, & k = m
+	\end{cases} \\
+	&= \begin{cases}
+	-\frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{SNR_k^2}{P_k}, & k \neq m \\
+	-\frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{SNR_k^2}{P_k} + \frac{SNR_k}{P_k}, & k = m
+	\end{cases} \\
+	& = \mathds{1}\{k= m\}\frac{SNR_m}{P_m} - \frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{SNR_k^2}{P_k}
+\end{align}
+
+Note that this equation is written in terms of $SNR_k$ for convenience. In practice, it's easier to write out this equation as a function of the total noise in order to avoid divisions by the power $P_k$, which could go to zero during optimization. 
+
+For the nonlinear noise power, we have
+
+\begin{equation} \label{eq:nonlinear-noise-power}
+	\mathrm{NL}_n(\tilde{P}) = \sum_{n_1 = 1}^{N}\sum_{n_1 = 1}^N\sum_{l=-1}^{1}\tilde{P}_{n_1}\tilde{P}_{n_2}\tilde{P}_{i+j-n+l}D_l(n_1, n_2, n), \quad n = 1, \ldots, N,
+\end{equation}
+where $\tilde{P}$ is the launch power into the fiber at each channel, so that
+\begin{equation}
+	\tilde{P}_k = P_ke^{\alpha_{\text{SMF}, k}L},
+\end{equation}
+since the gain flattening filter ideally makes the channel gain equal to the fiber attenuation. Therefore, once we know $\frac{\partial NL_k}{\partial\tilde{P}_m}$, we can obtain $\frac{\partial NL_k}{\partial P_m}$ from
+\begin{equation}
+	\frac{\partial NL_k}{\partial P_m} = e^{\alpha_{\text{SMF}, m}L}\frac{\partial NL_k}{\partial\tilde{P}_m}
+\end{equation}
+
+\noindent (!! check) this may require $\alpha_{\text{SMF}, m}$ to be constant!
+
+$\frac{\partial NL_k}{\partial\tilde{P}_m}$ can be computed by inspection of \eqref{eq:nonlinear-noise-power}, where each term is differentiated independently. An equation for the gradient of $\mathrm{NL}_n(\tilde{P})$ with respect to logarithmic power is given in \cite[Appendix]{Roberts2016}. 
+
+\subsection{Spectral efficiency gradient}
+
+Returning to \eqref{eq:partial_SE}
+
+\begin{equation}
+\frac{\partial SE}{\partial P_m} = \frac{2}{\log(2)}\sum_{k = 1}^N \bigg [
+\frac{\partial G_k}{\partial P_m} \log(1 + SNR_k) s^{\prime}(G_k - A) + \frac{\partial SNR_k}{\partial P_m}\frac{ s(G_k - A)}{1 + SNR_k}
+\bigg],
+\end{equation}
+
+we can now substitute
+\begin{align}
+	\frac{\partial G_k}{\partial P_m} &= \frac{a_k}{h\nu_m}\Bigg(\frac{1 - G_m}{1 + \sum_k\frac{P_k}{h\nu_k}a_kG_k }\Bigg)G_k \\
+	\frac{\partial SNR_k}{\partial P_m} &= \mathds{1}\{k= m\}\frac{SNR_m}{P_m} - \frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{SNR_k^2}{P_k}
+\end{align}
+
+
+Since the optimization is realized with power values in dBm ($\mathcal{P}$), it's necessary to change the differentiation variable:
+
+\begin{equation}
+\frac{\partial SE}{\partial P_m} = \frac{\partial SE}{\partial \mathcal{P}_m}\frac{\partial\mathcal{P}_m}{\partial P_m} \implies \frac{\partial SE}{\partial \mathcal{P}_m} =  \bigg(\frac{\partial\mathcal{P}_m}{\partial P_m}\bigg)^{-1}\frac{\partial SE}{\partial P_m} = \frac{P_m\log(10)}{10}\frac{\partial SE}{\partial P_m}
+\end{equation}
 
-The functions $\rho(f_1, f_2, f)$ and $\chi(f_1, f_2, f)$ in the integrand of $D(i, j, n)$ depend on the frequencies $f_1, f_2, f$ only as $(f_1-f)(f_2-f)$. As a result, we can represent $D(i, j, n)$ as a symmetric $2N-1\times 2N-1$ matrix, which reduces the number of total computed entries.
+%The final expression for the gradient is given by
+%
+%\begin{align}
+%\frac{\partial SE}{\partial\mathcal{P}_m} &\approx \frac{2}{\log(2)}\frac{P_m\log(10)}{10}\sum_{k = 1}^N \bigg[\mathds{1}\{k= m\}\frac{SNR_m}{P_m} - e^{\alpha_{\text{SMF}, m}L}\frac{\partial \mathrm{NL}_k}{\partial P_m}\frac{SNR_k^2}{P_k}\bigg]\bigg [
+%\frac{s(G_k - A)}{1 + SNR_k}
+%\bigg] 	
+%\end{align}
 
 \newpage
 \section{Notes on amplifier physics}

diff --git a/edfa/f/capacity_nonlinear_regime_relaxed.m b/edfa/f/capacity_nonlinear_regime_relaxed.m
@@ -1,4 +1,4 @@
-function [SE, SElamb] = capacity_nonlinear_regime_relaxed(X, E, Pump, Signal, problem)
+function [SE, dSE, SElamb] = capacity_nonlinear_regime_relaxed(X, E, Pump, Signal, problem)
 %% Compute system capacity in nonlinear regime for a particular EDF length and power loading specificied in vector X
 % X(1) is the EDF length, and X(2:end) has the signal power in dBm at each
 % wavelength. Simulations assume ideal gain flatenning, resulting in the
@@ -21,7 +21,8 @@
 % "Namp" spans. 
 % Output:
 % - SE: spectral efficiency in bits/s/Hz i.e., capacity normalized by bandwidth
-% - SElamb: spectral efficiency in bits/s/Hz in each wavelength 
+% - dSE: gradient of spectral efficiency with respect to X (E.L and power
+% in dBm)
 
 % Unpack parameters
 spanAttdB = problem.spanAttdB;
@@ -43,7 +44,18 @@
 offChs = (GaindB < spanAttdB);
 P = Signal.P.*(10.^(spanAttdB/10)); % optical power after gain flattening
 P(offChs) = 0;
-NL = GN_model_noise(P, D);
+
+if nargout > 1 % gradient was requested
+    [dNL, NL] = GN_model_noise_gradient(P, D);
+    dNL = dNL*Namp^(1+epsilon); % Scale gradient to "Namp" spans
+
+    % Multiply by gain, since dNL is originally computed with respect to 
+    % the launch power, while the optimzation is done with the input power
+    % to the amplifier
+    dNL = dNL.*(10.^(spanAttdB/10)); 
+else
+    NL = GN_model_noise(P, D);
+end
 
 % Scale NL noise to "Namp" spans
 NL = NL*Namp^(1+epsilon);
@@ -54,4 +66,34 @@
 NF = 2*a*nsp;
 SNR = Signal.P./(Namp*df*NF.*Signal.Ephoton + NL);
 SElamb = 2*log2(1 + SNR).*step_approx(GaindB - spanAttdB);
-SE = sum(SElamb);
+SE = -sum(SElamb);
+
+%% Computes gradient
+if nargout > 1 % gradient was requested      
+    % SNR gradient (validated numerically with accuray 1e-6)
+    S = step_approx(GaindB - spanAttdB);
+    Ninv = 1./(Namp*df*NF.*Signal.Ephoton + NL);
+    dSNR = diag(Ninv) - dNL.*(Signal.P.*Ninv.^2); % SNR gradient
+    % N x N matrix, where dSNR(i,j) = partial SNR_i / partial P_j
+
+    % Gain gradient (not accurate)
+    alpha = E.absorption_coeff(Signal.wavelength); % (1/m)
+    g = E.gain_coeff(Signal.wavelength); % (1/m)
+    xi = E.sat_param(Signal.wavelength);
+    a = (alpha + g)./xi;
+    hnu = E.Ephoton(Signal.wavelength);
+    Gain = 10.^(GaindB/10);
+    diff_step_approx = problem.diff_step_approx;
+
+    dGain = (a.*Gain)./(1 + sum(Signal.P./hnu.*a.*Gain)); % 1 x N vector
+    dGain = dGain.*((1 - Gain)./hnu).'; % N X N matrix dGain(i,j) = partial Gain_i / partial P_j   
+
+    % SE gradient with respect to power in dBm
+
+    % Uncomment one of the two lines
+    dSElin = -2/log(2)*sum(dSNR.*(S./(1 + SNR)), 2); % Considering dG/dP = 0
+%     dSElin = -2/log(2)*sum(dGain.*(log(1 + SNR).*diff_step_approx(GaindB - spanAttdB))  + dSNR.*(S./(1 + SNR)), 2); % Considering non-zero gain gradient
+
+    dSE = (log(10)/10)*(Signal.P.'.*dSElin); % converts to derivative with power in dBm
+    dSE = [0; dSE];
+end