Skip to content

Commit

Permalink
Update encoder and decoder scripts
Browse files: browse the repository at this point in the history
  • Loading branch information
chris-hld committed Feb 1, 2024
1 parent 69022fd commit b6704ad
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 36 deletions.
47 changes: 24 additions & 23 deletions hoac_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
# Prepare
N_sph_out = conf['N_sph_in']
num_sh_out = (N_sph_out+1)**2
r_smooth = 0.75

x_tc = np.sqrt(4*np.pi)*sig_tc.get_signals()
fs = sig_tc.fs
Expand All @@ -48,7 +49,7 @@

x_tc = np.hstack((x_tc, np.zeros((num_ch, hSTFT.processing_delay))))
out_sig = np.zeros((num_sh_out, x_tc.shape[1]))
fd_sig_in = np.zeros((8, num_ch, num_bands), dtype='complex64')
fd_sig_in = np.zeros((num_slots, num_ch, num_bands), dtype='complex64')

A_nm = conf['A_nm']
beta = conf['beta']
Expand All @@ -57,7 +58,7 @@
assert num_recov

B_nm_trunc = B_nm_trunc[:, np.newaxis, :, np.newaxis]
B_nm_exp = np.zeros((num_sh_out, 8, num_ch, num_bands))
B_nm_exp = np.zeros((num_sh_out, num_slots, num_ch, num_bands))
B_nm_exp[:B_nm.shape[0], :, :, :] = B_nm[:, np.newaxis, :, np.newaxis]
N_sph_recov = int(np.sqrt(num_recov) - 1)

Expand All @@ -73,12 +74,12 @@
# Initialize and start timer
start_time = time.time()

doa = np.zeros((8, num_ch, num_bands, 3))
doa = np.zeros((num_slots, num_ch, num_bands, 3))
doa_prev = np.zeros_like(doa)
dif = np.zeros((8, num_ch, num_bands))
dif = np.zeros((num_slots, num_ch, num_bands))
dif_prev = np.zeros_like(dif)
Y = np.zeros((num_sh_out, 8, num_ch, num_bands))
X_nm = np.zeros((8, num_sh_out, num_bands), dtype=np.complex_)
Y = np.zeros((num_sh_out, num_slots, num_ch, num_bands))
X_nm = np.zeros((num_slots, num_sh_out, num_bands), dtype=np.complex_)

M = np.zeros_like(Y)
M_prev = np.zeros_like(M)
Expand All @@ -93,25 +94,25 @@

fd_sig_in[:] = hSTFT.forward(blk_in)

doa[:], dif[:] = hoac.dequantize_dirac_pars(doa_idx_stream, dif_idx_stream,
doa[:], dif[:] = hoac.dequantize_dirac_pars(doa_idx_stream[idx_blk, ...],
dif_idx_stream[idx_blk, ...],
freqs, f_qt_c,
qgrid, qdifbins, idx_blk)
qgrid, qdifbins)

M[:], Y[:] = hoac.formulate_M_Y(doa, dif, N_sph_out, B_nm_exp, beta,
num_recov, B_nm_trunc)

if idx_blk == 0:
M_prev = M
X_nm[:] = np.einsum('ldsk,dsk->dlk', 2/3*M + 1/3*M_prev, fd_sig_in)
M_prev[:] = M[:]
M = r_smooth * M + (1. - r_smooth) * M_prev
X_nm[:] = np.einsum('ldsk,dsk->dlk', M, fd_sig_in)

ene_s = np.real((fd_sig_in * fd_sig_in.conj()))
ene_dir = (1-dif) * ene_s
ene_dif = dif * ene_s
gn = hoac.opt_gain(X_nm, Y, dif,
np.real((fd_sig_in * fd_sig_in.conj())),
C_dif, orne, M_mavg)
np.clip(gn, 0.5, 2., out=gn)
g = r_smooth * gn + (1. - r_smooth) * g

gp = hoac.post_gain(X_nm, Y, ene_dir, ene_dif, C_dif, orne, M_mavg)
gp[gp > 2.] = 2.
gp[gp < .5] = .5
g = 2/3 * gp + 1/3*g
X_nm[:, num_recov:, :] = np.repeat(g, num_m, axis=0)[
np.newaxis, num_recov:, :] * X_nm[:, num_recov:, :]

Expand All @@ -136,18 +137,12 @@
'~/data/HRTFs/THK_KU100/HRIR_L2354.sofa'), N_sph_out)
out_bin = spa.sig.MultiSignal([*spa.decoder.sh2bin(0.3*out_sig, hrirs_nm)],
fs=fs)
if PLAY:
print("Playing decoded")
out_bin.play()
out_bin.save("./audio/Hoac_bin.wav", "PCM_16")

uncompressed_sig = spa.sph.sn3d_to_n3d(
spa.io.load_audio("./audio/in_sig_ambix.wav", fs).get_signals()[:num_sh_out, :])
in_bin = spa.sig.MultiSignal([*spa.decoder.sh2bin(0.3*uncompressed_sig, hrirs_nm)],
fs=fs)
if PLAY:
print("playing input")
in_bin.play()
in_bin.save("./audio/Input_bin.wav", "PCM_16")

print("RMSE(n) ratio:",
Expand All @@ -162,6 +157,12 @@
(spa.utils.rms(out_sig, axis=-1)) /
(10e-10 + spa.utils.rms(uncompressed_sig, axis=-1))))), 3))

if PLAY:
print("Playing input")
in_bin.play()
print("Playing decoded")
out_bin.play()

if PLOT:
spa.plot.sh_bar([spa.utils.rms(out_sig) /
(spa.utils.rms(uncompressed_sig+1e-10))], TODB=1,
Expand Down
27 changes: 14 additions & 13 deletions hoac_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
PEAK_NORM = True
PLOT = False

# laod signal
# load signal
fs = 48000
num_smpls = fs * 10
sig_len = 10 # in s
N_sph_in = 5

file_name = 'Audio/Ambisonics/test_scenes/bruckner_multichannelSH5N3D.wav'
Expand All @@ -36,7 +36,7 @@

in_file = spa.io.load_audio(in_path, fs)
assert in_file.fs == fs
in_sig = in_file.get_signals()[:(N_sph_in+1)**2, :num_smpls]
in_sig = in_file.get_signals()[:(N_sph_in+1)**2, :int(fs * sig_len)]
if 'sn3d' in str(in_path).lower():
in_sig = spa.sph.sn3d_to_n3d(in_sig)
print("Converted SN3D input")
Expand Down Expand Up @@ -94,7 +94,7 @@
f_qt = hoac.get_f_quantizer(hSTFT.num_bands)
num_fgroups = len(f_qt)
M_grouper = hoac.get_C_weighting(hSTFT.center_freqs)[:, None] * \
hoac.get_f_grouper(f_qt, num_fgroups, hSTFT.num_bands)
hoac.get_f_grouper(f_qt)
M_grouper = M_grouper / np.sum(M_grouper, axis=0)

qgrid, num_coarse = hoac.get_quant_grid(user_pars['metaDoaGridOrder'], None)
Expand Down Expand Up @@ -133,14 +133,14 @@
fd_sig_in = hSTFT.forward(blk_in)

for idx_slt in range(num_slots):
azi_g[idx_slt, ::], zen_g[idx_slt, ::], \
dif_g[idx_slt, ::], \
ene_g[idx_slt, ::], _ = hoac.grouped_sector_parameters(
fd_sig_in[idx_slt, :, :], A_wxyz_c, M_grouper)

dif_q_stream[idx_blk, ::] = hoac.quantize_dif(dif_g, qdifbins)
doa_idx_stream[idx_blk, ::] = hoac.quantize_doa(azi_g, zen_g, qgrid,
dif_g, None, None)
azi_g[idx_slt, ...], zen_g[idx_slt, ...], \
dif_g[idx_slt, ...], \
ene_g[idx_slt, ...], _ = hoac.grouped_sector_parameters(
fd_sig_in[idx_slt, ...], A_wxyz_c, M_grouper)
azi_g, zen_g, dif_g, ene_g = hoac.post_pars(azi_g, zen_g, dif_g, ene_g)
dif_q_stream[idx_blk, ...] = hoac.quantize_dif(dif_g, qdifbins)
doa_idx_stream[idx_blk, ...] = hoac.quantize_doa(azi_g, zen_g, qgrid,
dif_g, None, None)

start_smpl += blocksize
idx_blk += 1
Expand Down Expand Up @@ -173,11 +173,12 @@
x_transport, user_pars, fs, libpath="~/git/opus-tools/")

print('Writing output: ', time.time()-start_time, 'seconds.')
subprocess.run(["du", "-sh", "transport-data/"])
subprocess.run(["du", "-sh", "transport-data/"], check=True)

spa.io.save_audio(spa.sph.n3d_to_sn3d(in_sig).T, './audio/in_sig_ambix.wav',
fs)


if PLOT:
spa.plot.sh_rms_map(in_sig, TODB=True, title="Input SHD Signal")
spa.plot.sh_rms_map(x_nm, TODB=True, title="Coded SHD Signal")
Expand Down

0 comments on commit b6704ad

Please sign in to comment.