Skip to content

Commit

Permalink
update input format
Browse files Browse the repository at this point in the history
  • Loading branch information
YijianZhou committed Mar 21, 2021
1 parent 8a17493 commit 7f137b8
Show file tree
Hide file tree
Showing 16 changed files with 216 additions and 75 deletions.
13 changes: 6 additions & 7 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import sys
sys.path.append('/home/zhouyj/software/PAD')
sys.path.append('/home/zhouyj/software/PAL')
import data_pipeline as dp
import pickers

class Config(object):
def __init__(self):
Expand All @@ -13,22 +12,22 @@ def __init__(self):
self.get_data_dict = dp.get_data_dict
self.get_sta_dict = dp.get_sta_dict

# MESS params
# MFT params
self.temp_win_det = [1.,11.] # win for detection, pre & post P
self.temp_win_p = [0.5,1.5] # win for p pick, pre & post P
self.temp_win_s = [0.5,2.5] # win for s pick, pre & post S
self.trig_thres = 0.25 # cc thres for det & peak expansion
self.expand_len = 2.5 # win len for cc peak expansion
self.expand_len = 2. # win len for cc peak expansion
self.det_gap = 5. # gap sec for detection
self.pick_win_p = [1.5, 1.5] # win for P pick
self.pick_win_s = [2.5, 2.5] # win for S pick
self.chn_p = [2]
self.chn_s = [0,1]
self.amp_win = [1, 5] # win for get_amp, sec pre&post P
self.amp_win = [1, 4]

# data process
self.to_prep = False # False if templates are preprocessed
self.samp_rate = 50
self.freq_band = ['bandpass', [1., 40.]]
self.get_amp = pickers.STA_LTA_PCA().get_amp
self.freq_band = [2.,40.]
self.num_workers = 10

6 changes: 3 additions & 3 deletions cut_template_sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str,
default='/data3/luwf_data/Trace/Linear_Pad')
default='/data/Example_data')
parser.add_argument('--temp_pha', type=str,
default='/data3/luwf_data/Trace/JZG_temp/jzg_temp.pha')
default='input/example.temp')
parser.add_argument('--out_root', type=str,
default='./output/tmp')
default='output/example_templates')
args = parser.parse_args()


Expand Down
118 changes: 118 additions & 0 deletions cut_template_torch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
""" Cut template waveform with Pytorch (preprocess in cut)
Inputs
data_dir: dir of continuous data
temp_pha: template phase file
out_root: root dir for template data
Outputs
temp_root/temp_name/net.sta.chn
Note: temp_name == ot (yyyymmddhhmmss.ss)
"""
import os, sys, glob, shutil
sys.path.append('/home/zhouyj/software/data_prep')
import argparse
import numpy as np
import torch.multiprocessing as mp
from torch.utils.data import Dataset, DataLoader
from obspy import read, UTCDateTime
import sac
from dataset_gpu import read_ftemp, preprocess
import config
import warnings
warnings.filterwarnings("ignore")

# cut params
cfg = config.Config()
num_workers = cfg.num_workers
win_len = cfg.win_len
get_data_dict = cfg.get_data_dict


def get_sta_date(event_list):
sta_date_dict = {}
for i, [id_name, event_loc, pick_dict] in enumerate(event_list):
if i%1e3==0: print('%s/%s events done/total'%(i, len(event_list)))
# 1. get event info
event_name = id_name.split('_')[1]
event_dir = os.path.join(args.out_root, event_name)
ot = event_loc[0]
if not os.path.exists(event_dir): os.makedirs(event_dir)
for net_sta, [tp, ts] in pick_dict.items():
date = str(tp.date)
sta_date = '%s_%s'%(net_sta, date) # for one day's stream data
if sta_date not in sta_date_dict:
sta_date_dict[sta_date] = [[event_dir, tp, ts]]
else: sta_date_dict[sta_date].append([event_dir, tp, ts])
return sta_date_dict


class Cut_Templates(Dataset):
""" Dataset for cutting templates
"""
def __init__(self, sta_date_items):
self.sta_date_items = sta_date_items
self.data_dir = args.data_dir

def __getitem__(self, index):
data_paths_i = []
# get one sta-date
sta_date, samples = self.sta_date_items[index]
net_sta, date = sta_date.split('_')
net, sta = net_sta.split('.')
date = UTCDateTime(date)
# read & prep one day's data
print('reading %s %s'%(net_sta, date.date))
data_dict = get_data_dict(date, self.data_dir)
if net_sta not in data_dict: return data_paths_i
st_paths = data_dict[net_sta]
try:
stream = read(st_paths[0])
stream += read(st_paths[1])
stream += read(st_paths[2])
except: return data_paths_i
stream = preprocess(stream)
if len(stream)!=3: return data_paths_i
for [event_dir, tp, ts] in samples:
# time shift & prep
start_time = tp - win_len[0]
end_time = tp + win_len[1]
st = sac.obspy_slice(stream, start_time, end_time)
if 0 in st.max() or len(st)!=3: continue
st = st.detrend('demean') # note: no detrend here
# write & record out_paths
data_paths_i.append([])
for tr in st:
out_path = os.path.join(event_dir,'%s.%s'%(net_sta,tr.stats.channel))
tr.stats.sac.t0, tr.stats.sac.t1 = tp-start_time, ts-start_time
tr.write(out_path, format='sac')
data_paths_i[-1].append(out_path)
return data_paths_i

def __len__(self):
return len(self.sta_date_items)


if __name__ == '__main__':
mp.set_start_method('spawn', force=True) # 'spawn' or 'forkserver'
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str,
default='/data/Example_data')
parser.add_argument('--temp_pha', type=str,
default='input/example.temp')
parser.add_argument('--out_root', type=str,
default='output/example_templates')
args = parser.parse_args()

# read fpha
temp_list = read_ftemp(args.temp_pha)
sta_date_dict = get_sta_date(temp_list)
sta_date_items = list(sta_date_dict.items())
# for sta-date pairs
data_paths = []
dataset = Cut_Templates(sta_date_items)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=None)
for i, data_paths_i in enumerate(dataloader):
data_paths += data_paths_i
if i%10==0: print('%s/%s sta-date pairs done/total'%(i+1,len(dataset)))
fout_data_paths = os.path.join(args.out_root,'data_paths.npy')
np.save(fout_data_paths, data_paths)

44 changes: 27 additions & 17 deletions dataset_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@
num_workers = cfg.num_workers
samp_rate = cfg.samp_rate
freq_band = cfg.freq_band
to_prep = cfg.to_prep
temp_win_det = cfg.temp_win_det
temp_win_p = cfg.temp_win_p
temp_win_s = cfg.temp_win_s
win_len_npts = [int(sum(win)*samp_rate) for win in [temp_win_det, temp_win_p, temp_win_s]]
temp_win_npts = [int(sum(win)*samp_rate) for win in [temp_win_det, temp_win_p, temp_win_s]]
min_sta = cfg.min_sta
max_sta = cfg.max_sta


def read_data(date, data_dir, sta_dict):
""" Read data (continuous waveform)
Input
Expand All @@ -31,6 +33,8 @@ def read_data(date, data_dir, sta_dict):
t=time.time()
print('reading continuous data')
data_dict = get_data_dict(date, data_dir)
to_del = [net_sta for net_sta in data_dict.keys() if net_sta not in sta_dict]
for net_sta in to_del: data_dict.pop(net_sta)
data_dataset = Data(data_dict, sta_dict)
data_loader = DataLoader(data_dataset, num_workers=num_workers, batch_size=None, pin_memory=True)
todel = []
Expand Down Expand Up @@ -87,9 +91,8 @@ def __getitem__(self, index):
# read stream
net_sta = self.sta_list[index]
st_paths = self.data_dict[net_sta]
if net_sta not in self.sta_dict: return net_sta, []
gain = float(self.sta_dict[net_sta]['gain'])
stream = read_stream(st_paths, gain)
stream = read_stream(st_paths, gain, True)
if len(stream)!=3: return net_sta, []
start_time = stream[0].stats.starttime
end_time = stream[0].stats.endtime
Expand All @@ -98,7 +101,7 @@ def __getitem__(self, index):
data_np = st2np(stream)[:, 0:int(86400*samp_rate)]
# calc norm data (for calc_cc)
data_cum = [np.cumsum(di**2) for di in data_np]
norm_data = np.array([np.sqrt(di[win_len_npts[0]:] - di[:-win_len_npts[0]]) for di in data_cum])
norm_data = np.array([np.sqrt(di[temp_win_npts[0]:] - di[:-temp_win_npts[0]]) for di in data_cum])
return net_sta, [data_np, norm_data]

def __len__(self):
Expand Down Expand Up @@ -128,14 +131,14 @@ def __getitem__(self, index):
# read template date
st_paths = sorted(glob.glob(os.path.join(temp_dir, '%s.*'%net_sta)))
if len(st_paths)!=3: continue
st = read_stream(st_paths)
st = read_stream(st_paths, None, to_prep)
if len(st)!=3: continue
# cut template data
temp_det = trim_stream(st, tp-temp_win_det[0], tp+temp_win_det[1])
temp_p = trim_stream(st, tp-temp_win_p[0], tp+temp_win_p[1])
temp_s = trim_stream(st, ts-temp_win_s[0], ts+temp_win_s[1])
temp = [st2np(st_i) for st_i in [temp_det, temp_p, temp_s]]
temp = [temp[i][:,0:win_len_npts[i]] for i in range(3)]
temp = [temp[i][:,0:temp_win_npts[i]] for i in range(3)]
# calc norm
norm_det = np.array([sum(tr**2)**0.5 for tr in temp[0]])
norm_p = np.array([sum(tr**2)**0.5 for tr in temp[1]])
Expand Down Expand Up @@ -172,7 +175,7 @@ def read_ftemp(ftemp):
return temp_list


""" Stream processing
""" Signal processing
"""

def preprocess(stream):
Expand All @@ -191,25 +194,32 @@ def preprocess(stream):
if resamp_factor!=1: st = st.interpolate(samp_rate)
# filter
st = st.detrend('demean').detrend('linear').taper(max_percentage=0.05, max_length=10.)
filter_type, freq_range = freq_band
if filter_type=='highpass':
return st.filter(filter_type, freq=freq_range)
if filter_type=='bandpass':
return st.filter(filter_type, freqmin=freq_range[0], freqmax=freq_range[1])


def read_stream(st_paths, gain=None):
freq_min, freq_max = freq_band
if freq_min and freq_max:
return st.filter('bandpass', freqmin=freq_min, freqmax=freq_max)
elif not freq_max and freq_min:
return st.filter('highpass', freq=freq_min)
elif not freq_min and freq_max:
return st.filter('lowpass', freq=freq_max)
else:
print('filter type not supported!'); return []


def read_stream(st_paths, gain=None, to_prep=True):
# read data
try:
st = read(st_paths[0])
st += read(st_paths[1])
st += read(st_paths[2])
except:
print('bad data'); return []
if not gain: return preprocess(st)
if not gain:
if to_prep: return preprocess(st)
else: return st
# remove gain
for i in range(3): st[i].data /= gain
return preprocess(st)
if to_prep: return preprocess(st)
else: return st


def trim_stream(stream, start_time, end_time):
Expand Down
10 changes: 5 additions & 5 deletions example_mess_workdir/README
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Copy this directory to anywhere you like as working directory

Input
PAD detection: input/example_pad.pha
PAD HypoInverse phase file (with evid): input/example_pad_hyp_all.pha
Station file (in PAD format): input/example_pad.sta
initial detection: input/example.pha
Located phase file (with evid): input/example_full.pha
Station file: input/example.sta

1. generate template phase file
python get_event_list_example.py --> input/example_pad.evt
python select_template_example.py --> input/example_pad.temp
python get_event_list_example.py --> input/example.evt
python select_template_example.py --> input/example.temp

2. cut template data
python cut_template_example.py
Expand Down
13 changes: 6 additions & 7 deletions example_mess_workdir/config_example.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import sys
sys.path.append('/home/zhouyj/software/PAD')
sys.path.append('/home/zhouyj/software/PAL')
import data_pipeline as dp
import pickers

class Config(object):
def __init__(self):

# Template cut
self.win_len = [15,25] # cut window length
self.win_len = [15,25] # cut window length
self.min_sta = 4 # min sta num for a template events
self.max_sta = 15
self.get_data_dict = dp.get_data_dict
Expand All @@ -18,17 +17,17 @@ def __init__(self):
self.temp_win_p = [0.5,1.5] # win for p pick, pre & post P
self.temp_win_s = [0.5,2.5] # win for s pick, pre & post S
self.trig_thres = 0.25 # cc thres for det & peak expansion
self.expand_len = 2.5 # win len for cc peak expansion
self.expand_len = 2. # win len for cc peak expansion
self.det_gap = 5. # gap sec for detection
self.pick_win_p = [1.5, 1.5] # win for P pick
self.pick_win_s = [2.5, 2.5] # win for S pick
self.chn_p = [2]
self.chn_s = [0,1]
self.amp_win = [1, 5]
self.amp_win = [1, 4]

# data process
self.to_prep = False # if to prep template in run_mess stage
self.samp_rate = 50
self.freq_band = ['bandpass', [1., 40.]]
self.get_amp = pickers.STA_LTA_PCA().get_amp
self.freq_band = [2.,40.]
self.num_workers = 10

12 changes: 6 additions & 6 deletions example_mess_workdir/cut_template_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

# i/o paths
mess_dir = '/home/zhouyj/software/MESS'
data_dir = '/data2/Ridgecrest'
out_root = '/data4/bigdata/zhouyj/Example_templates'
temp_pha = 'input/example_pad.temp'
data_dir = '/data2/Example_data'
out_root = 'output/Example_templates'
temp_pha = 'input/example.temp'

# cut template data
shutil.copyfile('config_example.py', os.path.join(mess_dir, 'config.py'))
os.system("python {}/cut_template_sac.py \
# run MSMS cut temp
shutil.copyfile('config_rc.py', os.path.join(mess_dir, 'config.py'))
os.system("python {}/cut_template_torch.py \
--data_dir={} --temp_pha={} --out_root={}"\
.format(mess_dir, data_dir, temp_pha, out_root))

6 changes: 3 additions & 3 deletions example_mess_workdir/get_event_list_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from obspy import UTCDateTime

# i/o paths
fpha = 'input/example_pad.pha'
fevt = open('input/example_pad.evt','w')
fpha = 'input/example.pha'
fevt = open('input/example.evt','w')

def dtime2str(dtime):
date = ''.join(str(dtime).split('T')[0].split('-'))
Expand All @@ -20,7 +20,7 @@ def dtime2str(dtime):
f=open(fpha); lines=f.readlines(); f.close()
for line in lines:
codes = line.split(',')
if len(codes)!=5: continue
if len(codes[0])<10: continue
event_name = dtime2str(UTCDateTime(codes[0]))
fevt.write('{},{}\n'.format(evid, event_name))
evid += 1
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 7f137b8

Please sign in to comment.