Skip to content

Commit

Permalink
Add MACHO data
Browse files Browse the repository at this point in the history
  • Loading branch information
bnaul committed Nov 12, 2017
1 parent 336c9cf commit dcc0ece
Show file tree
Hide file tree
Showing 7 changed files with 452 additions and 3,930 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Neural network autoencoders for unevenly sampled time series
Code accompanying "An unsupervised neural network that outperforms on
classification of unevenly sampled time series".
Code accompanying "A recurrent neural network for classification of unevenly sampled variable stars".

- Code for scores/figures is found in `figures.ipynb`
- Autoencoder network architecture is defined in `autoencoder.py`
Expand Down
3 changes: 3 additions & 0 deletions data/macho/full.pkl
Git LFS file not shown
4,296 changes: 379 additions & 3,917 deletions figures.ipynb

Large diffs are not rendered by default.

Binary file not shown.
69 changes: 63 additions & 6 deletions light_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,13 @@ def fit_lomb_scargle(self):
def fit_supersmoother(self, periodic=True, scale=True):
    """Fit a SuperSmoother model and store its RMS residual in ``self.ss_resid``.

    Parameters
    ----------
    periodic : bool
        If True the smoother is built with ``period=self.p``; otherwise
        with ``period=None``.
    scale : bool
        If True the RMS residual is divided by the standard deviation of
        ``self.measurements``.

    Notes
    -----
    Any ``ValueError`` raised while fitting or evaluating the model is
    caught and ``self.ss_resid`` is set to ``np.inf`` instead, so one
    light curve that cannot be fitted does not abort a whole loading run.
    """
    from supersmoother import SuperSmoother

    model = SuperSmoother(period=self.p if periodic else None)
    # The fit must live inside the try block: an unguarded fit ahead of it
    # would raise before the handler below could ever take effect.
    try:
        model.fit(self.times, self.measurements, self.errors)
        residuals = model.predict(self.times) - self.measurements
        self.ss_resid = np.sqrt(np.mean(residuals ** 2))
        if scale:
            self.ss_resid /= np.std(self.measurements)
    except ValueError:
        self.ss_resid = np.inf

def period_fold(self, p=None):
if p is None:
Expand Down Expand Up @@ -129,9 +132,63 @@ def load_linear():
return light_curves


def load_macho(header_fname='data/macho/machovar.dat',
               lc_glob='/fastdisks/bnaul/*.txt'):
    """Build a list of ``LightCurve`` objects from MACHO photometry files.

    Parameters
    ----------
    header_fname : str
        Whitespace-delimited catalogue file (no header row). Its first
        three columns are joined with '.' to form the row label used to
        look up each light curve's class and period.
    lc_glob : str
        Glob pattern matching the per-object ';'-separated photometry
        files. Each file must have five columns, which are named
        ``t, mr, er, mb, eb`` here.

    Returns
    -------
    list of LightCurve
        One entry per file that still has at least one valid ``mr`` value
        after masking, in sorted filename order. Each entry has ``label``
        and ``p`` taken from the catalogue and ``ss_resid`` filled in by
        ``fit_supersmoother``.

    Raises
    ------
    KeyError
        If a file's derived name is missing from the catalogue index, or
        its ``Class`` code is not one of the known types.
    """
    import datetime

    # ``sep=r'\s+'`` is the documented equivalent of the deprecated
    # ``delim_whitespace=True``.
    header = pd.read_table(header_fname, header=None, sep=r'\s+')
    header.columns = [
        'Field', 'Tile', 'Seqn', 'RA_DEC', 'rPer', 'bPer', 'Vmag',
        'Rmag', 'rAmp', 'bAmp', 'cAmp', 'rSupRSA', 'bSupRSA', 'rchi2',
        'bchi2', 'rsig', 'bsig', 'Var', 'Class', 'Points', 'cPoints',
        'rPoints', 'bPoints',
    ]
    header.index = ['.'.join(str(el) for el in row)
                    for row in header.values[:, :3]]
    LC_types = {
        1: 'RRL AB',
        2: 'RRL C',
        3: 'RRL E',
        4: 'Ceph Fund',
        5: 'Ceph 1st',
        6: 'LPV WoodA',
        7: 'LPV WoodB',
        8: 'LPV WoodC',
        9: 'LPV WoodD',
        10: 'EB',
        11: 'RRL + GB',
    }
    value_cols = ['mr', 'er', 'mb', 'eb']

    light_curves = []
    # Sorted so the output order does not depend on filesystem listing order.
    for i, fname in enumerate(sorted(glob.glob(lc_glob))):
        if i % 100 == 0:
            print(f"{i:5d}/{header.shape[0]}", datetime.datetime.now())

        df = pd.read_csv(fname, sep=';', header=None)
        df.columns = ['t', 'mr', 'er', 'mb', 'eb']
        df = df.drop_duplicates(subset=['t'], keep='first')

        # Mask rows through label-based assignment. Writing into
        # ``df.values`` only modifies the frame when pandas happens to hand
        # back a writable view, so it can silently do nothing.
        # NOTE(review): -50 / 9 look like bad-measurement sentinels -- confirm.
        df[value_cols] = df[value_cols].astype(float)
        bad_r = (df['mr'] < -50) | (df['er'] > 9)
        bad_b = (df['mb'] < -50) | (df['eb'] > 9)
        df.loc[bad_r, ['mr', 'er']] = np.nan
        df.loc[bad_b, ['mb', 'eb']] = np.nan

        if df['mr'].isnull().all():
            continue
        df = df[df['mr'].notnull()].sort_values('t')

        # File stem with its first '_'-separated token dropped and the
        # remaining tokens joined by '.', matching the catalogue index.
        stem = os.path.splitext(os.path.basename(fname))[0]
        name = '.'.join(stem.split('_')[1:])

        lc = LightCurve(name=name, survey='MACHO', times=df['t'].values,
                        measurements=df['mr'].values,
                        errors=df['er'].values)
        lc.label = LC_types[header['Class'].loc[lc.name]]
        # The period comes from the catalogue rather than a Lomb-Scargle fit.
        lc.p = header['rPer'].loc[lc.name]
        lc.fit_supersmoother()
        light_curves.append(lc)
    return light_curves




if __name__ == "__main__":
    # Script entry point: build the MACHO light-curve list and pickle it.
    print("Adding light curve data")
    # The other surveys stay disabled so that a MACHO run does not also
    # regenerate their pickles:
    # light_curves = LightCurve.load_asas()
    # joblib.dump(light_curves, 'asas.pkl', compress=3)
    # light_curves = LightCurve.load_linear()
    # joblib.dump(light_curves, 'linear.pkl', compress=3)
    light_curves = LightCurve.load_macho()
    joblib.dump(light_curves, 'macho.pkl', compress=3)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy
pandas
pandas==0.19.2
scikit-learn
tensorflow
keras==1.2.2
Expand Down
9 changes: 5 additions & 4 deletions survey_autoencoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
from light_curve import LightCurve


def preprocess(X_raw, m_max=None):
def preprocess(X_raw, m_max=np.inf):
X = X_raw.copy()

if m_max:
wrong_units = np.nanmax(X[:, :, 1], axis=1) > m_max
X = X[~wrong_units, :, :]
wrong_units = np.all(np.isnan(X[:, :, 1])) | (np.nanmax(X[:, :, 1], axis=1) > m_max)
X = X[~wrong_units, :, :]

# Replace times w/ lags
X[:, :, 0] = ku.times_to_lags(X[:, :, 0])
Expand Down Expand Up @@ -60,6 +59,8 @@ def main(args=None):
X_list = [np.c_[lc.times, lc.measurements, lc.errors] for lc in split]

X_raw = pad_sequences(X_list, value=np.nan, dtype='float', padding='post')
if args.N_train is not None:
X_raw = X_raw[:args.N_train]

model_type_dict = {'gru': GRU, 'lstm': LSTM, 'vanilla': SimpleRNN}
X, means, scales, wrong_units = preprocess(X_raw, args.m_max)
Expand Down

0 comments on commit dcc0ece

Please sign in to comment.