Reading the data
[15]:
# Light curve used for the single-file walkthrough; the path points into a
# Drive folder under /content/drive.
PATH_DIR = (
    '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets'
    '/exoplanets_confirmed/resampled_files'
    '/RESAMPLED_EN2_STAR_CHR_0101086161_20070516T060226_20071005T074409.csv'
)
# Alternative inputs kept for reference (local Windows copies of the dataset):
# PATH_DIR = 'C:/Users/guisa/Google Drive/01 - Iniciação Científica/02 - Datasets/exoplanets_confirmed/resampled_files/RESAMPLED_EN2_STAR_CHR_0101086161_20070516T060226_20071005T074409.csv'
# PATH_DIR = 'C:/Users/guisa/Google Drive/01 - Iniciação Científica/02 - Datasets/exoplanets_confirmed/resampled_files/RESAMPLED_EN2_STAR_CHR_0102890318_20070206T133547_20070402T070302.csv'
[16]:
import pandas as pd

# Load the light curve selected by PATH_DIR and preview its first five rows.
data = pd.read_csv(filepath_or_buffer=PATH_DIR)
data.head(n=5)
[16]:
| DATE | WHITEFLUX | |
|---|---|---|
| 0 | 2007-05-16 18:10:55.071642 | 112521.329834 |
| 1 | 2007-05-16 18:24:29.950108 | 112758.045853 |
| 2 | 2007-05-16 18:38:04.828574 | 112943.042225 |
| 3 | 2007-05-16 18:51:39.707040 | 112562.266242 |
| 4 | 2007-05-16 19:05:14.585506 | 112789.303079 |
[17]:
from datetime import datetime

# Flux column of the loaded light curve, kept as read from the CSV.
flux = data.WHITEFLUX

# Convert the DATE column into datetime objects. The first format expects
# fractional seconds; if any value does not match it, the whole column is
# re-parsed with the format that has no fractional part.
try:
    time = [datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S.%f') for stamp in data.DATE]
except ValueError:
    # strptime raises ValueError on a format mismatch. Catching only that keeps
    # unrelated failures (KeyboardInterrupt, TypeError on non-string values)
    # visible instead of silently retrying.
    time = [datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S') for stamp in data.DATE]
Next, let’s import the tools package, which provides the supporting algorithms
[ ]:
# NOTE(review): wildcard import, so the names it brings in are not visible here.
# Presumably this is where `viz`, used by the plotting cells, comes from — confirm
# and prefer an explicit import once the exported names are known.
from tools import *
Plotting the chosen curve
[ ]:
# Plot the selected light curve from the parsed timestamps and the flux values.
# `viz` is not defined in the visible cells; presumably it is provided by
# `from tools import *` — confirm.
viz.view_lightcurve(time, flux)
Feature: Periodograms
Spectrum generation
[ ]:
# Sampling interval: the smallest gap between consecutive timestamps.
sample_time = pd.Series(time).diff().min()
# Use total_seconds(): the `.seconds` attribute is only the integer seconds
# component of the Timedelta, so it drops the fractional part (and any whole
# days) and biases the sampling frequency.
sample_frequency = 1 / sample_time.total_seconds()
# The interval is printed as a Timedelta, so no unit label is attached to it.
print("The series have a time sample of {}, so the sample frequency is {} Hz".format(sample_time, round(sample_frequency, 6)))
The series have a time sample of 0 days 00:13:34.878465 minutes, so the sample frequency is 0.001229 Hz
[ ]:
import scipy.signal as ssg

# Subtract a linear trend from the flux. Note that this rebinds `flux`, so
# cells below see the detrended values rather than the original column.
flux = ssg.detrend(data=flux, type='linear')
[ ]:
# Periodogram of the current `flux`: X receives the frequency bins and Y the
# corresponding spectral density values.
X, Y = ssg.periodogram(
    x=flux,
    fs=sample_frequency,
    scaling='density',
)
[ ]:
import numpy as np

# The periodogram's first frequency bin is 0 Hz; log10(0) is -inf and emits a
# "divide by zero" RuntimeWarning. Plot only the strictly positive frequencies,
# which are the only ones that can appear on a log axis anyway.
positive = X > 0
viz.line_plot(np.log10(X[positive]), Y[positive], title='Light Curve Frequency Spectrum', x_axis='Frequency [log10(Hz)]', y_axis='Magnitude')
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: RuntimeWarning: divide by zero encountered in log10
This is separate from the ipykernel package so we can avoid doing imports until
Generation algorithm
[ ]:
import numpy as np

# Allocate a zero-filled array with one row per frequency bin in X and two columns.
s = np.shape(X)
periodogram = np.zeros(shape=(s[0], 2), dtype=float)
[ ]:
# Select which dataset folder the batch loop walks. If both flags are True,
# eclipsing_binaries takes precedence.
eclipsing_binaries = False
exoplanets_confirmed = True

if eclipsing_binaries:
    DATA_DIR = '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets/eclipsing_binaries'
elif exoplanets_confirmed:
    DATA_DIR = '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets/exoplanets_confirmed/resampled_files'
else:
    # Without this branch DATA_DIR would be left undefined (or keep a stale value
    # from an earlier run of this cell) and the failure would surface later.
    raise ValueError("No dataset selected: set eclipsing_binaries or exoplanets_confirmed to True")
[18]:
import os

# Build one periodogram per CSV light curve found under DATA_DIR (recursively)
# and stack them as rows of DF; columns are the frequency-bin indices. Spectra
# of different lengths are aligned on those indices and padded with NaN.
#
# NOTE: this loop rebinds `data`, `time`, `flux`, `sample_time` and
# `sample_frequency` from the cells above; after it runs they describe the
# last file processed.
# NOTE(review): rows follow os.walk order, which depends on the filesystem.
spectra = []
for root_dir_path, sub_dirs, files in os.walk(DATA_DIR):
    for csv_name in files:
        if not csv_name.endswith('.csv'):
            continue

        path = os.path.join(root_dir_path, csv_name)
        data = pd.read_csv(path)
        flux = data.WHITEFLUX

        # Parse DATE with fractional seconds first, falling back to whole
        # seconds. Only ValueError (format mismatch) triggers the fallback, so
        # unrelated errors are not hidden.
        try:
            time = [datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S.%f') for stamp in data.DATE]
        except ValueError:
            time = [datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S') for stamp in data.DATE]

        # total_seconds() keeps the sub-second part of the sampling interval;
        # `.seconds` would truncate it and skew the sampling frequency.
        sample_time = pd.Series(time).diff().min()
        sample_frequency = 1 / sample_time.total_seconds()

        # Detrend data
        detrend_flux = ssg.detrend(flux, type='linear')

        # Create the periodogram
        freq, spec = ssg.periodogram(detrend_flux, fs=sample_frequency)

        # Collect the spectrum; the DataFrame is built once after the loop.
        # DataFrame.append copied the frame on every call and no longer exists
        # in pandas 2.x.
        spectra.append(pd.Series(spec))

# Save the data on a pd.DataFrame (one row per light curve)
DF = pd.DataFrame(spectra)
[19]:
# Preview the first five rows of the periodogram feature matrix.
DF.head(n=5)
[19]:
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | ... | 7486 | 7487 | 7488 | 7489 | 7490 | 7491 | 7492 | 7493 | 7494 | 7495 | 7496 | 7497 | 7498 | 7499 | 7500 | 7501 | 7502 | 7503 | 7504 | 7505 | 7506 | 7507 | 7508 | 7509 | 7510 | 7511 | 7512 | 7513 | 7514 | 7515 | 7516 | 7517 | 7518 | 7519 | 7520 | 7521 | 7522 | 7523 | 7524 | 7525 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3.698734e-21 | 2.187372e+10 | 4.653541e+09 | 2.977435e+10 | 5.847694e+09 | 1.299638e+10 | 9.043435e+09 | 1.605398e+09 | 4.163878e+08 | 6.495074e+08 | 8.126626e+09 | 9.406637e+09 | 2.139047e+09 | 7.464040e+08 | 8.710111e+08 | 3.066070e+08 | 1.722429e+09 | 1.694147e+09 | 9.865663e+08 | 8.465616e+08 | 1.380645e+09 | 3.179946e+07 | 1.181358e+09 | 7.756270e+08 | 1.003634e+08 | 6.288176e+08 | 3.771828e+08 | 8.828726e+08 | 1.492417e+09 | 1.647794e+08 | 1.810905e+09 | 1.071801e+10 | 1.822772e+10 | 8.686493e+08 | 1.672761e+09 | 4.151073e+08 | 1.422037e+08 | 3.095936e+08 | 4.878045e+07 | 3.732714e+08 | ... | 5.859518e+06 | 1.802153e+07 | 6.298346e+06 | 4.934046e+06 | 3.243160e+07 | 4.082484e+06 | 1.900234e+07 | 1.505044e+07 | 8.717856e+06 | 1.667094e+07 | 2.006054e+07 | 1.210509e+06 | 1.945082e+07 | 7.096113e+06 | 2.315154e+07 | 4.012920e+07 | 1.055361e+06 | 4.469378e+06 | 3.237999e+05 | 4.170387e+06 | 1.672472e+07 | 3.123520e+07 | 1.453152e+06 | 3.700304e+07 | 1.612176e+06 | 1.006348e+07 | 2.784085e+06 | 1.442023e+06 | 1.009989e+07 | 5.640032e+06 | 3.093162e+06 | 6.324198e+07 | 5.984340e+06 | 5.636384e+07 | 6.359579e+06 | 2.165289e+07 | 3.362567e+07 | 1.639384e+06 | 7.700957e+07 | 4.122675e+05 |
| 1 | 3.999557e-18 | 5.784624e+13 | 5.242573e+12 | 1.114427e+12 | 8.653755e+12 | 3.495333e+13 | 6.333985e+13 | 1.223317e+13 | 7.133227e+11 | 3.332728e+12 | 6.307572e+12 | 1.189972e+13 | 1.339273e+13 | 4.585274e+12 | 2.595879e+12 | 1.647835e+11 | 1.322158e+12 | 3.092180e+12 | 1.231179e+11 | 8.636186e+10 | 3.534272e+10 | 1.728992e+11 | 1.773730e+10 | 1.762227e+11 | 3.180256e+10 | 1.703417e+11 | 2.373593e+10 | 5.385853e+10 | 2.690612e+10 | 7.111801e+10 | 8.931761e+10 | 1.096729e+11 | 3.603436e+10 | 2.655039e+08 | 3.324433e+10 | 1.137900e+11 | 5.872237e+10 | 3.719629e+10 | 4.964274e+10 | 3.085536e+10 | ... | 1.209086e+09 | 1.702009e+09 | 3.732716e+08 | 3.764944e+07 | 6.530433e+08 | 3.449799e+08 | 2.953255e+06 | 5.937585e+08 | 8.981582e+07 | 3.058008e+08 | 1.005418e+08 | 3.397017e+08 | 3.034542e+08 | 1.506536e+08 | 2.785713e+08 | 6.444151e+08 | 2.399501e+08 | 9.712044e+08 | 3.309554e+08 | 5.073232e+08 | 1.480754e+09 | 4.535894e+08 | 9.340738e+06 | 2.035568e+08 | 7.644298e+08 | 2.158474e+08 | 8.841379e+08 | 2.560128e+08 | 1.858450e+08 | 1.644058e+08 | 7.884955e+08 | 8.525404e+07 | 5.671301e+08 | 6.771197e+08 | 8.770928e+07 | 7.573039e+06 | 2.590354e+08 | 3.938666e+07 | 2.073784e+08 | 3.598600e+08 |
| 2 | 3.301438e-21 | 1.656106e+11 | 4.241954e+10 | 1.295592e+10 | 1.441850e+10 | 7.333357e+09 | 3.654204e+09 | 8.038780e+08 | 2.345564e+09 | 3.428295e+08 | 1.486851e+08 | 4.355384e+07 | 1.922543e+09 | 1.190937e+09 | 1.042278e+09 | 2.177381e+09 | 2.186346e+08 | 2.266618e+08 | 1.933830e+08 | 1.024137e+09 | 6.308557e+07 | 3.175023e+08 | 3.831862e+08 | 1.347591e+08 | 4.991692e+07 | 6.616579e+07 | 3.089654e+08 | 2.338376e+08 | 1.757878e+08 | 1.022475e+08 | 1.619658e+08 | 2.161862e+07 | 7.733964e+07 | 1.881846e+08 | 2.409856e+07 | 1.216204e+08 | 5.106637e+07 | 2.844362e+07 | 1.700110e+08 | 1.287244e+07 | ... | 6.336029e+06 | 2.901623e+06 | 9.709432e+06 | 6.766949e+06 | 3.311118e+07 | 3.102628e+06 | 6.014224e+06 | 1.052508e+07 | 1.792224e+06 | 5.597886e+06 | 8.938615e+06 | 2.654949e+06 | 3.223749e+05 | 1.365606e+07 | 5.680955e+06 | 1.315171e+07 | 1.306965e+07 | 1.334836e+06 | 2.123376e+07 | 3.334838e+04 | 4.742393e+06 | 6.822047e+05 | 4.105110e+07 | 9.704217e+06 | 1.313942e+06 | 7.024191e+06 | 3.920979e+05 | 1.611153e+07 | 8.167207e+06 | 1.913767e+07 | 3.936906e+06 | 4.087972e+06 | 2.124753e+07 | 1.382830e+07 | 3.013891e+06 | 1.112224e+07 | 1.568576e+07 | 6.561321e+06 | 2.489063e+06 | 2.142671e+05 |
| 3 | 2.347411e-21 | 1.446522e+11 | 2.926439e+10 | 8.856258e+10 | 1.267793e+11 | 7.801648e+10 | 9.524482e+10 | 5.849805e+10 | 2.288253e+10 | 2.312979e+10 | 2.931245e+10 | 2.805120e+10 | 3.957929e+10 | 6.142179e+10 | 6.216595e+10 | 2.295419e+10 | 9.917353e+09 | 2.422448e+10 | 2.340713e+10 | 2.166599e+10 | 9.182803e+09 | 6.499000e+09 | 6.344703e+09 | 4.445389e+09 | 1.498852e+09 | 9.027348e+08 | 2.036861e+09 | 2.321052e+09 | 3.358732e+09 | 2.087908e+09 | 1.687891e+09 | 1.970376e+09 | 1.535445e+09 | 6.515868e+08 | 1.452884e+09 | 4.013471e+09 | 9.788411e+08 | 1.176883e+09 | 3.133791e+09 | 4.906828e+09 | ... | 2.277813e+06 | 4.568980e+06 | 3.991391e+06 | 7.130943e+06 | 1.204470e+07 | 3.825498e+06 | 9.432747e+05 | 2.249743e+07 | 1.022491e+06 | 3.314838e+06 | 5.426386e+06 | 1.971414e+07 | 1.918997e+06 | 5.693808e+06 | 1.010783e+06 | 8.312123e+05 | 1.079153e+07 | 5.020188e+06 | 1.862797e+06 | 1.532614e+07 | 2.007534e+06 | 4.360234e+06 | 1.105087e+07 | 2.224375e+06 | 2.473986e+06 | 7.483401e+06 | 2.209326e+07 | 2.079834e+06 | 4.852925e+06 | 1.833137e+07 | 1.312425e+06 | 1.618716e+06 | 6.161822e+05 | 7.069934e+06 | 8.741769e+05 | 1.412689e+07 | 2.782050e+07 | 8.039433e+06 | 1.307975e+07 | 3.595673e+06 |
| 4 | 1.514690e-21 | 2.660556e+12 | 8.750812e+11 | 5.456667e+11 | 8.604106e+11 | 5.928171e+12 | 3.845155e+11 | 6.003512e+10 | 1.230223e+12 | 5.696613e+11 | 5.510373e+11 | 1.229502e+11 | 4.331146e+11 | 6.536904e+11 | 1.155310e+11 | 3.729638e+11 | 6.861902e+11 | 7.168059e+11 | 1.163156e+12 | 6.739932e+11 | 6.537787e+11 | 9.457468e+11 | 1.641149e+12 | 9.234042e+11 | 3.391170e+11 | 2.400593e+12 | 1.125012e+12 | 1.619200e+13 | 4.257016e+13 | 1.386066e+14 | 1.251609e+14 | 2.513605e+14 | 6.976831e+13 | 2.252078e+13 | 1.101076e+13 | 8.750051e+12 | 4.718911e+12 | 2.135353e+12 | 2.630677e+12 | 1.462807e+12 | ... | 8.021413e+07 | 9.226670e+07 | 6.950245e+07 | 2.063020e+07 | 1.330526e+08 | 5.228473e+07 | 8.631832e+07 | 7.812558e+07 | 5.874413e+07 | 7.679085e+08 | 8.844318e+06 | 1.591009e+08 | 6.224376e+07 | 1.198233e+07 | 1.006110e+08 | 2.624271e+08 | 5.238574e+07 | 2.207024e+07 | 8.199011e+07 | 9.283523e+07 | 1.381196e+08 | 8.889676e+07 | 2.658051e+07 | 1.604157e+07 | 9.881702e+06 | 1.427925e+08 | 3.425586e+07 | 1.080499e+08 | 8.890381e+07 | 7.306812e+07 | 3.266698e+07 | 4.452323e+07 | 1.256674e+06 | 1.378539e+08 | 5.313656e+07 | 1.253566e+07 | 2.295495e+07 | 1.237998e+08 | 7.109648e+07 | 1.491803e+08 |
5 rows × 7526 columns
Saving feature
[ ]:
# Write the periodogram feature matrix to a CSV file (relative path), leaving
# out the row index.
# NOTE(review): the file name does not depend on which dataset was selected, so
# running the notebook for another dataset overwrites this file.
file_name = 'feature_periodograms.csv'
DF.to_csv(path_or_buf=file_name, index=False)