Reading the data

[15]:
# CSV file holding the single light curve explored in the first half of the
# notebook. Despite its name, PATH_DIR points to a file, not a directory.
PATH_DIR = (
    '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets/'
    'exoplanets_confirmed/resampled_files/'
    'RESAMPLED_EN2_STAR_CHR_0101086161_20070516T060226_20071005T074409.csv'
)
# Alternative inputs (local Windows copies of the dataset), kept for reference:
# PATH_DIR = 'C:/Users/guisa/Google Drive/01 - Iniciação Científica/02 - Datasets/exoplanets_confirmed/resampled_files/RESAMPLED_EN2_STAR_CHR_0101086161_20070516T060226_20071005T074409.csv'
# PATH_DIR = 'C:/Users/guisa/Google Drive/01 - Iniciação Científica/02 - Datasets/exoplanets_confirmed/resampled_files/RESAMPLED_EN2_STAR_CHR_0102890318_20070206T133547_20070402T070302.csv'
[16]:
import pandas as pd

# Load the selected light-curve CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=PATH_DIR)

# Show the first five rows as a quick sanity check of the columns.
data.head(n=5)
[16]:
DATE WHITEFLUX
0 2007-05-16 18:10:55.071642 112521.329834
1 2007-05-16 18:24:29.950108 112758.045853
2 2007-05-16 18:38:04.828574 112943.042225
3 2007-05-16 18:51:39.707040 112562.266242
4 2007-05-16 19:05:14.585506 112789.303079
[17]:
from datetime import datetime

# Split the loaded frame into its two columns: timestamp strings and flux values.
time = data.DATE
flux = data.WHITEFLUX

# Timestamps may or may not carry fractional seconds. Try the format with
# microseconds first and fall back to whole seconds.
try:
  time = [datetime.strptime(i, '%Y-%m-%d %H:%M:%S.%f') for i in time]
except ValueError:
  # strptime raises ValueError on a format mismatch. Any other failure
  # (e.g. a non-string cell, or a keyboard interrupt) is left to propagate
  # instead of being silently retried with the second format.
  time = [datetime.strptime(i, '%Y-%m-%d %H:%M:%S') for i in time]

Next, let’s import the tools package, which provides the supporting algorithms

[ ]:
# Star import from the project-local `tools` package. The `viz` helper used by
# the plotting cells below is presumably one of the names it exposes.
# NOTE(review): confirm, and prefer an explicit `from tools import viz` so the
# origin of each name is visible.
from tools import *

Plotting the chosen curve

[ ]:
# Plot the selected light curve. `time` is the list of parsed datetimes and
# `flux` is the WHITEFLUX column. Presumably this draws flux against time;
# the helper lives in the `tools` package (TODO confirm its exact behaviour).
viz.view_lightcurve(time, flux)

Feature: Periodograms


Spectrum generation

[ ]:
# Sampling interval: the smallest gap between two consecutive timestamps.
sample_time = pd.Series(time).diff().min()

# Use total_seconds(): the `.seconds` attribute is only the integer seconds
# component of the Timedelta (it drops days and microseconds), which biases the
# sampling frequency and every frequency axis derived from it.
sample_period_seconds = sample_time.total_seconds()
sample_frequency = 1 / sample_period_seconds

# The message labels the first value as minutes, so report it in minutes
# rather than printing the raw Timedelta.
print("The series have a time sample of {} minutes, so the sample frequency is {} Hz".format(round(sample_period_seconds / 60, 2), round(sample_frequency, 6)))
The series have a time sample of 0 days 00:13:34.878465 minutes, so the sample frequency is 0.001229 Hz
[ ]:
import scipy.signal as ssg

# Subtract the least-squares straight line from the flux so that a slow linear
# drift does not dominate the low-frequency end of the spectrum.
# Note: `flux` is rebound to the detrended array.
flux = ssg.detrend(data=flux, type='linear')
[ ]:
# Power spectral density estimate of the detrended flux:
# X holds the sample frequencies (Hz), Y the spectral density at each frequency.
X, Y = ssg.periodogram(x=flux, fs=sample_frequency, scaling='density')
[ ]:
import numpy as np

# The first periodogram bin is the zero-frequency (DC) term. log10(0) is -inf
# and raises "RuntimeWarning: divide by zero", so that bin is dropped from both
# axes before the frequency axis is log-scaled.
viz.line_plot(np.log10(X[1:]), Y[1:], title='Light Curve Frequency Spectrum', x_axis='Frequency [log10(Hz)]', y_axis='Magnitude')
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: RuntimeWarning: divide by zero encountered in log10
  This is separate from the ipykernel package so we can avoid doing imports until

Generation algorithm

[ ]:
import numpy as np

# Shape of the frequency axis produced by the single-curve periodogram above.
s = X.shape

# Zero-filled buffer with one row per frequency bin and two columns.
# NOTE(review): not referenced by any later cell visible here. Confirm it is
# still needed.
periodogram = np.zeros(shape=(s[0], 2))
[ ]:
# Dataset selection switches: set exactly one of them to True.
# When both are True, the eclipsing-binaries branch wins.
eclipsing_binaries = False
exoplanets_confirmed = True

if eclipsing_binaries:
  DATA_DIR = '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets/eclipsing_binaries'

elif exoplanets_confirmed:
  DATA_DIR = '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets/exoplanets_confirmed/resampled_files'

else:
  # Without this branch DATA_DIR would be left unbound (or stale from an
  # earlier run) and the failure would only surface later in the feature loop.
  raise ValueError("No dataset selected: set eclipsing_binaries or exoplanets_confirmed to True")
[18]:
import os

# One periodogram per light-curve file. The rows are collected in a list and the
# DataFrame is built once at the end: DataFrame.append copied the whole frame on
# every iteration and no longer exists in pandas >= 2.0.
spectra = []

for root_dir_path, sub_dirs, files in os.walk(DATA_DIR):
    for csv_name in files:
        if not csv_name.endswith('.csv'):
            continue

        path = os.path.join(root_dir_path, csv_name)
        data = pd.read_csv(path)

        # Timestamps may or may not carry fractional seconds.
        time = data.DATE
        try:
            time = [datetime.strptime(i, '%Y-%m-%d %H:%M:%S.%f') for i in time]
        except ValueError:
            # Only a format mismatch triggers the fallback; other errors propagate.
            time = [datetime.strptime(i, '%Y-%m-%d %H:%M:%S') for i in time]

        flux = data.WHITEFLUX

        # Sampling frequency from the smallest gap between consecutive samples.
        # total_seconds() keeps days and microseconds; `.seconds` is only the
        # integer seconds component.
        sample_time = pd.Series(time).diff().min()
        sample_frequency = 1 / sample_time.total_seconds()

        # Detrend data
        detrend_flux = ssg.detrend(flux, type='linear')

        # Create the periodogram
        freq, spec = ssg.periodogram(detrend_flux, fs=sample_frequency)

        # Keep the spectrum as one future row of the feature table
        spectra.append(pd.Series(spec))

# Each Series becomes one row; shorter spectra are padded with NaN, and the rows
# are numbered 0..n-1 (same layout as append(..., ignore_index=True) produced).
DF = pd.DataFrame(spectra)

[19]:
# Preview the feature table: one row per processed CSV file, one column per
# periodogram bin (the spectrum values appended in the loop above).
DF.head()
[19]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 ... 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525
0 3.698734e-21 2.187372e+10 4.653541e+09 2.977435e+10 5.847694e+09 1.299638e+10 9.043435e+09 1.605398e+09 4.163878e+08 6.495074e+08 8.126626e+09 9.406637e+09 2.139047e+09 7.464040e+08 8.710111e+08 3.066070e+08 1.722429e+09 1.694147e+09 9.865663e+08 8.465616e+08 1.380645e+09 3.179946e+07 1.181358e+09 7.756270e+08 1.003634e+08 6.288176e+08 3.771828e+08 8.828726e+08 1.492417e+09 1.647794e+08 1.810905e+09 1.071801e+10 1.822772e+10 8.686493e+08 1.672761e+09 4.151073e+08 1.422037e+08 3.095936e+08 4.878045e+07 3.732714e+08 ... 5.859518e+06 1.802153e+07 6.298346e+06 4.934046e+06 3.243160e+07 4.082484e+06 1.900234e+07 1.505044e+07 8.717856e+06 1.667094e+07 2.006054e+07 1.210509e+06 1.945082e+07 7.096113e+06 2.315154e+07 4.012920e+07 1.055361e+06 4.469378e+06 3.237999e+05 4.170387e+06 1.672472e+07 3.123520e+07 1.453152e+06 3.700304e+07 1.612176e+06 1.006348e+07 2.784085e+06 1.442023e+06 1.009989e+07 5.640032e+06 3.093162e+06 6.324198e+07 5.984340e+06 5.636384e+07 6.359579e+06 2.165289e+07 3.362567e+07 1.639384e+06 7.700957e+07 4.122675e+05
1 3.999557e-18 5.784624e+13 5.242573e+12 1.114427e+12 8.653755e+12 3.495333e+13 6.333985e+13 1.223317e+13 7.133227e+11 3.332728e+12 6.307572e+12 1.189972e+13 1.339273e+13 4.585274e+12 2.595879e+12 1.647835e+11 1.322158e+12 3.092180e+12 1.231179e+11 8.636186e+10 3.534272e+10 1.728992e+11 1.773730e+10 1.762227e+11 3.180256e+10 1.703417e+11 2.373593e+10 5.385853e+10 2.690612e+10 7.111801e+10 8.931761e+10 1.096729e+11 3.603436e+10 2.655039e+08 3.324433e+10 1.137900e+11 5.872237e+10 3.719629e+10 4.964274e+10 3.085536e+10 ... 1.209086e+09 1.702009e+09 3.732716e+08 3.764944e+07 6.530433e+08 3.449799e+08 2.953255e+06 5.937585e+08 8.981582e+07 3.058008e+08 1.005418e+08 3.397017e+08 3.034542e+08 1.506536e+08 2.785713e+08 6.444151e+08 2.399501e+08 9.712044e+08 3.309554e+08 5.073232e+08 1.480754e+09 4.535894e+08 9.340738e+06 2.035568e+08 7.644298e+08 2.158474e+08 8.841379e+08 2.560128e+08 1.858450e+08 1.644058e+08 7.884955e+08 8.525404e+07 5.671301e+08 6.771197e+08 8.770928e+07 7.573039e+06 2.590354e+08 3.938666e+07 2.073784e+08 3.598600e+08
2 3.301438e-21 1.656106e+11 4.241954e+10 1.295592e+10 1.441850e+10 7.333357e+09 3.654204e+09 8.038780e+08 2.345564e+09 3.428295e+08 1.486851e+08 4.355384e+07 1.922543e+09 1.190937e+09 1.042278e+09 2.177381e+09 2.186346e+08 2.266618e+08 1.933830e+08 1.024137e+09 6.308557e+07 3.175023e+08 3.831862e+08 1.347591e+08 4.991692e+07 6.616579e+07 3.089654e+08 2.338376e+08 1.757878e+08 1.022475e+08 1.619658e+08 2.161862e+07 7.733964e+07 1.881846e+08 2.409856e+07 1.216204e+08 5.106637e+07 2.844362e+07 1.700110e+08 1.287244e+07 ... 6.336029e+06 2.901623e+06 9.709432e+06 6.766949e+06 3.311118e+07 3.102628e+06 6.014224e+06 1.052508e+07 1.792224e+06 5.597886e+06 8.938615e+06 2.654949e+06 3.223749e+05 1.365606e+07 5.680955e+06 1.315171e+07 1.306965e+07 1.334836e+06 2.123376e+07 3.334838e+04 4.742393e+06 6.822047e+05 4.105110e+07 9.704217e+06 1.313942e+06 7.024191e+06 3.920979e+05 1.611153e+07 8.167207e+06 1.913767e+07 3.936906e+06 4.087972e+06 2.124753e+07 1.382830e+07 3.013891e+06 1.112224e+07 1.568576e+07 6.561321e+06 2.489063e+06 2.142671e+05
3 2.347411e-21 1.446522e+11 2.926439e+10 8.856258e+10 1.267793e+11 7.801648e+10 9.524482e+10 5.849805e+10 2.288253e+10 2.312979e+10 2.931245e+10 2.805120e+10 3.957929e+10 6.142179e+10 6.216595e+10 2.295419e+10 9.917353e+09 2.422448e+10 2.340713e+10 2.166599e+10 9.182803e+09 6.499000e+09 6.344703e+09 4.445389e+09 1.498852e+09 9.027348e+08 2.036861e+09 2.321052e+09 3.358732e+09 2.087908e+09 1.687891e+09 1.970376e+09 1.535445e+09 6.515868e+08 1.452884e+09 4.013471e+09 9.788411e+08 1.176883e+09 3.133791e+09 4.906828e+09 ... 2.277813e+06 4.568980e+06 3.991391e+06 7.130943e+06 1.204470e+07 3.825498e+06 9.432747e+05 2.249743e+07 1.022491e+06 3.314838e+06 5.426386e+06 1.971414e+07 1.918997e+06 5.693808e+06 1.010783e+06 8.312123e+05 1.079153e+07 5.020188e+06 1.862797e+06 1.532614e+07 2.007534e+06 4.360234e+06 1.105087e+07 2.224375e+06 2.473986e+06 7.483401e+06 2.209326e+07 2.079834e+06 4.852925e+06 1.833137e+07 1.312425e+06 1.618716e+06 6.161822e+05 7.069934e+06 8.741769e+05 1.412689e+07 2.782050e+07 8.039433e+06 1.307975e+07 3.595673e+06
4 1.514690e-21 2.660556e+12 8.750812e+11 5.456667e+11 8.604106e+11 5.928171e+12 3.845155e+11 6.003512e+10 1.230223e+12 5.696613e+11 5.510373e+11 1.229502e+11 4.331146e+11 6.536904e+11 1.155310e+11 3.729638e+11 6.861902e+11 7.168059e+11 1.163156e+12 6.739932e+11 6.537787e+11 9.457468e+11 1.641149e+12 9.234042e+11 3.391170e+11 2.400593e+12 1.125012e+12 1.619200e+13 4.257016e+13 1.386066e+14 1.251609e+14 2.513605e+14 6.976831e+13 2.252078e+13 1.101076e+13 8.750051e+12 4.718911e+12 2.135353e+12 2.630677e+12 1.462807e+12 ... 8.021413e+07 9.226670e+07 6.950245e+07 2.063020e+07 1.330526e+08 5.228473e+07 8.631832e+07 7.812558e+07 5.874413e+07 7.679085e+08 8.844318e+06 1.591009e+08 6.224376e+07 1.198233e+07 1.006110e+08 2.624271e+08 5.238574e+07 2.207024e+07 8.199011e+07 9.283523e+07 1.381196e+08 8.889676e+07 2.658051e+07 1.604157e+07 9.881702e+06 1.427925e+08 3.425586e+07 1.080499e+08 8.890381e+07 7.306812e+07 3.266698e+07 4.452323e+07 1.256674e+06 1.378539e+08 5.313656e+07 1.253566e+07 2.295495e+07 1.237998e+08 7.109648e+07 1.491803e+08

5 rows × 7526 columns

Saving the feature

[ ]:
# Output file for the periodogram feature table (relative to the working directory).
file_name = 'feature_periodograms.csv'

# Write the spectra without the row index, so the CSV holds only the bin columns.
DF.to_csv(path_or_buf=file_name, index=False)