Advertisement
mayankjoin3

Appaji Data Filtering

Nov 19th, 2024
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 13.20 KB | None | 0 0
  1. from copy import deepcopy
  2. import csv
  3. import numpy as np
  4. from datetime import datetime
  5. from statistics import stdev
  6.  
  7.  
  8. class DetectSpikes:
  9.     def __init__(
  10.         self,
  11.         Data,
  12.         CORR_THRESHOLD,
  13.         SNR_THRESHOLD,
  14.         VELOCITY_MULTIPLIER,
  15.         CORR_COLS=[15, 16, 17],
  16.         SNR_COLS=[11, 12, 13],
  17.         VELOCITY_COLS=[3, 4, 5],
  18.     ) -> None:
  19.         self.CORR_THRESHOLD = CORR_THRESHOLD
  20.         self.SNR_THRESHOLD = SNR_THRESHOLD
  21.         self.VELOCITY_MULTIPLIER = VELOCITY_MULTIPLIER
  22.         self.Data = Data
  23.         self.CORR_COLS = CORR_COLS
  24.         self.SNR_COLS = SNR_COLS
  25.         self.VELOCITY_COLS = VELOCITY_COLS
  26.  
  27.         self.STD_DEV = {}  # calculating stdev for velocity cols is enough for now
  28.         for col in VELOCITY_COLS:
  29.             data = []
  30.             for row in self.Data:
  31.                 data.append(row[col])
  32.             self.STD_DEV[col] = stdev(data)
  33.  
  34.         self.detection_methods = [
  35.             self.minimum_CORR,
  36.             self.average_CORR,
  37.             self.minimum_SNR,
  38.             self.average_SNR,
  39.             self.min_CORR_min_SNR,
  40.             self.min_CORR_avg_SNR,
  41.             self.avg_CORR_min_SNR,
  42.             self.avg_CORR_avg_SNR,
  43.             self.velocity_threshold,
  44.             self.abs_velocity_threshold,
  45.         ]
  46.  
  47.     def check_threshold(self, row, cols, threshold):
  48.         for col in cols:
  49.             if row[col] < threshold:
  50.                 return True
  51.  
  52.     def check_average(self, row, cols, threshold):
  53.         average = 0
  54.         for col in cols:
  55.             average += row[col]
  56.         average /= len(cols)
  57.         return average < threshold
  58.  
  59.     def check_velocity_threshold(self, row, multiplier):
  60.         normal_sum = 0
  61.         stdev_normal_sum = 0
  62.         for col in self.VELOCITY_COLS:
  63.             normal_sum += row[col]
  64.             stdev_normal_sum += self.STD_DEV[col]
  65.         normal_sum = abs(normal_sum)
  66.         return abs(normal_sum) > multiplier * abs(stdev_normal_sum)
  67.  
  68.     def check_abs_velocity_threshold(self, row, multiplier):
  69.         spikes = 0
  70.         for col in self.VELOCITY_COLS:
  71.             if abs(row[col]) > multiplier * abs(self.STD_DEV[col]):
  72.                 spikes += 1
  73.         return spikes >= 2
  74.  
  75.     def detect_and_replace(self, conditions, replacement_method, filename):
  76.         """
  77.        conditions is a list of list of two objects:
  78.        - function
  79.        - args to the function
  80.        For every row in the Data, we call all the functions in 'conditions' list.
  81.        If all of them returns True for that row, we run replacement method on that row.
  82.        To summarize, we check AND of all conditions in 'conditions' list.
  83.        """
  84.         replaced_rows = 0
  85.         for i, row in enumerate(self.Data):
  86.             all_True = True
  87.             for condition in conditions:
  88.                 if not condition[0](*([row] + condition[1])):
  89.                     all_True = False
  90.             if all_True:
  91.                 replaced_rows += 1
  92.                 replacement_method[0](i)
  93.         print(f"{replaced_rows} row(s) replaced.")
  94.         return filename + "_" + replacement_method[1]
  95.  
  96.     def minimum_CORR(self, replacement_method):
  97.         return self.detect_and_replace(
  98.             [[self.check_threshold, [self.CORR_COLS, self.CORR_THRESHOLD]]],
  99.             replacement_method,
  100.             f"min_CORR_{self.CORR_THRESHOLD}",
  101.         )
  102.  
  103.     def average_CORR(self, replacement_method):
  104.         return self.detect_and_replace(
  105.             [[self.check_average, [self.CORR_COLS, self.CORR_THRESHOLD]]],
  106.             replacement_method,
  107.             f"avg_CORR_{self.CORR_THRESHOLD}",
  108.         )
  109.  
  110.     def minimum_SNR(self, replacement_method):
  111.         return self.detect_and_replace(
  112.             [[self.check_threshold, [self.SNR_COLS, self.SNR_THRESHOLD]]],
  113.             replacement_method,
  114.             f"min_SNR_{self.SNR_THRESHOLD}",
  115.         )
  116.  
  117.     def average_SNR(self, replacement_method):
  118.         return self.detect_and_replace(
  119.             [[self.check_average, [self.SNR_COLS, self.SNR_THRESHOLD]]],
  120.             replacement_method,
  121.             f"avg_SNR_{self.SNR_THRESHOLD}",
  122.         )
  123.  
  124.     def min_CORR_min_SNR(self, replacement_method):
  125.         return self.detect_and_replace(
  126.             [
  127.                 [self.check_threshold, [self.CORR_COLS, self.CORR_THRESHOLD]],
  128.                 [self.check_threshold, [self.SNR_COLS, self.SNR_THRESHOLD]],
  129.             ],
  130.             replacement_method,
  131.             f"min_CORR_{self.CORR_THRESHOLD}_min_SNR_{self.SNR_THRESHOLD}",
  132.         )
  133.  
  134.     def min_CORR_avg_SNR(self, replacement_method):
  135.         return self.detect_and_replace(
  136.             [
  137.                 [self.check_threshold, [self.CORR_COLS, self.CORR_THRESHOLD]],
  138.                 [self.check_average, [self.SNR_COLS, self.SNR_THRESHOLD]],
  139.             ],
  140.             replacement_method,
  141.             f"min_CORR_{self.CORR_THRESHOLD}_avg_SNR_{self.SNR_THRESHOLD}",
  142.         )
  143.  
  144.     def avg_CORR_min_SNR(self, replacement_method):
  145.         return self.detect_and_replace(
  146.             [
  147.                 [self.check_average, [self.CORR_COLS, self.CORR_THRESHOLD]],
  148.                 [self.check_threshold, [self.SNR_COLS, self.SNR_THRESHOLD]],
  149.             ],
  150.             replacement_method,
  151.             f"avg_CORR_{self.CORR_THRESHOLD}_min_SNR_{self.SNR_THRESHOLD}",
  152.         )
  153.  
  154.     def avg_CORR_avg_SNR(self, replacement_method):
  155.         return self.detect_and_replace(
  156.             [
  157.                 [self.check_average, [self.CORR_COLS, self.CORR_THRESHOLD]],
  158.                 [self.check_average, [self.SNR_COLS, self.SNR_THRESHOLD]],
  159.             ],
  160.             replacement_method,
  161.             f"avg_CORR_{self.CORR_THRESHOLD}_avg_SNR_{self.SNR_THRESHOLD}",
  162.         )
  163.  
  164.     def velocity_threshold(self, replacement_method):
  165.         return self.detect_and_replace(
  166.             [[self.check_velocity_threshold, [self.VELOCITY_MULTIPLIER]]],
  167.             replacement_method,
  168.             f"v_threshold_{self.VELOCITY_MULTIPLIER}",
  169.         )
  170.  
  171.     def abs_velocity_threshold(self, replacement_method):
  172.         return self.detect_and_replace(
  173.             [[self.check_abs_velocity_threshold, [self.VELOCITY_MULTIPLIER]]],
  174.             replacement_method,
  175.             f"abs_v_threshold_{self.VELOCITY_MULTIPLIER}",
  176.         )
  177.  
  178.  
  179. class ReplaceSpikes:
  180.     def __init__(
  181.         self,
  182.         Data,
  183.         RAW_COLS=[3, 4, 5],
  184.     ) -> None:
  185.         self.Data = Data
  186.  
  187.         # Calculating Column means for further use
  188.         column_count = len(Data[0])
  189.         self.ColMeans = [0] * column_count
  190.         for i in range(column_count):
  191.             AVG = 0
  192.             for row in Data:
  193.                 AVG += row[i]
  194.             AVG /= len(Data)
  195.             self.ColMeans[i] = AVG
  196.         self.RAW_COLS = RAW_COLS
  197.  
  198.         self.replacement_methods = [
  199.             [self.f1, "R1"],
  200.             [self.f2, "R2"],
  201.             [self.f3, "R3"],
  202.             [self.f4, "R4"],
  203.             [self.f5, "R5"],
  204.         ]
  205.  
  206.     def f1(self, row_index):
  207.         for j in self.RAW_COLS:
  208.             try:
  209.                 self.Data[row_index][j] = self.Data[row_index - 1][j]
  210.             except:
  211.                 self.Data[row_index][j] = self.Data[row_index][j]
  212.  
  213.     def f2(self, row_index):
  214.         for j in self.RAW_COLS:
  215.             try:
  216.                 self.Data[row_index][j] = (
  217.                     2 * self.Data[row_index - 1][j] - self.Data[row_index - 2][j]
  218.                 )
  219.             except:
  220.                 self.Data[row_index][j] = self.Data[row_index][j]
  221.  
  222.     def f3(self, row_index):
  223.         for j in self.RAW_COLS:
  224.             self.Data[row_index][j] = self.ColMeans[j]
  225.  
  226.     def f4(self, row_index):
  227.         # TODO
  228.         for j in self.RAW_COLS:
  229.             self.Data[row_index][j] = self.Data[row_index][j]
  230.  
  231.     def f5(self, row_index):
  232.         for j in self.RAW_COLS:
  233.             try:
  234.                 self.Data[row_index][j] = round(
  235.                     (self.Data[row_index - 1][j] + self.Data[row_index + 1][j]) / 2, 5
  236.                 )
  237.             except IndexError:
  238.                 self.Data[row_index][j] = self.Data[row_index][j]
  239.  
  240.  
  241. def write_to_file(
  242.     Data, COLS, filename, Headers=["TIME", "FILTERED_U", "FILTERED_V", "FILTERED_W"]
  243. ):
  244.     if filename[:-4] != ".csv":
  245.         filename += ".csv"
  246.     filename = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + "_" + filename
  247.     filename = "OUTPUT_DIR/" + filename
  248.     with open(filename, "w", newline="") as f:
  249.         writer = csv.writer(f)
  250.         writer.writerow(Headers)  # header
  251.         for row in Data:
  252.             row_data = []
  253.             for j in COLS:
  254.                 row_data.append(row[j] * 100)
  255.             writer.writerow(row_data)
  256.     print(f"written to {filename}")
  257.  
  258.  
  259. def main():
  260.     INPUT_FILE = "2.9_cm20140611204525_Full_Raw_file_Orig.csv"
  261.     BASE_FILE_NAME = INPUT_FILE[:-4]
  262.     with open(INPUT_FILE, "r") as f:
  263.         reader = csv.reader(f)
  264.         Data = [row for row in reader]
  265.  
  266.     print(Data[:5])
  267.     Data = Data[1:]
  268.     for i in range(len(Data)):
  269.         for j in range(len(Data[i])):
  270.             Data[i][j] = float(Data[i][j])
  271.  
  272.     SNR_THRESHOLD = 20
  273.     CORR_THRESHOLD = 70
  274.     VELOCITY_MULTIPLIER = 1.5
  275.     CORR_COLS = [15, 16, 17]
  276.     SNR_COLS = [11, 12, 13]
  277.     RAW_COLS = [3, 4, 5]
  278.  
  279.     print(
  280.         """
  281.        Choose your choice:
  282.  
  283.        0) Minimum SNR
  284.        1) Minimum Correlation
  285.        2) Average SNR
  286.        3) Average Correlation
  287.        4) Min Correlation & Min SNR
  288.        5) Min Correlation & Average SNR
  289.        6) Average Correlation & Min SNR
  290.        7) Average Correlation & Average SNR
  291.        8) Velocity Threshold
  292.        9) Absolute Velocity Threshold
  293.        10) All of the above
  294.  
  295.        Choose (0) to (10):
  296.        """
  297.     )
  298.     detection_choice = int(input())
  299.     if detection_choice < 0 or detection_choice > 10:
  300.         return
  301.     print(
  302.         """
  303.        Choose your choice:
  304.  
  305.        0) extrapolation from the preceding data point
  306.        1) extrapolation from the two preceding points
  307.        2) the overall mean of the signal
  308.        3) a smoothed estimate
  309.        4) interpolation between the ends of the spike
  310.        5) All of the above
  311.  
  312.        Choose (0) to (5):
  313.        """
  314.     )
  315.     replacement_choice = int(input())
  316.     if replacement_choice < 0 or replacement_choice > 5:
  317.         return
  318.  
  319.     if detection_choice == 10 and replacement_choice == 5:
  320.         for i in range(10):
  321.             for j in range(5):
  322.                 new_data = deepcopy(Data)
  323.                 detection = DetectSpikes(
  324.                     new_data,
  325.                     CORR_THRESHOLD,
  326.                     SNR_THRESHOLD,
  327.                     VELOCITY_MULTIPLIER,
  328.                     CORR_COLS,
  329.                     SNR_COLS,
  330.                 )
  331.                 replacement = ReplaceSpikes(new_data, RAW_COLS)
  332.                 filename = detection.detection_methods[i](
  333.                     replacement.replacement_methods[j]
  334.                 )
  335.                 write_to_file(
  336.                     new_data,
  337.                     [0] + RAW_COLS,
  338.                     BASE_FILE_NAME + "_" + filename,
  339.                     ["TIME", "FILTERED_U", "FILTERED_V", "FILTERED_W"],
  340.                 )
  341.         return
  342.     elif detection_choice == 10:
  343.         for i in range(10):
  344.             new_data = deepcopy(Data)
  345.             detection = DetectSpikes(
  346.                 new_data,
  347.                 CORR_THRESHOLD,
  348.                 SNR_THRESHOLD,
  349.                 VELOCITY_MULTIPLIER,
  350.                 CORR_COLS,
  351.                 SNR_COLS,
  352.             )
  353.             replacement = ReplaceSpikes(new_data, RAW_COLS)
  354.             filename = detection.detection_methods[i](
  355.                 replacement.replacement_methods[replacement_choice]
  356.             )
  357.             write_to_file(
  358.                 new_data,
  359.                 [0] + RAW_COLS,
  360.                 BASE_FILE_NAME + "_" + filename,
  361.                 ["TIME", "FILTERED_U", "FILTERED_V", "FILTERED_W"],
  362.             )
  363.         return
  364.     elif replacement_choice == 5:
  365.         for j in range(5):
  366.             new_data = deepcopy(Data)
  367.             detection = DetectSpikes(
  368.                 new_data,
  369.                 CORR_THRESHOLD,
  370.                 SNR_THRESHOLD,
  371.                 VELOCITY_MULTIPLIER,
  372.                 CORR_COLS,
  373.                 SNR_COLS,
  374.             )
  375.             replacement = ReplaceSpikes(new_data, RAW_COLS)
  376.             filename = detection.detection_methods[detection_choice](
  377.                 replacement.replacement_methods[j]
  378.             )
  379.             write_to_file(
  380.                 new_data,
  381.                 [0] + RAW_COLS,
  382.                 BASE_FILE_NAME + "_" + filename,
  383.                 ["TIME", "FILTERED_U", "FILTERED_V", "FILTERED_W"],
  384.             )
  385.         return
  386.  
  387.     detection = DetectSpikes(
  388.         Data, CORR_THRESHOLD, SNR_THRESHOLD, VELOCITY_MULTIPLIER, CORR_COLS, SNR_COLS
  389.     )
  390.     replacement = ReplaceSpikes(Data, RAW_COLS)
  391.     filename = detection.detection_methods[detection_choice](
  392.         replacement.replacement_methods[replacement_choice]
  393.     )
  394.     write_to_file(
  395.         Data,
  396.         [0] + RAW_COLS,
  397.         BASE_FILE_NAME + "_" + filename,
  398.         ["TIME", "FILTERED_U", "FILTERED_V", "FILTERED_W"],
  399.     )
  400.  
  401.  
  402. if __name__ == "__main__":
  403.     main()
  404.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement