#######################################################
# terrasat_to_sunnyvale_truth.py:
#
# Desc:
#  Given a .txt file in Terrasat format (specifically 40 Hz Pospac truth export), 
#  convert to 10Hz Sunnyvale format.
#
# Usage:
# python terrasat_to_sunnyvale_truth.py <input_file> <output_file>
#
# Notes:
#  Velocity sigmas are written as NaN columns because they are not included in the Terrasat data
#
#######################################################

import re
import sys

def process_file(input_file, output_file):
    """Convert a Terrasat truth export into a decimated Sunnyvale-format file.

    The input is scanned for the first line that begins with a decimal
    number; that line and everything after it are treated as data rows.
    Each row with at least 20 whitespace-separated fields is reordered into
    the Sunnyvale column layout, every value is written with five decimal
    places, and three ``NaN`` placeholders are appended for the velocity
    sigmas that the Terrasat export does not provide. Rows are then thinned
    so that consecutive kept rows are at least 0.1 apart in the TIME column
    (10 Hz when TIME is in seconds).

    Args:
        input_file: Path of the Terrasat-format text file to read.
        output_file: Path of the Sunnyvale-format text file to write. When
            the input holds no usable data rows, an empty file is written.

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a selected field of a data row is not a number.
    """
    with open(input_file, 'r') as file:
        lines = file.readlines()

    # Data begins at the first line starting with a decimal number. When no
    # line matches, fall back to scanning the whole file from the top.
    data_start_index = next(
        (i for i, line in enumerate(lines) if re.match(r'^\s*\d+\.\d+', line)),
        0,
    )

    # Column layout of the Terrasat export.
    # NOTE: "ELLIPSOID HEIGHT" appears twice; list.index() below resolves the
    # name to its first occurrence (index 4).
    original_columns = [
        "TIME", "DISTANCE", "EASTING", "NORTHING", "ELLIPSOID HEIGHT", "LATITUDE", "LONGITUDE",
        "ELLIPSOID HEIGHT", "ROLL", "PITCH", "HEADING", "EAST VELOCITY", "NORTH VELOCITY",
        "UP VELOCITY", "EAST SD", "NORTH SD", "HEIGHT SD", "ROLL SD", "PITCH SD", "HEADING SD",
    ]

    # Column layout of the Sunnyvale format, excluding the three trailing
    # velocity-sigma columns that are filled with NaN placeholders.
    new_columns = [
        "TIME", "LATITUDE", "LONGITUDE", "ELLIPSOID HEIGHT", "EAST VELOCITY", "NORTH VELOCITY",
        "UP VELOCITY", "EAST SD", "NORTH SD", "HEIGHT SD", "ROLL", "PITCH", "HEADING",
        "ROLL SD", "PITCH SD", "HEADING SD",
    ]

    # For each output column, the index of the input column it is read from.
    column_mapping = [original_columns.index(col) for col in new_columns]

    # Every value is written with this many decimals, so it is also the
    # precision at which time differences are compared.
    decimals = 5
    time_increment = round(0.1, decimals)

    filtered_lines = []
    last_time = None
    for line in lines[data_start_index:]:
        columns = line.split()
        if len(columns) < len(original_columns):
            continue  # Skip lines that do not have enough columns

        formatted_columns = [f"{float(columns[j]):.{decimals}f}" for j in column_mapping]

        # Compare on the formatted (rounded) time so that floating-point
        # noise in the input cannot drop a sample that sits on the 0.1 grid.
        time = float(formatted_columns[0])
        if last_time is None or round(time - last_time, decimals) >= time_increment:
            formatted_columns.extend(['NaN', 'NaN', 'NaN'])  # velocity sigma placeholders
            filtered_lines.append(' '.join(formatted_columns) + '\n')
            last_time = time

    # Write the processed data to the output file (empty if no rows were found)
    with open(output_file, 'w') as file:
        file.writelines(filtered_lines)

# Command-line entry point: expects exactly an input path and an output path.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        # Report the script path as invoked so the message cannot drift from
        # the actual file name; usage errors go to stderr.
        print(f"Usage: python {sys.argv[0]} <input_file> <output_file>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]
    process_file(input_file, output_file)
