import argparse
import csv
import re
import os
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ParseError
#import json

# Define the headers for the output CSV file.
# This ensures a consistent column order in the output.
# Updated to include 'err_type' and remove 'Warn'
# Column order for the output CSV. NOTE: the writer is a csv.DictWriter
# created with these fieldnames and the default extrasaction='raise', so
# any parsed log key that is not listed here raises ValueError at write
# time — add new keys to this list when the log format grows.
CSV_FIELDNAMES = [
    'index', 'Err', 'tsk_id', 'tsk_name', 'week', 'seconds', 'up', 'flags',
    'Rx', 'fw', 'PC', 'DA', 'SP', 'Callbacks', 'err_type',
]

def parse_log_entry(log_element):
    """
    Parse a single <log> element and its text content into a dictionary.

    Args:
        log_element: An xml.etree.ElementTree.Element representing a <log> tag.

    Returns:
        A dict containing the element's 'index' attribute plus one entry per
        "key:value" token found in its text. Two-part fields are split
        ('tsk' -> tsk_id/tsk_name, 't' -> week/seconds); PC, DA and SP are
        normalized to '0x' + 8 zero-padded uppercase hex digits; 'Callbacks'
        collects the whitespace-separated values that follow it.
    """
    data = {'index': log_element.get('index')}

    # The log data is stored in the element's text content.
    if not log_element.text:
        return data

    # Clean up the text: remove leading/trailing whitespace and merge lines.
    full_text = log_element.text.strip().replace('\n', ' ')

    # --- Special Handling for complex fields ---

    # 1. 'tsk' has two parts (e.g., tsk:38:Data_Logger)
    tsk_match = re.search(r'tsk:(\S+?):(\S+)', full_text)
    if tsk_match:
        data['tsk_id'] = tsk_match.group(1)
        data['tsk_name'] = tsk_match.group(2)

    # 2. 't' has two parts (e.g., t:2369:493056157)
    t_match = re.search(r't:(\S+?):(\S+)', full_text)
    if t_match:
        data['week'] = t_match.group(1)
        data['seconds'] = t_match.group(2)

    # 3. 'Callbacks' is a key followed by a list of values.
    callbacks_match = re.search(r'Callbacks:\s*(.*)', full_text)
    if callbacks_match:
        # Get all callback values and join them with a single space.
        callbacks_str = callbacks_match.group(1).strip()
        data['Callbacks'] = ' '.join(callbacks_str.split())
        # Remove the processed Callbacks string so the generic key:value
        # pass below does not re-parse the callback values.
        full_text = full_text[:callbacks_match.start()]

    # 4. Address-like fields (PC, DA, SP), e.g. PC:6477D8 -> 0x006477D8.
    def format_address_field(value):
        """Normalize a hex address to '0x' + 8 zero-padded uppercase digits."""
        if value:
            # Remove potential '0x' prefix for consistent processing.
            if value.lower().startswith('0x'):
                value = value[2:]
            # Pad with leading zeros to 8 digits and add '0x' prefix.
            return f'0x{value.zfill(8).upper()}'
        return value

    # One loop replaces the previous copy-pasted per-field blocks (the PC
    # logic was duplicated inline and then overwritten by the helper call).
    for field in ('PC', 'DA', 'SP'):
        match = re.search(rf'{field}:(\S+)', full_text)
        if match:
            data[field] = format_address_field(match.group(1))

    # --- Generic Handling for simple "key:value" fields ---
    # This regex finds all words followed by a colon, then captures the
    # non-space characters that follow as the value.
    simple_pairs = re.findall(r'(\w+):(\S+)', full_text)
    for key, value in simple_pairs:
        # Add to dict only if not one of the specially handled keys.
        if key not in ('tsk', 't', 'PC', 'DA', 'SP'):
            data[key] = value

    return data


def main():
    """Parse CLI arguments, convert the input log file, and write the CSV.

    Exits with a usage error (via argparse) if the input file cannot be
    read or is not well-formed XML.
    """
    parser = argparse.ArgumentParser(
        description="Convert an XML-style error log file to a CSV file.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        'input_file',
        # nargs='?' makes the positional optional so the advertised
        # default actually takes effect when no argument is given.
        nargs='?',
        default='errorLog.txt',
        help="The path to the input log file (default: errorLog.txt)"
    )
    parser.add_argument(
        '-o', '--output',
        dest='output_file',
        help=(
            "The path to the output CSV file.\n"
            "(default: replaces input extension with .csv)"
        )
    )
    args = parser.parse_args()

    input_file = args.input_file

    # Determine the output filename: explicit -o wins, otherwise replace
    # the input file's extension with .csv.
    if args.output_file:
        output_file = args.output_file
    else:
        base_name = os.path.splitext(input_file)[0]
        output_file = f"{base_name}.csv"

    # --- Read Input and Parse ---
    print(f"Reading from '{input_file}'...")
    try:
        tree = ET.parse(input_file)
    except OSError as exc:
        # Missing/unreadable file: report cleanly instead of a traceback.
        parser.error(f"cannot read '{input_file}': {exc}")
    except ParseError as exc:
        # Malformed XML: report cleanly instead of a traceback.
        parser.error(f"'{input_file}' is not well-formed XML: {exc}")
    root = tree.getroot()

    log_elements = root.findall('log')
    if not log_elements:
        print("Warning: No <log> entries were found in the input file.")
        return

    all_logs_data = [parse_log_entry(log) for log in log_elements]

    # Move the 'Warn' values to 'Err', add 'err_type' as either 'Warning'
    # or 'Error'. Note this can also be done by looking at the first nibble of
    # the Error code: 0xf... are errors, 0x8... are warnings.
    for entry in all_logs_data:
        # pop() both reads and removes the original 'Warn' key in one step.
        warn_value = entry.pop('Warn', None)
        err_value = entry.get('Err')

        if warn_value:
            entry['Err'] = warn_value  # Move 'Warn' content to 'Err'
            entry['err_type'] = 'Warning'
        elif err_value:
            entry['err_type'] = 'Error'
        else:
            entry['err_type'] = ''  # No error or warning

    # --- Sort all_logs_data by 'week' and 'seconds' ---
    # Convert both to integers for proper numerical ordering; entries
    # missing either field sort as 0.
    all_logs_data.sort(
        key=lambda x: (int(x.get('week', 0)), int(x.get('seconds', 0)))
    )

    # --- Write Output CSV ---
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES)
        writer.writeheader()
        writer.writerows(all_logs_data)
    print(f"Successfully converted {len(all_logs_data)} log entries.")
    print(f"Output saved to '{output_file}'")


if __name__ == '__main__':
    main()

