#!/usr/bin/env python
"""
Scrape NoPi web page data and print results over a given date range.
It prints the maximum values across all satellites to help look for outliers.

Examples:
 ./get_nopi.py BD940_BD940_Zero 2022-10-08 2022-12-01 "GPS L2E" carr

 ./get_nopi.py --terrasat IN2TI/02 2023-02-03 2023-02-05 "Beidou B3" SNR
 
 # To get results from NoPi jamming system
 ./get_nopi.py --jam Results 2023-07-17 2023-07-19 "Beidou B3" SNR
"""

from typing import List
import argparse
import datetime
import re
import os
from zipfile import ZipFile
import numpy as np
import pandas as pd
import requests

class ParseComboData:
    """Parse combo data out of NoPi NoPiDI_funcs.js file and return a Pandas dataframe.

    Feed the file to parse() one line at a time, then call get_df().
    Three kinds of line are recognised (each must begin with at least one space):

        all_combo[C].combo_name = "NAME"
        all_combo[C].meas[M].units = "UNITS"
        all_combo[C].meas[M].sv[S].FIELD = VALUE;

    Every other line is ignored.
    """
    # Compiled once for the whole class instead of once per parsed line.
    # The '.' separators are escaped so they match a literal dot only
    # (an unescaped '.' would accept any character in that position).
    _COMBO_NAME_RE = re.compile(r' +all_combo\[([0-9]+)\]\.combo_name = "(.*?)"')
    _MEAS_UNITS_RE = re.compile(
        r' +all_combo\[([0-9]+)\]\.meas\[([0-9]+)\]\.units = "(.*?)"')
    _SV_FIELD_RE = re.compile(
        r' +all_combo\[([0-9]+)\]\.meas\[([0-9]+)\]\.sv\[([0-9]+)\]\.(.*?) = (.*);')

    # Units string found in the file -> measurement name stored in the 'meas' column
    _UNITS_TO_MEAS = {'[mcycles]': 'carr',
                      '[mm]': 'carr',
                      '[m]': 'range',
                      '[m/s]': 'dopp',
                      '[dB-Hz]': 'SNR'}

    def __init__(self):
        self.combo = {}             # combo number -> combo name
        self.meas = {}              # (combo number, meas number) -> measurement name
        self.curr_cms = (-1,-1,-1)  # (combo, meas, sv) key of the row being filled
        self.temp_list = []         # one list per row, in get_df() column order
        # Field name -> position of its value inside a row of temp_list
        self.fields = {'sv_id':2,
                       'vld_epochs':3,
                       'vld_min':4,
                       'vld_max':5,
                       'vld_mean':6,
                       'vld_std':7,
                       'vld_mav':8}

    def get_df(self):
        """Return Pandas dataframe with all parsed data"""
        return pd.DataFrame(self.temp_list,
             columns=['combo','meas',
                      'sv_id',
                      'vld_epochs','vld_min','vld_max','vld_mean','vld_std','vld_mav'])


    def _set_combo_name(self,combo_num,name):
        """Remember the name of combo number combo_num."""
        self.combo[combo_num] = name

    def _set_meas_units(self,combo_num,meas_num,units):
        """Record which measurement (carr/range/dopp/SNR) a units string stands for.

        Raises RuntimeError for a units string that is not in the table.
        """
        meas_name = self._UNITS_TO_MEAS.get(units)
        if meas_name is None:
            raise RuntimeError("unknown units",units)
        self.meas[(combo_num,meas_num)] = meas_name

    def _set_sv_field(self,combo,meas,sv_id,field,field_val):
        """Store one per-satellite value, starting a new row when the key changes.

        Raises ValueError if field_val is neither an int nor a float, and
        KeyError if combo/meas were not declared earlier or field is not one
        of self.fields.
        """
        try:
            field_num = int(field_val)
        except ValueError:
            field_num = float(field_val)

        if self.curr_cms != (combo,meas,sv_id):
            # First value seen for this (combo, meas, sv): open a fresh row
            # whose statistics stay NaN until the file supplies them.
            self.curr_cms = (combo,meas,sv_id)
            self.temp_list.append([self.combo[combo],self.meas[(combo,meas)],
                                   sv_id] + [np.nan]*6)

        self.temp_list[-1][self.fields[field]] = field_num

    def parse(self,line):
        """Parse line from NoPi Javascript file and extract any data
        """
        m = self._COMBO_NAME_RE.match(line)
        if m:
            self._set_combo_name(int(m.group(1)),m.group(2))
            return

        m = self._MEAS_UNITS_RE.match(line)
        if m:
            self._set_meas_units(int(m.group(1)),int(m.group(2)),m.group(3))
            return

        m = self._SV_FIELD_RE.match(line)
        if m:
            self._set_sv_field(int(m.group(1)),int(m.group(2)),int(m.group(3)),
                                m.group(4),m.group(5))

def get_fw_ver(lines):
    """Parse NoPi NoPiDI_Menu.html file and get firmware information.

    lines is an iterable of text lines.  A line is considered when it
    contains '/ 0x' somewhere after its first character; the first two
    ' / '-separated fields of that line form the firmware string.

    Returns list of unique firmwares in order of first appearance,
    e.g ['5.70 - 0.33 / 2023-5-5'].  Lines that contain the marker but fewer
    than two ' / '-separated fields are skipped.
    """
    all_fw = []
    for line in lines:
        # find() returns -1 when absent; a marker at column 0 is skipped too
        if line.find('/ 0x') <= 0:
            continue
        words = line.split(' / ')
        if len(words) < 2:
            # e.g. 'abc/ 0x12': marker present but no ' / ' separator to split on
            continue
        new_fw = words[0] + ' / ' + words[1]
        if new_fw not in all_fw:
            all_fw.append(new_fw)
    return all_fw

class NoPiData:
    """Result record handed back by the get_one_data*() functions.

    Holds the four constructor arguments unchanged as attributes of the
    same names: date, link, curr_fw and df.
    """
    def __init__(self, date : datetime.date, link: str, curr_fw : List[str], df : pd.DataFrame):
        # Plain storage only; nothing is copied or validated
        self.date, self.link = date, link
        self.curr_fw, self.df = curr_fw, df

def get_one_data_terrasat(baseline_name,baseline_id,curr_date):
    """Given Terrasat baseline_name+station ID (e.g., IN2TI + 02) and a
    datetime.date() value (curr_date), return NoPiData

    The result zip is looked up by baseline name, year, station ID and
    three-digit day of year.  When that file does not exist the returned
    NoPiData carries an empty firmware list and an empty dataframe.  In both
    cases the link field is the zip file path.

    Raises KeyError (from ZipFile.read) if the zip lacks either
    'web/NoPiDI_funcs.js' or 'web/NoPiDI_Menu.html'.
    """
    filename = "/net/meson/mnt/data_drive/TerrasatCycleSlips/NoPiAnalysis/Results/"
    filename += "%s/%d/%s/%03d.zip"%(
        baseline_name, curr_date.year, baseline_id, curr_date.timetuple().tm_yday)

    if not os.path.isfile(filename):
        return NoPiData(curr_date, filename, [], pd.DataFrame())

    # Open the archive once and pull out both members in the same pass
    with ZipFile(filename, 'r') as f_in:
        funcs_js = f_in.read('web/NoPiDI_funcs.js')
        menu_html = f_in.read('web/NoPiDI_Menu.html')

    parser = ParseComboData()
    for line in funcs_js.decode('utf-8').split('\n'):
        parser.parse(line)
    curr_fw = get_fw_ver(menu_html.decode('utf-8').split('\n'))
    return NoPiData(curr_date, filename, curr_fw, parser.get_df())

def get_data_terrasat(baseline,start_date,end_date):
    """Generator over Terrasat results, one NoPiData per day.

    baseline is 'name/station' (e.g., IN2TI/02); start_date and end_date are
    datetime.date() values and both ends are included.  Nothing is yielded
    when start_date is after end_date.  A baseline without exactly one '/'
    raises ValueError once iteration starts.
    """
    name, station = baseline.split('/')
    day_count = (end_date - start_date).days + 1
    for offset in range(day_count):
        day = start_date + datetime.timedelta(days=offset)
        yield get_one_data_terrasat(name, station, day)

def get_one_data(baseline,curr_date):
    """Fetch one day of regular NoPi results and return it as NoPiData.

    baseline is the NoPi baseline name (e.g., BD940_BD940_Zero) and curr_date
    a datetime.date().  Two files are downloaded from inside the day's
    nopi.zip: NoPiDI_funcs.js (statistics) and NoPiDI_Menu.html (firmware).
    The HTTP status is not checked; whatever body comes back is decoded as
    UTF-8 and handed to the parsers.  The link field points at the day's
    NoPiDI_Top.html page.
    """
    # Both downloads live under the same per-day nopi.zip location
    zip_root = ('http://meson.eng.trimble.com:9999/%d-Res/'
                '%02d/%d%02d%02d_%s/nopi.zip/') % (
                    curr_date.year, curr_date.month, curr_date.year,
                    curr_date.month, curr_date.day, baseline)

    funcs_reply = requests.get(zip_root + 'web/NoPiDI_funcs.js', timeout=10)
    combo_parser = ParseComboData()
    for js_line in funcs_reply.content.decode('utf8').split('\n'):
        combo_parser.parse(js_line)

    menu_url = zip_root + 'web/NoPiDI_Menu.html'
    menu_reply = requests.get(menu_url, timeout=10)
    firmware = get_fw_ver(menu_reply.content.decode('utf8').split('\n'))

    top_link = menu_url.replace('web/NoPiDI_Menu.html','NoPiDI_Top.html')
    return NoPiData(curr_date, top_link, firmware, combo_parser.get_df())

def get_data(baseline,start_date,end_date):
    """Generator over regular NoPi results, one NoPiData per day.

    baseline is the NoPi baseline name (e.g., BD940_BD940_Zero); start_date
    and end_date are datetime.date() values and both ends are included.
    Nothing is yielded when start_date is after end_date.
    """
    day_count = (end_date - start_date).days + 1
    for offset in range(day_count):
        yield get_one_data(baseline, start_date + datetime.timedelta(days=offset))


def get_one_data_jam(curr_date):
    """Fetch one hourly NoPi 'Jam' result and return it as NoPiData.

    curr_date must provide year, month, day and hour (a datetime.datetime).
    Two files are downloaded from inside the YYYYMMDDHH00.zip of that hour:
    NoPiDI_funcs.js (statistics) and NoPiDI_Menu.html (firmware).  The HTTP
    status is not checked; whatever body comes back is decoded as UTF-8 and
    handed to the parsers.  The link field points at NoPiDI_Top.html inside
    the same zip.
    """
    # Both downloads live inside the same per-hour zip
    zip_root = 'http://meson.eng.trimble.com:9998/%d%02d%02d%02d00.zip/' % (
        curr_date.year, curr_date.month, curr_date.day, curr_date.hour)

    funcs_reply = requests.get(zip_root + 'web/NoPiDI_funcs.js', timeout=10)
    combo_parser = ParseComboData()
    for js_line in funcs_reply.content.decode('utf8').split('\n'):
        combo_parser.parse(js_line)

    menu_url = zip_root + 'web/NoPiDI_Menu.html'
    menu_reply = requests.get(menu_url, timeout=10)
    firmware = get_fw_ver(menu_reply.content.decode('utf8').split('\n'))

    top_link = menu_url.replace('web/NoPiDI_Menu.html','NoPiDI_Top.html')
    return NoPiData(curr_date, top_link, firmware, combo_parser.get_df())



def get_data_jam(baseline,start_date,end_date):
    """Generator over NoPi 'Jam' results, one NoPiData per hour.

    baseline is accepted only so the signature matches the other get_data*()
    functions; it is ignored.  start_date and end_date are datetime.date()
    values.  Hours run from 00:00 on start_date up to and including 00:00 on
    end_date.  Nothing is yielded when start_date is after end_date.
    """
    # The Jam data is 10Hz with a new NoPi result per hour, so step through
    # datetime.datetime values (midnight of each date) rather than dates.
    # NOTE(review): because end_date becomes midnight, only hour 00 of the
    # last day is fetched, while the daily getters include the whole end
    # day - confirm whether hours 01-23 of end_date are meant to be skipped.
    first_hour = datetime.datetime(start_date.year,start_date.month,start_date.day)
    last_hour = datetime.datetime(end_date.year,end_date.month,end_date.day)

    one_hour = datetime.timedelta(seconds=3600)
    hour_count = (last_hour - first_hour) // one_hour + 1
    for step in range(hour_count):
        yield get_one_data_jam(first_hour + step * one_hour)



def get_unique_combo_meas( df ):
    """Return one row per distinct (combo, meas) pair found in df.

    The result has the columns 'combo', 'meas' and 'count', where 'count'
    is the number of rows of df belonging to that pair.
    """
    rows_per_pair = df.groupby(['combo','meas']).size()
    # reset_index() names the size column 0; give it a readable name
    table = rows_per_pair.reset_index()
    return table.rename(columns={0:'count'})

def combine_all_df( all_df ):
    """Collect, per combo/meas pair, the valid rows of every day.

    Input: all_df = iterable of objects with a .df dataframe attribute
    (e.g. the NoPiData results of a get_data*() call), one per period.
    Returns dictionary {(combo, meas): dataframe}, where each dataframe
    holds the rows with vld_epochs > 0 for that pair from all periods, in
    input order.  The goal is to be able to look at all data for a single
    combo/meas.

    The set of combo/meas pairs is taken from the first period that has any
    data.  Periods with an empty dataframe (no data found) are skipped; an
    empty dictionary is returned when no period has data.
    """
    # Empty frames have no 'combo'/'meas' columns at all, so drop them first
    frames = [day.df for day in all_df if len(day.df) > 0]
    if not frames:
        return {}

    # Sorted unique (combo, meas) pairs of the first period with data
    pairs = frames[0].groupby(['combo','meas']).size().index

    unique_combos = {}
    for combo, meas in pairs:
        per_day = [frame[(frame.combo==combo)
                         &(frame.meas==meas)
                         &(frame.vld_epochs>0)]
                   for frame in frames]
        # Single concat per pair instead of growing a frame inside the loop
        unique_combos[(combo,meas)] = pd.concat(per_day)
    return unique_combos

def main():
    """Command-line entry point - see file docstring for usage examples.

    Parses the arguments, picks the data source (--terrasat wins over --jam,
    otherwise regular NoPi), then prints one summary line per fetched period
    for the requested signal/metric.  A 'New FW' line is printed whenever the
    firmware list differs from the previous period that had data.
    """
    cli = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                  description=__doc__)
    cli.add_argument('baseline_desc', help='NoPi baseline name')
    # The two date arguments differ only in name and help text
    for date_arg, date_help in (('start_date', 'Start date in YYYY-MM-DD format'),
                                ('end_date', 'End date in YYYY-MM-DD format')):
        cli.add_argument(date_arg,
                         type=lambda s: datetime.datetime.strptime(s, '%Y-%m-%d').date(),
                         help=date_help)
    cli.add_argument('signal',
                     help='Signal as shown on NoPI web page (e.g., "GPS L1CA" or "Beidou B3")')
    cli.add_argument('metric', help='carr, range, dopp, or SNR')
    cli.add_argument('--terrasat','-t',
                     action='store_true', default=False,
                     help='Instead of normal NoPi, get data from TerrasatCycleSlips')
    # The Jam source has no baseline, but baseline_desc is a required
    # positional, so any placeholder must still be given and is ignored:
    #   get_nopi.py --jam Results 2023-07-17 2023-07-19
    cli.add_argument('--jam','-j',
                     action='store_true', default=False,
                     help='Instead of normal NoPi, get data from /net/meson/data_drive/mnt/Jam_NoPi')
    opts = cli.parse_args()

    # Start from the regular source; assigning terrasat last gives it priority
    fetch = get_data
    if opts.jam:
        fetch = get_data_jam
    if opts.terrasat:
        fetch = get_data_terrasat

    prev_fw = ['']
    for day in fetch(opts.baseline_desc, opts.start_date, opts.end_date):
        frame = day.df
        if not len(frame):
            print(' {}: No data found'.format(day.date))
            continue

        if day.curr_fw != prev_fw:
            print("New FW {}:".format(day.curr_fw))
            prev_fw = day.curr_fw

        wanted = frame[(frame.combo == opts.signal) & (frame.meas == opts.metric)]
        # Largest excursion in either direction across all satellites
        worst_abs = max(wanted.vld_max.abs().max(), wanted.vld_min.abs().max())
        print(' {} # {}\tmax {}\tmean {}\tstd {}\tmav {}'.format(
            day.date,
            wanted.vld_epochs.max(),
            worst_abs,
            wanted.vld_mean.abs().max(),
            wanted.vld_std.max(),
            wanted.vld_mav.max()))

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
