Module `ttemtoolbox.core.process_ttem`

Expand source code

#!/usr/bin/env python
# process_ttem.py
# Version 11.18.2023
# Created: 2023-11-17
# Author: Jiawei Li
import pathlib
from pathlib import Path
import re
import pandas as pd
import geopandas as gpd
import numpy as np
from ttemtoolbox.defaults.constants import XYZ_FILE_PATTERN, DOI_FILE_PATTERN
from ttemtoolbox.utils.tools import skip_metadata


class ProcessTTEM:
    """
    This function is used to format the tTEM data, and return a dataframe that contains filtered/processed tTEM data.\n
    if the input is a string or pathlib.PurePath object, the function will read the tTEM data from the file and \
    return a dataframe.\n
    if the input is a dataframe, the function will reuse the dataframe and return a dataframe.\n
    if the input is a list, the function will read all the tTEM data from the files in the list and return a \
    dataframe.\n
    Version 11.18.2023 \n
    :param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench
    :param doi_path: A string or pathlib.PurePath object that contains the path to the DOI file exported from Aarhus Workbench
    :param layer_exclude: A list that contains the layer number that you want to exclude from the tTEM data
    :param line_exclude: A list that contains the line number that you want to exclude from the tTEM data
    :param point_exclude: A list that contains the point number that you want to exclude from the tTEM data
    :param resample: A int value that indicates whether to fill the tTEM data with a factor, defaults is False
    :return: A pandas dataframe that contains the filtered/processed tTEM data
    """
    def __init__(self,
                 fname: pathlib.PurePath |str |list,
                 doi_path: pathlib.PurePath| str| list = None,
                 layer_exclude: list = None,
                 line_exclude: list = None,
                 ID_exclude: list = None,
                 resample: int = None,
                 unit: str = 'meter'):
        if not isinstance(fname, list):
            fname = [fname]
        if not isinstance(doi_path, list) and doi_path:
            doi_path = [doi_path]
        if unit == 'meter':
            self.unit = 'meter'
            self.unitconvert = 1
        elif unit == 'feet':
            self.unit = 'feet'
            self.unitconvert = 3.28084
        self.fname = fname
        self.doi_path = doi_path
        self.layer_exclude = layer_exclude
        self.line_exclude = line_exclude
        self.ID_exclude = ID_exclude
        self.resample = resample
        self.data = self._format_ttem()
        self.crs = self.data.crs

    @staticmethod
    def _read_ttem(fname: pathlib.PurePath| str, mtoft=1) -> pd.DataFrame| dict:
        """
        This function read tTEM data from .xyz file, and return a formatted dataframe that contains all the tTEM data. \n
        Version 11.18.2023 \n
        :param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench
        :return: A pandas dataframe that contains all the tTEM data without any filtering
        """
        data = skip_metadata(fname, XYZ_FILE_PATTERN)
        df = pd.DataFrame(data[1::], columns=data[0])
        df = df.astype({'ID': 'int64',
                                'Line_No': 'int64',
                                'Layer_No': 'int64',
                                'UTMX': 'float64',
                                'UTMY': 'float64',
                                'Elevation_Cell': 'float64',
                                'Resistivity': 'float64',
                                'Resistivity_STD': 'float64',
                                'Conductivity': 'float64',
                                'Depth_top': 'float64',
                                'Depth_bottom': 'float64',
                                'Thickness': 'float64',
                                'Thickness_STD': 'float64'
                                })
        df = df[~(df['Thickness_STD'] == float(9999))]
        df['Elevation_Cell'] = df['Elevation_Cell']/mtoft
        df['Depth_top'] = df['Depth_top']/mtoft
        df['Depth_bottom'] = df['Depth_bottom']/mtoft
        df['Thickness'] = df['Thickness']/mtoft
        return df
    
    @staticmethod
    def _find_crs(fname: pathlib.PurePath| str) -> str:
        """
        This function is used to find the CRS of the tTEM data, it will return the CRS of the tTEM data. \n
         \n
        :param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench
        :return: CRS of the tTEM data
        """
        with open(fname, "r") as file:
            lines = file.readlines()
        pattern = re.compile(r"epsg:(\d+)", re.IGNORECASE)
        for line in lines:
            match = pattern.search(line)
            if match:
                crs = match.group()
                crs = crs.upper()
                break
            else:
                crs = None
        return crs


    @staticmethod
    def _DOI(dataframe: pd.DataFrame,
             doi_path: pathlib.PurePath| str |list,
             mtoft=1) -> pd.DataFrame:
        """
        Remove all tTEM data under DOI elevation limit with provided DOI file from Aarhus Workbench \n
        Version 11.18.2023 \n
        :param dataframe: Datafram that constains tTEM data
        :param doi_path: path-like contains DOI file, or a list of path that contains multiple DOI files
        :return: Filtered tTEM data above DOI
        """
        
        doi_concatlist = []
        match_index = []
        for i in doi_path:
            print('Applying DOI {}.....'.format(Path(i).name))
            data = skip_metadata(i, DOI_FILE_PATTERN)
            tmp_doi_df = pd.DataFrame(data[1::], columns=data[0])
            doi_concatlist.append(tmp_doi_df)
        df_DOI = pd.concat(doi_concatlist)
        df_DOI = df_DOI.astype({'UTMX': 'float64',
                                'UTMY': 'float64',
                                'Value': 'float64'
                                })
        df_DOI['Value'] = df_DOI['Value']/mtoft
        df_group = dataframe.groupby(['UTMX', 'UTMY'])
        ttem_concatlist = []
        for name, group in df_group:
            try:
                elevation = df_DOI.loc[(df_DOI['UTMX'] == name[0]) & (df_DOI['UTMY'] == name[1])]['Value'].values[0]
                new_group = group[group['Elevation_Cell'] >= elevation]
                
                ttem_concatlist.append(new_group)
            except IndexError:
                continue
        df_out = pd.concat(ttem_concatlist)
        return df_out

    @staticmethod
    def _layer_exclude(dataframe: pd.DataFrame,
                       layer_exclude: list) -> pd.DataFrame:
        df_out = dataframe[~np.isin(dataframe["Layer_No"], layer_exclude)]
        print('Exclude layer {}'.format(layer_exclude))
        return df_out

    @staticmethod
    def _line_exclude(dataframe: pd.DataFrame,
                      line_exclude: list) -> pd.DataFrame:
        df_out = dataframe[~np.isin(dataframe["Line_No"], line_exclude)]
        print('Exclude line {}'.format(line_exclude))
        return df_out

    @staticmethod
    def _ID_exclude(dataframe: pd.DataFrame,
                       ID_exclude: list) -> pd.DataFrame:
        df_out = dataframe[~dataframe["ID"].isin(ID_exclude)]
        [print('Exclude point {}'.format(x)) for x in ID_exclude]
        return df_out

    @staticmethod
    def _to_linear(group: pd.DataFrame,
                   factor: int) -> pd.DataFrame:
        """
        The core algorithm of the resample method, it fills the tTEM from log to linear.\n
        Version 11.18.2023\n
        :param group: tTEM dataframe, typically a groups from pd.groupby method
        :param factor: how thin your thickness should be divided, e.g. 10 means 1/10 m thickness
        :return: linear thickness tTEM dataframe
        """

        newgroup = group.loc[group.index.repeat(group.Thickness * factor)]
        mul_per_gr = newgroup.groupby('Elevation_Cell').cumcount()
        newgroup['Elevation_Cell'] = newgroup['Elevation_Cell'].subtract(mul_per_gr * 1 / factor)
        newgroup['Depth_top'] = newgroup['Depth_top'].add(mul_per_gr * 1 / factor)
        newgroup['Depth_bottom'] = newgroup['Depth_top'].add(1 / factor)
        newgroup['Elevation_End'] = newgroup['Elevation_Cell'].subtract(1 / factor)
        newgroup['Thickness'] = 1 / factor
        return newgroup

    @staticmethod
    def _resample(dataframe: pd.DataFrame,
                  factor: int) -> pd.DataFrame:
        """
        This staticmethod is connected with format_ttem method, it converts the tTEM thickness from log to linear \
        layers with a consistant layer thickness .\n
        Version 11.18.2023\n
        :param dataframe: Dataframe that contains the tTEM data
        :param factor: how thin your thickness should be divided, e.g. 10 means 1/10 m thickness
        :return: resampled dataframe
        """
        concatlist = []
        groups = dataframe.groupby(['UTMX', 'UTMY'])
        for name, group in groups:
            newgroup = ProcessTTEM._to_linear(group, factor)
            concatlist.append(newgroup)
        result = pd.concat(concatlist)
        result.reset_index(drop=True, inplace=True)
        return result
    
    @staticmethod
    def _get_crs(fname: str | pathlib.PurePath) -> str:
        try:
            crs = ProcessTTEM._find_crs(fname[0])
        except:
            print('No CRS found in the file, set CRS to None, use set_crs method to assign a CRS.')
            crs = None
        return crs

    def _format_ttem(self):
        """
        This is the core method of the class that read file under varies input circumstances, and return a \
        formatted dataframe that contains filtered tTEM data. \n
        Version: 11.18.2023\n
        :return: A pandas dataframe that contains filtered tTEM data
        """
    # Read data under different input circumstances
        from pathlib import Path
        crs = self._get_crs(self.fname)
        tmp_df = pd.DataFrame()
        if len(self.fname) == 0:
            raise ValueError("The input is empty!")
        if isinstance(self.fname[0], (str, pathlib.PurePath)):
            concatlist = []
            for i in self.fname:
                tmp_df = self._read_ttem(i, self.unitconvert)
                concatlist.append(tmp_df)
                print("Reading data from file {}...".format(Path(i).name))
            tmp_df = pd.concat(concatlist)
        elif isinstance(self.fname[0], pd.DataFrame):
            print("Reading data from cache...")
            tmp_df = pd.concat(self.fname)
        if tmp_df.empty:
            raise ValueError("The input is empty!")
    # Create filter parameters
        if self.layer_exclude is not None:
            tmp_df = self._layer_exclude(tmp_df, self.layer_exclude)
        if self.line_exclude is not None:
            tmp_df = self._layer_exclude(tmp_df, self.line_exclude)
        if self.ID_exclude is not None:
            tmp_df = self._ID_exclude(tmp_df, self.ID_exclude)
        if self.doi_path is not None:
            tmp_df = self._DOI(tmp_df, self.doi_path)
        if self.resample is not None:
            tmp_df = self._resample(tmp_df, self.resample)
    # Sort the dataframe
        tmp_df = tmp_df.sort_values(by=['ID', 'Line_No','Layer_No'])
        tmp_df.reset_index(drop=True, inplace=True)
        tmp_df["Elevation_End"] = tmp_df["Elevation_Cell"].subtract(tmp_df["Thickness"])
        self.data = tmp_df.copy()
        self.data.rename(columns={'UTMX': 'X', 'UTMY': 'Y'},inplace=True)
        if crs is not None:
            self.data = gpd.GeoDataFrame(self.data, 
                                        geometry=gpd.points_from_xy(self.data['X'], self.data['Y']),
                                        crs=crs)
        else: 
            self.data = gpd.GeoDataFrame(self.data, 
                                        geometry=gpd.points_from_xy(self.data['X'], self.data['Y']))
        return self.data

    

    def summary(self) -> gpd.GeoDataFrame:
        """
        This function generate a summary of the tTEM file which can be plot in the GIS contains all key information \
        about the tTEM
        :return: pd.DataFrame containing the summary of the tTEM info
        """

        id_group = self.data.groupby('ID')
        agg_group = id_group.agg({'Depth_bottom': 'max',
                                  'Elevation_Cell': 'max',
                                  'Elevation_End': 'min',
                                  'Resistivity': ['min', 'max', 'mean'],
                                  'X': 'mean', 'Y': 'mean'})
        agg_group.columns = agg_group.columns.map('_'.join)
        agg_group.index.name = None
        agg_group['ID'] = agg_group.index
        self.summary = agg_group
        self.summary.reset_index(drop=True, inplace=True)
        self.summary.rename(columns={'X_mean': 'X', 'Y_mean': 'Y'}, inplace=True)
        return self.summary
    
    def set_crs(self, new_crs: str):
        """
        Assigns a new coordinate reference system (CRS) to the object.

        Parameters:
            new_crs (str): The new CRS to be assigned. It should be in the format 'EPSG:<code>',
                           where <code> is the EPSG code of the CRS.

        Returns:
            str: The newly assigned CRS.

        Raises:
            ValueError: If the input CRS is not in the correct format.

        Example:
            >>> obj = ProcessTTEM(fname)
            >>> obj.assign_crs('EPSG:4326')
            The CRS is assigned to EPSG:4326
            'EPSG:4326'
        """
        pattern = r'^EPSG:\d+$'
        if bool(re.match(pattern, new_crs)):
            self.data.set_crs(new_crs, inplace=True)
            print('The CRS is assigned to {}'.format(new_crs))
        else: 
            raise ValueError("The input CRS is not valid, please use EPSG format, e.g. EPSG:4326")
        return self.crs
    
    def reproject(self, new_crs: str):
        """
        Reprojects the data to a new coordinate reference system (CRS).

        Parameters:
            new_crs (str): The new CRS to reproject the data to.

        Returns:
            GeoDataFrame: The reprojected data as a GeoDataFrame.
        """
        
        self.data = self.data.to_crs(new_crs)
        self.crs = self.data.crs
        self.data['X'] = self.data.geometry.x
        self.data['Y'] = self.data.geometry.y
        return self.data
        
        
    def to_shp(self, output_filepath: str | pathlib.PurePath):
        """
        This method converts the tTEM data to a shapefile or other supported geospatial formats.\n
        :param output_filepath: The path to save the output shapefile or geospatial file.
        
        """
        ttem_gdf = gpd.GeoDataFrame(self.summary, 
                                    geometry=gpd.points_from_xy(self.summary['X'], self.summary['Y']),
                                    crs=self.crs)
        if  Path(output_filepath).suffix.lower() == '.shp':
            ttem_gdf.to_file(output_filepath, driver='ESRI Shapefile')
            print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
        elif Path(output_filepath).suffix.lower() == '.gpkg':
            ttem_gdf.to_file(output_filepath, driver='GPKG', layer=Path(self.fname[0]).stem)
            print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
        elif Path(output_filepath).suffix.lower() == '.geojson':
            ttem_gdf.to_file(output_filepath, driver='GeoJSON')
            print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
        else: 
            raise ValueError("The output file format is not supported, please use .shp, .gpkg, or .geojson")
            


if __name__ == "__main__":
    print('This is a module, please import it to use it.')
    import ttemtoolbox
    from pathlib import Path
    import geopandas as gpd
    from pathlib import Path
    workdir = Path.cwd()
    ttem_lslake = workdir.parent.parent.joinpath(r'data\PD22_I03_MOD.xyz')
    ttem_lsl = ttemtoolbox.process_ttem.ProcessTTEM(ttem_lslake)

Classes

class ProcessTTEM (fname: pathlib.PurePath | str | list, doi_path: pathlib.PurePath | str | list = None, layer_exclude: list = None, line_exclude: list = None, ID_exclude: list = None, resample: int = None, unit: str = 'meter')

This function is used to format the tTEM data, and return a dataframe that contains filtered/processed tTEM data.

if the input is a string or pathlib.PurePath object, the function will read the tTEM data from the file and return a dataframe.

if the input is a dataframe, the function will reuse the dataframe and return a dataframe.

if the input is a list, the function will read all the tTEM data from the files in the list and return a dataframe.

Version 11.18.2023

:param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench :param doi_path: A string or pathlib.PurePath object that contains the path to the DOI file exported from Aarhus Workbench :param layer_exclude: A list that contains the layer number that you want to exclude from the tTEM data :param line_exclude: A list that contains the line number that you want to exclude from the tTEM data :param point_exclude: A list that contains the point number that you want to exclude from the tTEM data :param resample: A int value that indicates whether to fill the tTEM data with a factor, defaults is False :return: A pandas dataframe that contains the filtered/processed tTEM data

Expand source code

class ProcessTTEM:
    """
    This function is used to format the tTEM data, and return a dataframe that contains filtered/processed tTEM data.\n
    if the input is a string or pathlib.PurePath object, the function will read the tTEM data from the file and \
    return a dataframe.\n
    if the input is a dataframe, the function will reuse the dataframe and return a dataframe.\n
    if the input is a list, the function will read all the tTEM data from the files in the list and return a \
    dataframe.\n
    Version 11.18.2023 \n
    :param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench
    :param doi_path: A string or pathlib.PurePath object that contains the path to the DOI file exported from Aarhus Workbench
    :param layer_exclude: A list that contains the layer number that you want to exclude from the tTEM data
    :param line_exclude: A list that contains the line number that you want to exclude from the tTEM data
    :param point_exclude: A list that contains the point number that you want to exclude from the tTEM data
    :param resample: A int value that indicates whether to fill the tTEM data with a factor, defaults is False
    :return: A pandas dataframe that contains the filtered/processed tTEM data
    """
    def __init__(self,
                 fname: pathlib.PurePath |str |list,
                 doi_path: pathlib.PurePath| str| list = None,
                 layer_exclude: list = None,
                 line_exclude: list = None,
                 ID_exclude: list = None,
                 resample: int = None,
                 unit: str = 'meter'):
        if not isinstance(fname, list):
            fname = [fname]
        if not isinstance(doi_path, list) and doi_path:
            doi_path = [doi_path]
        if unit == 'meter':
            self.unit = 'meter'
            self.unitconvert = 1
        elif unit == 'feet':
            self.unit = 'feet'
            self.unitconvert = 3.28084
        self.fname = fname
        self.doi_path = doi_path
        self.layer_exclude = layer_exclude
        self.line_exclude = line_exclude
        self.ID_exclude = ID_exclude
        self.resample = resample
        self.data = self._format_ttem()
        self.crs = self.data.crs

    @staticmethod
    def _read_ttem(fname: pathlib.PurePath| str, mtoft=1) -> pd.DataFrame| dict:
        """
        This function read tTEM data from .xyz file, and return a formatted dataframe that contains all the tTEM data. \n
        Version 11.18.2023 \n
        :param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench
        :return: A pandas dataframe that contains all the tTEM data without any filtering
        """
        data = skip_metadata(fname, XYZ_FILE_PATTERN)
        df = pd.DataFrame(data[1::], columns=data[0])
        df = df.astype({'ID': 'int64',
                                'Line_No': 'int64',
                                'Layer_No': 'int64',
                                'UTMX': 'float64',
                                'UTMY': 'float64',
                                'Elevation_Cell': 'float64',
                                'Resistivity': 'float64',
                                'Resistivity_STD': 'float64',
                                'Conductivity': 'float64',
                                'Depth_top': 'float64',
                                'Depth_bottom': 'float64',
                                'Thickness': 'float64',
                                'Thickness_STD': 'float64'
                                })
        df = df[~(df['Thickness_STD'] == float(9999))]
        df['Elevation_Cell'] = df['Elevation_Cell']/mtoft
        df['Depth_top'] = df['Depth_top']/mtoft
        df['Depth_bottom'] = df['Depth_bottom']/mtoft
        df['Thickness'] = df['Thickness']/mtoft
        return df
    
    @staticmethod
    def _find_crs(fname: pathlib.PurePath| str) -> str:
        """
        This function is used to find the CRS of the tTEM data, it will return the CRS of the tTEM data. \n
         \n
        :param fname: A string or pathlib.PurePath object that contains the path to the tTEM .xyz file exported from Aarhus Workbench
        :return: CRS of the tTEM data
        """
        with open(fname, "r") as file:
            lines = file.readlines()
        pattern = re.compile(r"epsg:(\d+)", re.IGNORECASE)
        for line in lines:
            match = pattern.search(line)
            if match:
                crs = match.group()
                crs = crs.upper()
                break
            else:
                crs = None
        return crs


    @staticmethod
    def _DOI(dataframe: pd.DataFrame,
             doi_path: pathlib.PurePath| str |list,
             mtoft=1) -> pd.DataFrame:
        """
        Remove all tTEM data under DOI elevation limit with provided DOI file from Aarhus Workbench \n
        Version 11.18.2023 \n
        :param dataframe: Datafram that constains tTEM data
        :param doi_path: path-like contains DOI file, or a list of path that contains multiple DOI files
        :return: Filtered tTEM data above DOI
        """
        
        doi_concatlist = []
        match_index = []
        for i in doi_path:
            print('Applying DOI {}.....'.format(Path(i).name))
            data = skip_metadata(i, DOI_FILE_PATTERN)
            tmp_doi_df = pd.DataFrame(data[1::], columns=data[0])
            doi_concatlist.append(tmp_doi_df)
        df_DOI = pd.concat(doi_concatlist)
        df_DOI = df_DOI.astype({'UTMX': 'float64',
                                'UTMY': 'float64',
                                'Value': 'float64'
                                })
        df_DOI['Value'] = df_DOI['Value']/mtoft
        df_group = dataframe.groupby(['UTMX', 'UTMY'])
        ttem_concatlist = []
        for name, group in df_group:
            try:
                elevation = df_DOI.loc[(df_DOI['UTMX'] == name[0]) & (df_DOI['UTMY'] == name[1])]['Value'].values[0]
                new_group = group[group['Elevation_Cell'] >= elevation]
                
                ttem_concatlist.append(new_group)
            except IndexError:
                continue
        df_out = pd.concat(ttem_concatlist)
        return df_out

    @staticmethod
    def _layer_exclude(dataframe: pd.DataFrame,
                       layer_exclude: list) -> pd.DataFrame:
        df_out = dataframe[~np.isin(dataframe["Layer_No"], layer_exclude)]
        print('Exclude layer {}'.format(layer_exclude))
        return df_out

    @staticmethod
    def _line_exclude(dataframe: pd.DataFrame,
                      line_exclude: list) -> pd.DataFrame:
        df_out = dataframe[~np.isin(dataframe["Line_No"], line_exclude)]
        print('Exclude line {}'.format(line_exclude))
        return df_out

    @staticmethod
    def _ID_exclude(dataframe: pd.DataFrame,
                       ID_exclude: list) -> pd.DataFrame:
        df_out = dataframe[~dataframe["ID"].isin(ID_exclude)]
        [print('Exclude point {}'.format(x)) for x in ID_exclude]
        return df_out

    @staticmethod
    def _to_linear(group: pd.DataFrame,
                   factor: int) -> pd.DataFrame:
        """
        The core algorithm of the resample method, it fills the tTEM from log to linear.\n
        Version 11.18.2023\n
        :param group: tTEM dataframe, typically a groups from pd.groupby method
        :param factor: how thin your thickness should be divided, e.g. 10 means 1/10 m thickness
        :return: linear thickness tTEM dataframe
        """

        newgroup = group.loc[group.index.repeat(group.Thickness * factor)]
        mul_per_gr = newgroup.groupby('Elevation_Cell').cumcount()
        newgroup['Elevation_Cell'] = newgroup['Elevation_Cell'].subtract(mul_per_gr * 1 / factor)
        newgroup['Depth_top'] = newgroup['Depth_top'].add(mul_per_gr * 1 / factor)
        newgroup['Depth_bottom'] = newgroup['Depth_top'].add(1 / factor)
        newgroup['Elevation_End'] = newgroup['Elevation_Cell'].subtract(1 / factor)
        newgroup['Thickness'] = 1 / factor
        return newgroup

    @staticmethod
    def _resample(dataframe: pd.DataFrame,
                  factor: int) -> pd.DataFrame:
        """
        This staticmethod is connected with format_ttem method, it converts the tTEM thickness from log to linear \
        layers with a consistant layer thickness .\n
        Version 11.18.2023\n
        :param dataframe: Dataframe that contains the tTEM data
        :param factor: how thin your thickness should be divided, e.g. 10 means 1/10 m thickness
        :return: resampled dataframe
        """
        concatlist = []
        groups = dataframe.groupby(['UTMX', 'UTMY'])
        for name, group in groups:
            newgroup = ProcessTTEM._to_linear(group, factor)
            concatlist.append(newgroup)
        result = pd.concat(concatlist)
        result.reset_index(drop=True, inplace=True)
        return result
    
    @staticmethod
    def _get_crs(fname: str | pathlib.PurePath) -> str:
        try:
            crs = ProcessTTEM._find_crs(fname[0])
        except:
            print('No CRS found in the file, set CRS to None, use set_crs method to assign a CRS.')
            crs = None
        return crs

    def _format_ttem(self):
        """
        This is the core method of the class that read file under varies input circumstances, and return a \
        formatted dataframe that contains filtered tTEM data. \n
        Version: 11.18.2023\n
        :return: A pandas dataframe that contains filtered tTEM data
        """
    # Read data under different input circumstances
        from pathlib import Path
        crs = self._get_crs(self.fname)
        tmp_df = pd.DataFrame()
        if len(self.fname) == 0:
            raise ValueError("The input is empty!")
        if isinstance(self.fname[0], (str, pathlib.PurePath)):
            concatlist = []
            for i in self.fname:
                tmp_df = self._read_ttem(i, self.unitconvert)
                concatlist.append(tmp_df)
                print("Reading data from file {}...".format(Path(i).name))
            tmp_df = pd.concat(concatlist)
        elif isinstance(self.fname[0], pd.DataFrame):
            print("Reading data from cache...")
            tmp_df = pd.concat(self.fname)
        if tmp_df.empty:
            raise ValueError("The input is empty!")
    # Create filter parameters
        if self.layer_exclude is not None:
            tmp_df = self._layer_exclude(tmp_df, self.layer_exclude)
        if self.line_exclude is not None:
            tmp_df = self._layer_exclude(tmp_df, self.line_exclude)
        if self.ID_exclude is not None:
            tmp_df = self._ID_exclude(tmp_df, self.ID_exclude)
        if self.doi_path is not None:
            tmp_df = self._DOI(tmp_df, self.doi_path)
        if self.resample is not None:
            tmp_df = self._resample(tmp_df, self.resample)
    # Sort the dataframe
        tmp_df = tmp_df.sort_values(by=['ID', 'Line_No','Layer_No'])
        tmp_df.reset_index(drop=True, inplace=True)
        tmp_df["Elevation_End"] = tmp_df["Elevation_Cell"].subtract(tmp_df["Thickness"])
        self.data = tmp_df.copy()
        self.data.rename(columns={'UTMX': 'X', 'UTMY': 'Y'},inplace=True)
        if crs is not None:
            self.data = gpd.GeoDataFrame(self.data, 
                                        geometry=gpd.points_from_xy(self.data['X'], self.data['Y']),
                                        crs=crs)
        else: 
            self.data = gpd.GeoDataFrame(self.data, 
                                        geometry=gpd.points_from_xy(self.data['X'], self.data['Y']))
        return self.data

    

    def summary(self) -> gpd.GeoDataFrame:
        """
        This function generate a summary of the tTEM file which can be plot in the GIS contains all key information \
        about the tTEM
        :return: pd.DataFrame containing the summary of the tTEM info
        """

        id_group = self.data.groupby('ID')
        agg_group = id_group.agg({'Depth_bottom': 'max',
                                  'Elevation_Cell': 'max',
                                  'Elevation_End': 'min',
                                  'Resistivity': ['min', 'max', 'mean'],
                                  'X': 'mean', 'Y': 'mean'})
        agg_group.columns = agg_group.columns.map('_'.join)
        agg_group.index.name = None
        agg_group['ID'] = agg_group.index
        self.summary = agg_group
        self.summary.reset_index(drop=True, inplace=True)
        self.summary.rename(columns={'X_mean': 'X', 'Y_mean': 'Y'}, inplace=True)
        return self.summary
    
    def set_crs(self, new_crs: str):
        """
        Assigns a new coordinate reference system (CRS) to the object.

        Parameters:
            new_crs (str): The new CRS to be assigned. It should be in the format 'EPSG:<code>',
                           where <code> is the EPSG code of the CRS.

        Returns:
            str: The newly assigned CRS.

        Raises:
            ValueError: If the input CRS is not in the correct format.

        Example:
            >>> obj = ProcessTTEM(fname)
            >>> obj.assign_crs('EPSG:4326')
            The CRS is assigned to EPSG:4326
            'EPSG:4326'
        """
        pattern = r'^EPSG:\d+$'
        if bool(re.match(pattern, new_crs)):
            self.data.set_crs(new_crs, inplace=True)
            print('The CRS is assigned to {}'.format(new_crs))
        else: 
            raise ValueError("The input CRS is not valid, please use EPSG format, e.g. EPSG:4326")
        return self.crs
    
    def reproject(self, new_crs: str):
        """
        Reprojects the data to a new coordinate reference system (CRS).

        Parameters:
            new_crs (str): The new CRS to reproject the data to.

        Returns:
            GeoDataFrame: The reprojected data as a GeoDataFrame.
        """
        
        self.data = self.data.to_crs(new_crs)
        self.crs = self.data.crs
        self.data['X'] = self.data.geometry.x
        self.data['Y'] = self.data.geometry.y
        return self.data
        
        
    def to_shp(self, output_filepath: str | pathlib.PurePath):
        """
        This method converts the tTEM data to a shapefile or other supported geospatial formats.\n
        :param output_filepath: The path to save the output shapefile or geospatial file.
        
        """
        ttem_gdf = gpd.GeoDataFrame(self.summary, 
                                    geometry=gpd.points_from_xy(self.summary['X'], self.summary['Y']),
                                    crs=self.crs)
        if  Path(output_filepath).suffix.lower() == '.shp':
            ttem_gdf.to_file(output_filepath, driver='ESRI Shapefile')
            print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
        elif Path(output_filepath).suffix.lower() == '.gpkg':
            ttem_gdf.to_file(output_filepath, driver='GPKG', layer=Path(self.fname[0]).stem)
            print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
        elif Path(output_filepath).suffix.lower() == '.geojson':
            ttem_gdf.to_file(output_filepath, driver='GeoJSON')
            print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
        else: 
            raise ValueError("The output file format is not supported, please use .shp, .gpkg, or .geojson")

Methods

def reproject(self, new_crs: str)

Reprojects the data to a new coordinate reference system (CRS).

Parameters

new_crs (str): The new CRS to reproject the data to.

Returns

GeoDataFrame: The reprojected data as a GeoDataFrame.

Expand source code

def reproject(self, new_crs: str):
    """
    Reprojects the data to a new coordinate reference system (CRS).

    Parameters:
        new_crs (str): The new CRS to reproject the data to.

    Returns:
        GeoDataFrame: The reprojected data as a GeoDataFrame.
    """
    
    self.data = self.data.to_crs(new_crs)
    self.crs = self.data.crs
    self.data['X'] = self.data.geometry.x
    self.data['Y'] = self.data.geometry.y
    return self.data

def set_crs(self, new_crs: str)

Assigns a new coordinate reference system (CRS) to the object.

Parameters

new_crs (str): The new CRS to be assigned. It should be in the format 'EPSG:', where is the EPSG code of the CRS.


Returns

str
The newly assigned CRS.

Raises

ValueError
If the input CRS is not in the correct format.

Example
>>> obj = ProcessTTEM(fname)
>>> obj.assign_crs('EPSG:4326')
The CRS is assigned to EPSG:4326
'EPSG:4326'




Expand source code

def set_crs(self, new_crs: str):
    """
    Assigns a new coordinate reference system (CRS) to the object.

    Parameters:
        new_crs (str): The new CRS to be assigned. It should be in the format 'EPSG:<code>',
                       where <code> is the EPSG code of the CRS.

    Returns:
        str: The newly assigned CRS.

    Raises:
        ValueError: If the input CRS is not in the correct format.

    Example:
        >>> obj = ProcessTTEM(fname)
        >>> obj.assign_crs('EPSG:4326')
        The CRS is assigned to EPSG:4326
        'EPSG:4326'
    """
    pattern = r'^EPSG:\d+$'
    if bool(re.match(pattern, new_crs)):
        self.data.set_crs(new_crs, inplace=True)
        print('The CRS is assigned to {}'.format(new_crs))
    else: 
        raise ValueError("The input CRS is not valid, please use EPSG format, e.g. EPSG:4326")
    return self.crs



def summary(self) ‑> geopandas.geodataframe.GeoDataFrame


This function generate a summary of the tTEM file which can be plot in the GIS contains all key information
about the tTEM
:return: pd.DataFrame containing the summary of the tTEM info


Expand source code

def summary(self) -> gpd.GeoDataFrame:
    """
    This function generate a summary of the tTEM file which can be plot in the GIS contains all key information \
    about the tTEM
    :return: pd.DataFrame containing the summary of the tTEM info
    """

    id_group = self.data.groupby('ID')
    agg_group = id_group.agg({'Depth_bottom': 'max',
                              'Elevation_Cell': 'max',
                              'Elevation_End': 'min',
                              'Resistivity': ['min', 'max', 'mean'],
                              'X': 'mean', 'Y': 'mean'})
    agg_group.columns = agg_group.columns.map('_'.join)
    agg_group.index.name = None
    agg_group['ID'] = agg_group.index
    self.summary = agg_group
    self.summary.reset_index(drop=True, inplace=True)
    self.summary.rename(columns={'X_mean': 'X', 'Y_mean': 'Y'}, inplace=True)
    return self.summary



def to_shp(self, output_filepath: str | pathlib.PurePath)


This method converts the tTEM data to a shapefile or other supported geospatial formats.
:param output_filepath: The path to save the output shapefile or geospatial file.


Expand source code

def to_shp(self, output_filepath: str | pathlib.PurePath):
    """
    This method converts the tTEM data to a shapefile or other supported geospatial formats.\n
    :param output_filepath: The path to save the output shapefile or geospatial file.
    
    """
    ttem_gdf = gpd.GeoDataFrame(self.summary, 
                                geometry=gpd.points_from_xy(self.summary['X'], self.summary['Y']),
                                crs=self.crs)
    if  Path(output_filepath).suffix.lower() == '.shp':
        ttem_gdf.to_file(output_filepath, driver='ESRI Shapefile')
        print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
    elif Path(output_filepath).suffix.lower() == '.gpkg':
        ttem_gdf.to_file(output_filepath, driver='GPKG', layer=Path(self.fname[0]).stem)
        print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
    elif Path(output_filepath).suffix.lower() == '.geojson':
        ttem_gdf.to_file(output_filepath, driver='GeoJSON')
        print('The output file is saved to {}'.format(Path(output_filepath).resolve()))
    else: 
        raise ValueError("The output file format is not supported, please use .shp, .gpkg, or .geojson")