Module ttemtoolbox.core.lithology_connect

Expand source code
#!/usr/bin/env python
import pathlib
import numpy as np
import pandas as pd
import geopandas as gpd
from scipy.stats import pearsonr
from ttemtoolbox.core.process_well import ProcessWell

def select_closest(ttemdata: pd.DataFrame | gpd.GeoDataFrame,
                   welllog: pd.DataFrame | gpd.GeoDataFrame,
                   search_radius=500,
                   showskip=False,
                   ):
    def get_distance(group1, group2):
        dis = np.sqrt((group1[0] - group2[0]) ** 2 + (group1[1] - group2[1]) ** 2)
        return dis
    concatlist = []
    concatwell = []
    skipname = []
    skipdistace = []
    ori_well = welllog
    groups_well = ori_well.groupby('Bore')
    ttem_location = list(ttemdata.groupby(['X', 'Y']).groups.keys())
    for name, group in groups_well:
        wellxy = list(group[['X','Y']].iloc[0])
        well_ttem_distance = list(map(lambda x: get_distance(wellxy, x), ttem_location))
        minvalue = min(well_ttem_distance)
        if minvalue <= float(search_radius):
            point_match = ttem_location[well_ttem_distance.index(minvalue)]
            matchpoint = ttemdata[(ttemdata['X'] == point_match[0]) & (ttemdata['Y'] == point_match[1])].copy()
            matchpoint.loc[:, 'distance'] = minvalue
            matchpoint.loc[:, 'Bore'] = name
            concatlist.append(matchpoint)
            concatwell.append(group)
        else:
            skipname.append(name)
            skipdistace.append(minvalue)
    try:
        matched_ttem = pd.concat(concatlist).reset_index(drop=True)
    except ValueError:
        matched_ttem = pd.DataFrame()
    try:
        matched_well = pd.concat(concatwell).reset_index(drop=True)
    except ValueError:
        matched_well = pd.DataFrame()
    skipped = pd.DataFrame({'Bore': skipname, 'Distance': skipdistace})
    print('Total of {} well with in radius ({}m), {} skipped'.format(len(concatlist), search_radius, len(skipname)))
    if showskip is False:
        return matched_ttem, matched_well
    else:
        return matched_ttem, matched_well, skipped

def sum_thickness(welllog): # receive single bore well_log and sum up the thickness of fine/mix/coarse material
    output = pd.DataFrame(columns=["Lithology", "Thickness"])
    init_lith = welllog["Keyword"].iloc[0]
    init_elev = welllog["Elevation_top"].iloc[0]
    concatlist=[]
    # TODO make pure ndarry to fit JIT to increase speed
    for index, row in welllog.iterrows():
        if row["Keyword"] == init_lith:
            pass
        elif row["Keyword"] != init_lith:
            pack = [init_lith, init_elev - row["Elevation_top"]]
            tmp = pd.DataFrame(pack,index=('Lithology','Thickness')).T
            concatlist.append(tmp)
            init_lith = row["Keyword"]
            init_elev = row["Elevation_top"]
        else:
            print("error")
        if row["Elevation_top"] == welllog.loc[welllog.index[-1], 'Elevation_top']:
            pack = [init_lith, init_elev - row["Elevation_top"]]
            tmp = pd.DataFrame(pack,index=('Lithology','Thickness')).T
            concatlist.append(tmp)
    output = pd.concat(concatlist)
    output = output.groupby("Lithology")["Thickness"].sum()
    return output

def ttem_well_connect(matched_ttem, matched_well):
    # use ttem data interval to filter out welllog data to make a bootstrap ready dataframe
    matched_ttem[["Fine", "Mix", "Coarse"]] = 0
    #TODO make pure numpy format to increase speed
    concatlist= []
    ttem_group = matched_ttem.groupby('Bore')
    for name, group in ttem_group:
        well_select = matched_well[matched_well['Bore'] == name].copy()
        for index, row in group.iterrows():
            top = row["Elevation_Cell"]
            bott = row["Elevation_End"]
            match_litho = well_select[(well_select['Elevation_top'] >= bott) & (well_select['Elevation_bottom'] < top)]
            if match_litho.empty:
                break
            thickness = dict(match_litho.groupby('Keyword')['Keyword'].count()*match_litho['Thickness'].iloc[0])

            if "fine grain" in thickness:
                row["Fine"] = row["Coarse"]+thickness["fine grain"]
            else:
                row["Fine"] = 0
            if "mix grain" in thickness:
                row["Mix"] = row["Mix"]+thickness["mix grain"]
            else:
                row["Mix"] = 0
            if "coarse grain" in thickness:
                row["Coarse"] = row["Coarse"]+thickness["coarse grain"]
            else:
                row["Coarse"] = 0
            row = row.to_frame().T
            concatlist.append(row)
    df = pd.concat(concatlist).reset_index(drop=True)
    return df

def pre_bootstrap(dataframe,welllog, distance=500):
    matched_ttem, matched_well = select_closest(dataframe, welllog, search_radius=distance, showskip=False)
    stitched_ttem_well = ttem_well_connect(matched_ttem, matched_well)
    Resistivity = stitched_ttem_well["Resistivity"].to_numpy().astype('float64')
    Thickness_ratio = stitched_ttem_well[["Fine", "Mix", "Coarse"]].div(stitched_ttem_well["Thickness"],
                                                                   axis=0).to_numpy().astype('float64')
    return stitched_ttem_well, Resistivity, Thickness_ratio, matched_ttem, matched_well


def bootstrap(resistivity, thickness_ratio):
    """
    bootstrap method, randomly pick from pre_bootstrap dataset to create a new data set with same shape,
    use thenew data set as an over-determined problem to solve the equation. Repeat 1000 times and output the resistivity
    The linear algebra equation check https://ngwa.onlinelibrary.wiley.com/doi/full/10.1111/gwat.12656
    """
    print('Bootstraping...')
    fine_Resistivity = np.empty(1000)
    mix_Resistivity = np.empty(1000)
    coarse_Resistivity = np.empty(1000)
    for k in range(1000):
        random_index = np.random.choice(np.arange(len(resistivity)), len(resistivity), replace=True)
        resistivity_sample = resistivity[random_index]
        resistivity_reverse = 1 / resistivity_sample
        thickness_ratio_sample = thickness_ratio[random_index]
        lstsq_result = np.linalg.lstsq(thickness_ratio_sample, resistivity_reverse)
        if lstsq_result[0][0] == 0:
            fine_Resistivity[k] = 0
        else:
            fine_Resistivity[k] = 1/lstsq_result[0][0]
        if lstsq_result[0][1] == 0:
            mix_Resistivity[k] = 0
        else:
            mix_Resistivity[k] = 1/lstsq_result[0][1]
        if lstsq_result[0][2] == 0:
            coarse_Resistivity[k] = 0
        else:
            coarse_Resistivity[k] = 1/lstsq_result[0][2]
    print('Done!')
    return fine_Resistivity, mix_Resistivity, coarse_Resistivity

def confidence(bootstrap_result, confidence=95): #95% condifence interval
    confidence_index = [(100 - confidence)/2, confidence+(100 - confidence)/2]
    confidence_interval = [np.percentile(bootstrap_result,confidence_index[0]),
                           np.percentile(bootstrap_result, confidence_index[1])]
    return confidence_interval

def packup(Fine_Resistivity, Mix_Resistivity, Coarse_Resistivity):

    Resi_conf_df = pd.DataFrame({"Fine_conf": confidence(Fine_Resistivity),
                                 "Mix_conf": confidence(Mix_Resistivity),
                                 "Coarse_conf": confidence(Coarse_Resistivity)})
    return Resi_conf_df

Functions

def bootstrap(resistivity, thickness_ratio)

bootstrap method, randomly pick from pre_bootstrap dataset to create a new data set with same shape, use thenew data set as an over-determined problem to solve the equation. Repeat 1000 times and output the resistivity The linear algebra equation check https://ngwa.onlinelibrary.wiley.com/doi/full/10.1111/gwat.12656

Expand source code
def bootstrap(resistivity, thickness_ratio):
    """
    bootstrap method, randomly pick from pre_bootstrap dataset to create a new data set with same shape,
    use thenew data set as an over-determined problem to solve the equation. Repeat 1000 times and output the resistivity
    The linear algebra equation check https://ngwa.onlinelibrary.wiley.com/doi/full/10.1111/gwat.12656
    """
    print('Bootstraping...')
    fine_Resistivity = np.empty(1000)
    mix_Resistivity = np.empty(1000)
    coarse_Resistivity = np.empty(1000)
    for k in range(1000):
        random_index = np.random.choice(np.arange(len(resistivity)), len(resistivity), replace=True)
        resistivity_sample = resistivity[random_index]
        resistivity_reverse = 1 / resistivity_sample
        thickness_ratio_sample = thickness_ratio[random_index]
        lstsq_result = np.linalg.lstsq(thickness_ratio_sample, resistivity_reverse)
        if lstsq_result[0][0] == 0:
            fine_Resistivity[k] = 0
        else:
            fine_Resistivity[k] = 1/lstsq_result[0][0]
        if lstsq_result[0][1] == 0:
            mix_Resistivity[k] = 0
        else:
            mix_Resistivity[k] = 1/lstsq_result[0][1]
        if lstsq_result[0][2] == 0:
            coarse_Resistivity[k] = 0
        else:
            coarse_Resistivity[k] = 1/lstsq_result[0][2]
    print('Done!')
    return fine_Resistivity, mix_Resistivity, coarse_Resistivity
def confidence(bootstrap_result, confidence=95)
Expand source code
def confidence(bootstrap_result, confidence=95): #95% condifence interval
    confidence_index = [(100 - confidence)/2, confidence+(100 - confidence)/2]
    confidence_interval = [np.percentile(bootstrap_result,confidence_index[0]),
                           np.percentile(bootstrap_result, confidence_index[1])]
    return confidence_interval
def packup(Fine_Resistivity, Mix_Resistivity, Coarse_Resistivity)
Expand source code
def packup(Fine_Resistivity, Mix_Resistivity, Coarse_Resistivity):

    Resi_conf_df = pd.DataFrame({"Fine_conf": confidence(Fine_Resistivity),
                                 "Mix_conf": confidence(Mix_Resistivity),
                                 "Coarse_conf": confidence(Coarse_Resistivity)})
    return Resi_conf_df
def pre_bootstrap(dataframe, welllog, distance=500)
Expand source code
def pre_bootstrap(dataframe,welllog, distance=500):
    matched_ttem, matched_well = select_closest(dataframe, welllog, search_radius=distance, showskip=False)
    stitched_ttem_well = ttem_well_connect(matched_ttem, matched_well)
    Resistivity = stitched_ttem_well["Resistivity"].to_numpy().astype('float64')
    Thickness_ratio = stitched_ttem_well[["Fine", "Mix", "Coarse"]].div(stitched_ttem_well["Thickness"],
                                                                   axis=0).to_numpy().astype('float64')
    return stitched_ttem_well, Resistivity, Thickness_ratio, matched_ttem, matched_well
def select_closest(ttemdata: pandas.core.frame.DataFrame | geopandas.geodataframe.GeoDataFrame, welllog: pandas.core.frame.DataFrame | geopandas.geodataframe.GeoDataFrame, search_radius=500, showskip=False)
Expand source code
def select_closest(ttemdata: pd.DataFrame | gpd.GeoDataFrame,
                   welllog: pd.DataFrame | gpd.GeoDataFrame,
                   search_radius=500,
                   showskip=False,
                   ):
    def get_distance(group1, group2):
        dis = np.sqrt((group1[0] - group2[0]) ** 2 + (group1[1] - group2[1]) ** 2)
        return dis
    concatlist = []
    concatwell = []
    skipname = []
    skipdistace = []
    ori_well = welllog
    groups_well = ori_well.groupby('Bore')
    ttem_location = list(ttemdata.groupby(['X', 'Y']).groups.keys())
    for name, group in groups_well:
        wellxy = list(group[['X','Y']].iloc[0])
        well_ttem_distance = list(map(lambda x: get_distance(wellxy, x), ttem_location))
        minvalue = min(well_ttem_distance)
        if minvalue <= float(search_radius):
            point_match = ttem_location[well_ttem_distance.index(minvalue)]
            matchpoint = ttemdata[(ttemdata['X'] == point_match[0]) & (ttemdata['Y'] == point_match[1])].copy()
            matchpoint.loc[:, 'distance'] = minvalue
            matchpoint.loc[:, 'Bore'] = name
            concatlist.append(matchpoint)
            concatwell.append(group)
        else:
            skipname.append(name)
            skipdistace.append(minvalue)
    try:
        matched_ttem = pd.concat(concatlist).reset_index(drop=True)
    except ValueError:
        matched_ttem = pd.DataFrame()
    try:
        matched_well = pd.concat(concatwell).reset_index(drop=True)
    except ValueError:
        matched_well = pd.DataFrame()
    skipped = pd.DataFrame({'Bore': skipname, 'Distance': skipdistace})
    print('Total of {} well with in radius ({}m), {} skipped'.format(len(concatlist), search_radius, len(skipname)))
    if showskip is False:
        return matched_ttem, matched_well
    else:
        return matched_ttem, matched_well, skipped
def sum_thickness(welllog)
Expand source code
def sum_thickness(welllog): # receive single bore well_log and sum up the thickness of fine/mix/coarse material
    output = pd.DataFrame(columns=["Lithology", "Thickness"])
    init_lith = welllog["Keyword"].iloc[0]
    init_elev = welllog["Elevation_top"].iloc[0]
    concatlist=[]
    # TODO make pure ndarry to fit JIT to increase speed
    for index, row in welllog.iterrows():
        if row["Keyword"] == init_lith:
            pass
        elif row["Keyword"] != init_lith:
            pack = [init_lith, init_elev - row["Elevation_top"]]
            tmp = pd.DataFrame(pack,index=('Lithology','Thickness')).T
            concatlist.append(tmp)
            init_lith = row["Keyword"]
            init_elev = row["Elevation_top"]
        else:
            print("error")
        if row["Elevation_top"] == welllog.loc[welllog.index[-1], 'Elevation_top']:
            pack = [init_lith, init_elev - row["Elevation_top"]]
            tmp = pd.DataFrame(pack,index=('Lithology','Thickness')).T
            concatlist.append(tmp)
    output = pd.concat(concatlist)
    output = output.groupby("Lithology")["Thickness"].sum()
    return output
def ttem_well_connect(matched_ttem, matched_well)
Expand source code
def ttem_well_connect(matched_ttem, matched_well):
    # use ttem data interval to filter out welllog data to make a bootstrap ready dataframe
    matched_ttem[["Fine", "Mix", "Coarse"]] = 0
    #TODO make pure numpy format to increase speed
    concatlist= []
    ttem_group = matched_ttem.groupby('Bore')
    for name, group in ttem_group:
        well_select = matched_well[matched_well['Bore'] == name].copy()
        for index, row in group.iterrows():
            top = row["Elevation_Cell"]
            bott = row["Elevation_End"]
            match_litho = well_select[(well_select['Elevation_top'] >= bott) & (well_select['Elevation_bottom'] < top)]
            if match_litho.empty:
                break
            thickness = dict(match_litho.groupby('Keyword')['Keyword'].count()*match_litho['Thickness'].iloc[0])

            if "fine grain" in thickness:
                row["Fine"] = row["Coarse"]+thickness["fine grain"]
            else:
                row["Fine"] = 0
            if "mix grain" in thickness:
                row["Mix"] = row["Mix"]+thickness["mix grain"]
            else:
                row["Mix"] = 0
            if "coarse grain" in thickness:
                row["Coarse"] = row["Coarse"]+thickness["coarse grain"]
            else:
                row["Coarse"] = 0
            row = row.to_frame().T
            concatlist.append(row)
    df = pd.concat(concatlist).reset_index(drop=True)
    return df