Module ttemtoolbox.utils.tools
Expand source code
#!/usr/bin/env python
import pandas as pd
import pathlib
from pathlib import Path
import re
import shutil
def keyword_search(fname, pattern):
    """
    Collect the dict keys or DataFrame column names that match the keywords.

    Every label is converted to ``str`` and stripped of surrounding
    whitespace; it is kept when its lower-cased form satisfies
    ``label.lower() in pattern``.
    :param fname: a dict (its keys are scanned) or a pandas DataFrame
        (its columns are scanned)
    :param pattern: container tested with ``in`` against each lower-cased label
    :return: list of the stripped labels that matched, in their original order
    :raises TypeError: if fname is neither a dict nor a DataFrame
    """
    if isinstance(fname, dict):
        labels = fname.keys()
    elif isinstance(fname, pd.DataFrame):
        labels = fname.columns
    else:
        raise TypeError('fname must be dict or DataFrame')

    hits = []
    for label in labels:
        cleaned = str(label).strip()
        # The comparison is lower-cased, but the label is returned as written.
        if cleaned.lower() in pattern:
            hits.append(cleaned)
    return hits
def skip_metadata(fname: pathlib.PurePath | str,
keyword: str) -> list:
"""
Use given keyword pattern to skip any metadata above the file in tTEM xyz file
:return:
"""
with open(str(fname), 'r') as file:
lines = file.readlines()
regex = re.compile(keyword)
match_index = []
for index, line in enumerate(lines):
if regex.search(line):
match_index.append(index)
if len(match_index) == 0:
raise ValueError('No keywords pattern matched "{}" in file {}'.format(keyword_pattern, str(fname)))
elif len(match_index) > 1:
raise ValueError('Found multiple keywords pattern matched "{}" in file {}'. format(keyword_pattern, str(fname)))
data = [line[1::].strip().split() for line in lines[match_index[0]::]]
return data
def type_convert(config_str: str):
    """
    Convert the raw text of a config value into a Python object.

    Rules, applied in order after stripping surrounding whitespace:

    * empty string or ``None`` -> ``None``
    * optionally signed integer -> ``int``
    * optionally signed decimal -> ``float``
    * scientific notation with a lower-case ``e`` -> ``int`` when the
      mantissa has no decimal point and the exponent is not negative,
      otherwise ``float``
    * ``T`` / ``True`` -> ``True``; ``F`` / ``False`` -> ``False``
    * ``NA`` / ``NAN`` -> ``float('nan')``
    * text wrapped in matching single or double quotes -> the text without
      the outer pair of quotes
    * ``[a, b, c]`` -> list of the stripped items, left as strings
    * anything else -> the stripped string unchanged
    :param config_str: raw value text
    :return: the converted value
    """
    config_str = config_str.strip()
    if not config_str:
        return None
    # fullmatch with \d accepts exactly the digit strings int()/float() can
    # parse, so malformed tokens fall through instead of raising ValueError.
    if re.fullmatch(r'[+-]?\d+', config_str):
        return int(config_str)
    if re.fullmatch(r'[+-]?(?:\d+\.\d*|\.\d+)', config_str):
        return float(config_str)
    scientific = re.fullmatch(r'[+-]?(\d+(?:\.\d*)?|\.\d+)e([+-]?)\d+', config_str)
    if scientific:
        mantissa, exponent_sign = scientific.groups()
        number = float(config_str)
        # "1e5" stays an integer; a fractional mantissa or a negative
        # exponent would lose information if truncated.
        if '.' not in mantissa and exponent_sign != '-':
            return int(number)
        return number
    if config_str == 'None':
        return None
    if config_str in ('T', 'True'):
        return True
    if config_str in ('F', 'False'):
        return False
    if config_str in ('NA', 'NAN'):
        return float('nan')
    for quote in ('"', "'"):
        if config_str.startswith(quote) and config_str.endswith(quote):
            # Strip only the outer pair so inner quotes/apostrophes survive.
            return config_str[1:-1]
    if config_str.startswith('[') and config_str.endswith(']'):
        return [item.strip() for item in config_str[1:-1].split(',')]
    return config_str
def parse_config(config_path: str | pathlib.PurePath) -> dict:
    """
    Read a CONFIG file and return its ``key = value`` entries as a dictionary.

    Lines that begin with ``#`` or with a space are ignored, as are lines
    that are empty after stripping. Every remaining line is split on its
    first ``=``; the stripped left part becomes the key and the right part
    is passed through ``type_convert``.
    :param config_path: path-like object pointing to the CONFIG file
    :return: dictionary mapping each key to its converted value
    """
    parsed = {}
    with open(config_path, 'r') as handle:
        for raw_line in handle:
            # The prefix test runs on the unstripped line, so an indented
            # line is skipped even when it holds an assignment.
            if raw_line.startswith(("#", " ")):
                continue
            entry = raw_line.strip()
            if entry == '':
                continue
            name, raw_value = entry.split("=", 1)
            parsed[name.strip()] = type_convert(raw_value)
    return parsed
def clean_output(output_folder: pathlib.PurePath, force=False):
    """
    Remove the output folder, asking first when delivered files exist.

    With ``force`` the folder is deleted without asking. Otherwise the
    folder is deleted only when its ``deliver`` sub-folder contains
    entries and the user answers ``y`` to the prompt.
    :param output_folder: root output folder (str or path object)
    :param force: delete without prompting when true
    :return: None
    """
    # A concrete Path is required: PurePath offers no glob().
    output_folder = Path(output_folder)
    if force:
        if output_folder.exists():
            shutil.rmtree(output_folder)
        return
    deliver_folder = output_folder.joinpath('deliver')
    if not any(deliver_folder.glob('*')):
        return
    # Format the prompt itself; formatting input()'s return value left a
    # literal "{}" on screen.
    prompt = 'Files exist under delivery folder {}, would you like to overlap y/n?'.format(deliver_folder)
    userinput = input(prompt)
    if userinput.strip().lower() == 'y':
        shutil.rmtree(output_folder)
def create_dir_structure(config: dict) -> dict:
    """
    Create the output folder layout and return the config extended with it.

    Under ``config['output']`` this creates ``temp`` and ``deliver``, then
    ``ttem_temp``, ``well_temp``, ``gamma_temp`` and ``water_temp`` inside
    ``temp``. Existing folders are left in place.
    :param config: configuration dictionary; must contain the key ``output``
    :return: new dictionary holding every entry of ``config`` plus the keys
        ``deliver``, ``ttem_temp``, ``well_temp``, ``gamma_temp`` and
        ``water_temp`` mapped to the created paths
    """
    output_root = Path(config['output'])
    temp_root = output_root / 'temp'
    deliver_dir = output_root / 'deliver'
    for top_level in (temp_root, deliver_dir):
        top_level.mkdir(parents=True, exist_ok=True)

    layout = {'deliver': deliver_dir}
    for name in ('ttem_temp', 'well_temp', 'gamma_temp', 'water_temp'):
        scratch_dir = temp_root / name
        scratch_dir.mkdir(parents=True, exist_ok=True)
        layout[name] = scratch_dir

    # Layout entries win over same-named keys already present in config.
    return {**config, **layout}
Functions
def clean_output(output_folder: pathlib.PurePath, force=False)
-
Expand source code
def clean_output(output_folder: pathlib.PurePath, force=False):
    """
    Remove the output folder, asking first when delivered files exist.

    With ``force`` the folder is deleted without asking. Otherwise the
    folder is deleted only when its ``deliver`` sub-folder contains
    entries and the user answers ``y`` to the prompt.
    :param output_folder: root output folder (str or path object)
    :param force: delete without prompting when true
    :return: None
    """
    # A concrete Path is required: PurePath offers no glob().
    output_folder = Path(output_folder)
    if force:
        if output_folder.exists():
            shutil.rmtree(output_folder)
        return
    deliver_folder = output_folder.joinpath('deliver')
    if not any(deliver_folder.glob('*')):
        return
    # Format the prompt itself; formatting input()'s return value left a
    # literal "{}" on screen.
    prompt = 'Files exist under delivery folder {}, would you like to overlap y/n?'.format(deliver_folder)
    userinput = input(prompt)
    if userinput.strip().lower() == 'y':
        shutil.rmtree(output_folder)
def create_dir_structure(config: dict) ‑> dict
-
Expand source code
def create_dir_structure(config: dict) -> dict:
    """
    Create the output folder layout and return the config extended with it.

    Under ``config['output']`` this creates ``temp`` and ``deliver``, then
    ``ttem_temp``, ``well_temp``, ``gamma_temp`` and ``water_temp`` inside
    ``temp``. Existing folders are left in place.
    :param config: configuration dictionary; must contain the key ``output``
    :return: new dictionary holding every entry of ``config`` plus the keys
        ``deliver``, ``ttem_temp``, ``well_temp``, ``gamma_temp`` and
        ``water_temp`` mapped to the created paths
    """
    output_root = Path(config['output'])
    temp_root = output_root / 'temp'
    deliver_dir = output_root / 'deliver'
    for top_level in (temp_root, deliver_dir):
        top_level.mkdir(parents=True, exist_ok=True)

    layout = {'deliver': deliver_dir}
    for name in ('ttem_temp', 'well_temp', 'gamma_temp', 'water_temp'):
        scratch_dir = temp_root / name
        scratch_dir.mkdir(parents=True, exist_ok=True)
        layout[name] = scratch_dir

    # Layout entries win over same-named keys already present in config.
    return {**config, **layout}
def keyword_search(fname, pattern)
-
Try to find the keywords that match the pre-defined lithology keywords :param fname: :param pattern: :return:
Expand source code
def keyword_search(fname, pattern):
    """
    Collect the dict keys or DataFrame column names that match the keywords.

    Every label is converted to ``str`` and stripped of surrounding
    whitespace; it is kept when its lower-cased form satisfies
    ``label.lower() in pattern``.
    :param fname: a dict (its keys are scanned) or a pandas DataFrame
        (its columns are scanned)
    :param pattern: container tested with ``in`` against each lower-cased label
    :return: list of the stripped labels that matched, in their original order
    :raises TypeError: if fname is neither a dict nor a DataFrame
    """
    if isinstance(fname, dict):
        labels = fname.keys()
    elif isinstance(fname, pd.DataFrame):
        labels = fname.columns
    else:
        raise TypeError('fname must be dict or DataFrame')

    hits = []
    for label in labels:
        cleaned = str(label).strip()
        # The comparison is lower-cased, but the label is returned as written.
        if cleaned.lower() in pattern:
            hits.append(cleaned)
    return hits
def parse_config(config_path: str | pathlib.PurePath) ‑> dict
-
This function takes a path-like object pointing to the CONFIG file and parses the config file into a dictionary :param config_path: path-like object, the path to the CONFIG file :return: dictionary, the parsed config file in a dictionary format
Expand source code
def parse_config(config_path: str | pathlib.PurePath) -> dict:
    """
    Read a CONFIG file and return its ``key = value`` entries as a dictionary.

    Lines that begin with ``#`` or with a space are ignored, as are lines
    that are empty after stripping. Every remaining line is split on its
    first ``=``; the stripped left part becomes the key and the right part
    is passed through ``type_convert``.
    :param config_path: path-like object pointing to the CONFIG file
    :return: dictionary mapping each key to its converted value
    """
    parsed = {}
    with open(config_path, 'r') as handle:
        for raw_line in handle:
            # The prefix test runs on the unstripped line, so an indented
            # line is skipped even when it holds an assignment.
            if raw_line.startswith(("#", " ")):
                continue
            entry = raw_line.strip()
            if entry == '':
                continue
            name, raw_value = entry.split("=", 1)
            parsed[name.strip()] = type_convert(raw_value)
    return parsed
def skip_metadata(fname: str | pathlib.PurePath, keyword: str) ‑> list
-
Use the given keyword pattern to skip any metadata at the top of a tTEM xyz file :return:
Expand source code
def skip_metadata(fname: pathlib.PurePath | str, keyword: str) -> list: """ Use given keyword pattern to skip any metadata above the file in tTEM xyz file :return: """ with open(str(fname), 'r') as file: lines = file.readlines() regex = re.compile(keyword) match_index = [] for index, line in enumerate(lines): if regex.search(line): match_index.append(index) if len(match_index) == 0: raise ValueError('No keywords pattern matched "{}" in file {}'.format(keyword_pattern, str(fname))) elif len(match_index) > 1: raise ValueError('Found multiple keywords pattern matched "{}" in file {}'. format(keyword_pattern, str(fname))) data = [line[1::].strip().split() for line in lines[match_index[0]::]] return data
def type_convert(config_str: str)
-
Expand source code
def type_convert(config_str: str):
    """
    Convert the raw text of a config value into a Python object.

    Rules, applied in order after stripping surrounding whitespace:

    * empty string or ``None`` -> ``None``
    * optionally signed integer -> ``int``
    * optionally signed decimal -> ``float``
    * scientific notation with a lower-case ``e`` -> ``int`` when the
      mantissa has no decimal point and the exponent is not negative,
      otherwise ``float``
    * ``T`` / ``True`` -> ``True``; ``F`` / ``False`` -> ``False``
    * ``NA`` / ``NAN`` -> ``float('nan')``
    * text wrapped in matching single or double quotes -> the text without
      the outer pair of quotes
    * ``[a, b, c]`` -> list of the stripped items, left as strings
    * anything else -> the stripped string unchanged
    :param config_str: raw value text
    :return: the converted value
    """
    config_str = config_str.strip()
    if not config_str:
        return None
    # fullmatch with \d accepts exactly the digit strings int()/float() can
    # parse, so malformed tokens fall through instead of raising ValueError.
    if re.fullmatch(r'[+-]?\d+', config_str):
        return int(config_str)
    if re.fullmatch(r'[+-]?(?:\d+\.\d*|\.\d+)', config_str):
        return float(config_str)
    scientific = re.fullmatch(r'[+-]?(\d+(?:\.\d*)?|\.\d+)e([+-]?)\d+', config_str)
    if scientific:
        mantissa, exponent_sign = scientific.groups()
        number = float(config_str)
        # "1e5" stays an integer; a fractional mantissa or a negative
        # exponent would lose information if truncated.
        if '.' not in mantissa and exponent_sign != '-':
            return int(number)
        return number
    if config_str == 'None':
        return None
    if config_str in ('T', 'True'):
        return True
    if config_str in ('F', 'False'):
        return False
    if config_str in ('NA', 'NAN'):
        return float('nan')
    for quote in ('"', "'"):
        if config_str.startswith(quote) and config_str.endswith(quote):
            # Strip only the outer pair so inner quotes/apostrophes survive.
            return config_str[1:-1]
    if config_str.startswith('[') and config_str.endswith(']'):
        return [item.strip() for item in config_str[1:-1].split(',')]
    return config_str