meerschaum
Meerschaum Python API
Welcome to the Meerschaum Python API technical documentation! Here you can find information about the classes and functions provided by the meerschaum
package. Visit meerschaum.io for general usage documentation.
Root Module
For your convenience, the following classes and functions may be imported from the root `meerschaum` namespace:
Classes
Per the `__all__` tuple in the module source below, the root namespace exports the classes `meerschaum.Pipe`, `meerschaum.Plugin`, `meerschaum.Venv`, `meerschaum.Job`, and `meerschaum.Connector`, as well as the type alias `meerschaum.SuccessTuple`.
Examples
Build a Connector
Get existing connectors or build a new one in-memory with the `meerschaum.get_connector()` factory function:
import meerschaum as mrsm

sql_conn = mrsm.get_connector(
    'sql:temp',
    flavor='sqlite',
    database='/tmp/tmp.db',
)

df = sql_conn.read("SELECT 1 AS foo")
print(df)
#    foo
# 0    1

sql_conn.to_sql(df, 'foo')
print(sql_conn.read('foo'))
#    foo
# 0    1
Create a Custom Connector Class
Decorate your connector class with `meerschaum.make_connector()` to designate it as a custom connector:
from datetime import datetime, timezone
from random import randint
import meerschaum as mrsm
from meerschaum.utils.misc import round_time

@mrsm.make_connector
class FooConnector(mrsm.Connector):
    REQUIRED_ATTRIBUTES = ['username', 'password']

    def fetch(
        self,
        begin: datetime | None = None,
        end: datetime | None = None,
    ):
        now = begin or round_time(datetime.now(timezone.utc))
        return [
            {'ts': now, 'id': 1, 'vl': randint(1, 100)},
            {'ts': now, 'id': 2, 'vl': randint(1, 100)},
            {'ts': now, 'id': 3, 'vl': randint(1, 100)},
        ]

foo_conn = mrsm.get_connector(
    'foo:bar',
    username='foo',
    password='bar',
)

docs = foo_conn.fetch()
Build a Pipe
Build a `meerschaum.Pipe` in-memory:
from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe(
    foo_conn, 'demo',
    instance=sql_conn,
    columns={'datetime': 'ts', 'id': 'id'},
    tags=['production'],
)

pipe.sync(begin=datetime(2024, 1, 1))
df = pipe.get_data()
print(df)
#           ts  id  vl
# 0 2024-01-01   1  97
# 1 2024-01-01   2  18
# 2 2024-01-01   3  96
Add `temporary=True` to skip registering the pipe in the pipes table.
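For instance, a minimal sketch of an unregistered, in-memory pipe, reusing the connectors from above (the location key 'scratch' is illustrative):

import meerschaum as mrsm

# Temporary pipes sync normally but are never registered,
# so they won't appear among registered pipes.
temp_pipe = mrsm.Pipe(
    foo_conn, 'demo', 'scratch',
    instance=sql_conn,
    columns={'datetime': 'ts', 'id': 'id'},
    temporary=True,
)
temp_pipe.sync(foo_conn.fetch())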
Get Registered Pipes
The `meerschaum.get_pipes()` function returns a dictionary hierarchy of pipes by connector, metric, and location:
import meerschaum as mrsm
pipes = mrsm.get_pipes(instance='sql:temp')
pipe = pipes['foo:bar']['demo'][None]
Add `as_list=True` to flatten the hierarchy:
import meerschaum as mrsm

pipes = mrsm.get_pipes(
    tags=['production'],
    instance=sql_conn,
    as_list=True,
)
print(pipes)
# [Pipe('foo:bar', 'demo', instance='sql:temp')]
Import Plugins
You can import a plugin's module through `meerschaum.Plugin.module`:
import meerschaum as mrsm

plugin = mrsm.Plugin('noaa')
with mrsm.Venv(plugin):
    noaa = plugin.module
If your plugin has submodules, use `meerschaum.plugins.from_plugin_import`:
from meerschaum.plugins import from_plugin_import
get_defined_pipes = from_plugin_import('compose.utils.pipes', 'get_defined_pipes')
Import multiple plugins with `meerschaum.plugins.import_plugins`:
from meerschaum.plugins import import_plugins
noaa, compose = import_plugins('noaa', 'compose')
Create a Job
Create a `meerschaum.Job` with `name` and `sysargs`:
import meerschaum as mrsm
job = mrsm.Job('syncing-engine', 'sync pipes --loop')
success, msg = job.start()
Pass `executor_keys` as the connector keys of an API instance to create a remote job:
import meerschaum as mrsm

job = mrsm.Job(
    'foo',
    'sync pipes -s daily',
    executor_keys='api:main',
)
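Existing jobs may later be retrieved with the helpers in `meerschaum.jobs` (listed under Submodules below). A minimal sketch, assuming `get_jobs()` returns a mapping of job names to `Job` objects:

from meerschaum.jobs import get_jobs

jobs = get_jobs()
for name, job in jobs.items():
    # Each job wraps the sysargs it was created with, e.g. 'sync pipes --loop'.
    print(name, job.sysargs)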
Import from a Virtual Environment
Use the `meerschaum.Venv` context manager to activate a virtual environment:
import meerschaum as mrsm

with mrsm.Venv('noaa'):
    import requests

print(requests.__file__)
# /home/bmeares/.config/meerschaum/venvs/noaa/lib/python3.12/site-packages/requests/__init__.py
To import packages which may not be installed, use `meerschaum.attempt_import()`:
import meerschaum as mrsm
requests = mrsm.attempt_import('requests', venv='noaa')
print(requests.__file__)
# /home/bmeares/.config/meerschaum/venvs/noaa/lib/python3.12/site-packages/requests/__init__.py
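`attempt_import()` also accepts multiple package names at once; a sketch, assuming the modules come back as a tuple in the order requested:

import meerschaum as mrsm

# Assumption: multiple names yield a tuple of modules.
pd, np = mrsm.attempt_import('pandas', 'numpy')
print(pd.__name__, np.__name__)
# pandas numpy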
Run Actions
Run `sysargs` with `meerschaum.entry()`:
import meerschaum as mrsm
success, msg = mrsm.entry('show pipes + show version : x2')
Use `meerschaum.actions.get_action()` to access an action function directly:
from meerschaum.actions import get_action
show_pipes = get_action(['show', 'pipes'])
success, msg = show_pipes(connector_keys=['plugin:noaa'])
Get a dictionary of available subactions with `meerschaum.actions.get_subactions()`:
from meerschaum.actions import get_subactions
subactions = get_subactions('show')
success, msg = subactions['pipes']()
Create a Plugin
Run `bootstrap plugin` to create a new plugin:
mrsm bootstrap plugin example
This will create `example.py` in your plugins directory (default `~/.config/meerschaum/plugins/`, Windows: `%APPDATA%\Meerschaum\plugins`). You may paste the example code from the "Create a Custom Action" example below.
Open your plugin with `edit plugin`:
mrsm edit plugin example
Run `edit plugin` and paste the example code below to try out the features.
See the writing plugins guide for more in-depth documentation.
Create a Custom Action
Decorate a function with `meerschaum.actions.make_action` to designate it as an action. Subactions will be automatically detected if not decorated:
from meerschaum.actions import make_action

@make_action
def sing():
    print('What would you like me to sing?')
    return True, "Success"

def sing_tune():
    return False, "I don't know that song!"

def sing_song():
    print('Hello, World!')
    return True, "Success"
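Following the same shell invocation pattern as the melody example below, these would then be run as (hypothetical session):

mrsm sing
mrsm sing song
mrsm sing tune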
Use `meerschaum.plugins.add_plugin_argument()` to create new parameters for your action:
from meerschaum.plugins import make_action, add_plugin_argument

add_plugin_argument(
    '--song', type=str, help='What song to sing.',
)

@make_action
def sing_melody(action=None, song=None):
    to_sing = action[0] if action else song
    if not to_sing:
        return False, "Please tell me what to sing!"
    return True, f'~I am singing {to_sing}~'
mrsm sing melody lalala
mrsm sing melody --song do-re-mi
Add a Page to the Web Dashboard
Use the decorators `meerschaum.plugins.dash_plugin()` and `meerschaum.plugins.web_page()` to add new pages to the web dashboard:
from meerschaum.plugins import dash_plugin, web_page

@dash_plugin
def init_dash(dash_app):

    import dash.html as html
    import dash_bootstrap_components as dbc
    from dash import Input, Output, no_update

    ### Routes to '/dash/my-page'
    @web_page('/my-page', login_required=False)
    def my_page():
        return dbc.Container([
            html.H1("Hello, World!"),
            dbc.Button("Click me", id='my-button'),
            html.Div(id="my-output-div"),
        ])

    @dash_app.callback(
        Output('my-output-div', 'children'),
        Input('my-button', 'n_clicks'),
    )
    def my_button_click(n_clicks):
        if not n_clicks:
            return no_update
        return html.P(f'You clicked {n_clicks} times!')
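To view the page, serve the dashboard with the web API (the example above routes to `/dash/my-page`):

mrsm start api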
Submodules
meerschaum.actions
Access functions for actions and subactions.
meerschaum.actions.actions
meerschaum.actions.get_action()
meerschaum.actions.get_completer()
meerschaum.actions.get_main_action_name()
meerschaum.actions.get_subactions()
meerschaum.config
Read and write the Meerschaum configuration registry.
meerschaum.config.get_config()
meerschaum.config.get_plugin_config()
meerschaum.config.write_config()
meerschaum.config.write_plugin_config()
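For example, a minimal sketch of reading and writing configuration; the keys shown are illustrative, and `write_config()` is assumed to accept a patch dictionary as in the `get_config()` source further below:

from meerschaum.config import get_config, write_config

# Index into the configuration registry by keys.
instance_keys = get_config('meerschaum', 'instance')

# Persist a (no-op) patch back to the configuration files.
write_config({'meerschaum': {'instance': instance_keys}})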
meerschaum.connectors
Build connectors to interact with databases and fetch data.
meerschaum.connectors.get_connector()
meerschaum.connectors.make_connector()
meerschaum.connectors.is_connected()
meerschaum.connectors.poll.retry_connect()
meerschaum.connectors.Connector
meerschaum.connectors.sql.SQLConnector
meerschaum.connectors.api.APIConnector
meerschaum.connectors.valkey.ValkeyConnector
meerschaum.jobs
Start background jobs.
meerschaum.jobs.Job
meerschaum.jobs.Executor
meerschaum.jobs.systemd.SystemdExecutor
meerschaum.jobs.get_jobs()
meerschaum.jobs.get_filtered_jobs()
meerschaum.jobs.get_running_jobs()
meerschaum.jobs.get_stopped_jobs()
meerschaum.jobs.get_paused_jobs()
meerschaum.jobs.get_restart_jobs()
meerschaum.jobs.make_executor()
meerschaum.jobs.check_restart_jobs()
meerschaum.jobs.start_check_jobs_thread()
meerschaum.jobs.stop_check_jobs_thread()
meerschaum.plugins
Access plugin modules and other API utilities.
meerschaum.plugins.Plugin
meerschaum.plugins.api_plugin()
meerschaum.plugins.dash_plugin()
meerschaum.plugins.import_plugins()
meerschaum.plugins.reload_plugins()
meerschaum.plugins.get_plugins()
meerschaum.plugins.get_data_plugins()
meerschaum.plugins.add_plugin_argument()
meerschaum.plugins.pre_sync_hook()
meerschaum.plugins.post_sync_hook()
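For example, a sketch of the sync hook decorators; the exact hook signatures here are assumptions (functions receiving the pipe plus whatever keyword arguments the sync engine passes):

from meerschaum.plugins import pre_sync_hook, post_sync_hook

@pre_sync_hook
def before_sync(pipe, **kwargs):
    # Assumed to be invoked before a pipe syncs.
    print(f"About to sync {pipe}.")

@post_sync_hook
def after_sync(pipe, **kwargs):
    # Assumed to be invoked after a pipe syncs.
    print(f"Finished syncing {pipe}.")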
meerschaum.utils
Utility functions are available in several submodules:
meerschaum.utils.daemon
Manage background jobs.
meerschaum.utils.daemon.daemon_entry()
meerschaum.utils.daemon.daemon_action()
meerschaum.utils.daemon.get_daemons()
meerschaum.utils.daemon.get_daemon_ids()
meerschaum.utils.daemon.get_running_daemons()
meerschaum.utils.daemon.get_paused_daemons()
meerschaum.utils.daemon.get_stopped_daemons()
meerschaum.utils.daemon.get_filtered_daemons()
meerschaum.utils.daemon.run_daemon()
meerschaum.utils.daemon.Daemon
meerschaum.utils.daemon.FileDescriptorInterceptor
meerschaum.utils.daemon.RotatingFile
meerschaum.utils.dataframe
Manipulate dataframes.
meerschaum.utils.dataframe.add_missing_cols_to_df()
meerschaum.utils.dataframe.df_is_chunk_generator()
meerschaum.utils.dataframe.enforce_dtypes()
meerschaum.utils.dataframe.filter_unseen_df()
meerschaum.utils.dataframe.get_datetime_bound_from_df()
meerschaum.utils.dataframe.get_first_valid_dask_partition()
meerschaum.utils.dataframe.get_json_cols()
meerschaum.utils.dataframe.get_numeric_cols()
meerschaum.utils.dataframe.get_unhashable_cols()
meerschaum.utils.dataframe.parse_df_datetimes()
meerschaum.utils.dataframe.query_df()
meerschaum.utils.dataframe.to_json()
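For example, a sketch of `filter_unseen_df()`, assuming it returns the rows of the new dataframe which are absent from the old one (the backbone of how pipes skip already-seen rows):

import pandas as pd
from meerschaum.utils.dataframe import filter_unseen_df

old_df = pd.DataFrame({'id': [1, 2]})
new_df = pd.DataFrame({'id': [2, 3]})

# Assumed behavior: only the unseen row (id == 3) remains.
print(filter_unseen_df(old_df, new_df))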
meerschaum.utils.dtypes
Work with data types.
meerschaum.utils.dtypes.are_dtypes_equal()
meerschaum.utils.dtypes.attempt_cast_to_numeric()
meerschaum.utils.dtypes.is_dtype_numeric()
meerschaum.utils.dtypes.none_if_null()
meerschaum.utils.dtypes.quantize_decimal()
meerschaum.utils.dtypes.to_pandas_dtype()
meerschaum.utils.dtypes.value_is_null()
meerschaum.utils.dtypes.sql.get_pd_type_from_db_type()
meerschaum.utils.dtypes.sql.get_db_type_from_pd_type()
meerschaum.utils.formatting
Format output text.
meerschaum.utils.formatting.colored()
meerschaum.utils.formatting.extract_stats_from_message()
meerschaum.utils.formatting.fill_ansi()
meerschaum.utils.formatting.get_console()
meerschaum.utils.formatting.highlight_pipes()
meerschaum.utils.formatting.make_header()
meerschaum.utils.formatting.pipe_repr()
meerschaum.utils.formatting.pprint()
meerschaum.utils.formatting.pprint_pipes()
meerschaum.utils.formatting.print_options()
meerschaum.utils.formatting.print_pipes_results()
meerschaum.utils.formatting.print_tuple()
meerschaum.utils.formatting.translate_rich_to_termcolor()
meerschaum.utils.misc
Miscellaneous utility functions.
meerschaum.utils.misc.items_str()
meerschaum.utils.misc.round_time()
meerschaum.utils.misc.is_int()
meerschaum.utils.misc.interval_str()
meerschaum.utils.misc.filter_keywords()
meerschaum.utils.misc.generate_password()
meerschaum.utils.misc.string_to_dict()
meerschaum.utils.misc.iterate_chunks()
meerschaum.utils.misc.timed_input()
meerschaum.utils.misc.replace_pipes_in_dict()
meerschaum.utils.misc.is_valid_email()
meerschaum.utils.misc.string_width()
meerschaum.utils.misc.replace_password()
meerschaum.utils.misc.parse_config_substitution()
meerschaum.utils.misc.edit_file()
meerschaum.utils.misc.get_in_ex_params()
meerschaum.utils.misc.separate_negation_values()
meerschaum.utils.misc.flatten_list()
meerschaum.utils.misc.make_symlink()
meerschaum.utils.misc.is_symlink()
meerschaum.utils.misc.wget()
meerschaum.utils.misc.add_method_to_class()
meerschaum.utils.misc.is_pipe_registered()
meerschaum.utils.misc.get_cols_lines()
meerschaum.utils.misc.sorted_dict()
meerschaum.utils.misc.flatten_pipes_dict()
meerschaum.utils.misc.dict_from_od()
meerschaum.utils.misc.remove_ansi()
meerschaum.utils.misc.get_connector_labels()
meerschaum.utils.misc.json_serialize_datetime()
meerschaum.utils.misc.async_wrap()
meerschaum.utils.misc.is_docker_available()
meerschaum.utils.misc.is_android()
meerschaum.utils.misc.is_bcp_available()
meerschaum.utils.misc.truncate_string_sections()
meerschaum.utils.misc.safely_extract_tar()
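A few of these helpers are simple enough to sketch inline (the outputs shown are assumptions, not verified):

from meerschaum.utils.misc import is_int, flatten_list, items_str

print(is_int('42'))
# True

print(flatten_list([[1], [2, 3]]))
# [1, 2, 3]

print(items_str(['a', 'b', 'c']))
# "'a', 'b' and 'c'" (assumed human-readable join)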
meerschaum.utils.packages
Manage Python packages.
meerschaum.utils.packages.attempt_import()
meerschaum.utils.packages.get_module_path()
meerschaum.utils.packages.manually_import_module()
meerschaum.utils.packages.get_install_no_version()
meerschaum.utils.packages.determine_version()
meerschaum.utils.packages.need_update()
meerschaum.utils.packages.get_pip()
meerschaum.utils.packages.pip_install()
meerschaum.utils.packages.pip_uninstall()
meerschaum.utils.packages.completely_uninstall_package()
meerschaum.utils.packages.run_python_package()
meerschaum.utils.packages.lazy_import()
meerschaum.utils.packages.pandas_name()
meerschaum.utils.packages.import_pandas()
meerschaum.utils.packages.import_rich()
meerschaum.utils.packages.import_dcc()
meerschaum.utils.packages.import_html()
meerschaum.utils.packages.get_modules_from_package()
meerschaum.utils.packages.import_children()
meerschaum.utils.packages.reload_package()
meerschaum.utils.packages.reload_meerschaum()
meerschaum.utils.packages.is_installed()
meerschaum.utils.packages.venv_contains_package()
meerschaum.utils.packages.package_venv()
meerschaum.utils.packages.ensure_readline()
meerschaum.utils.packages.get_prerelease_dependencies()
meerschaum.utils.sql
Build SQL queries.
meerschaum.utils.sql.build_where()
meerschaum.utils.sql.clean()
meerschaum.utils.sql.dateadd_str()
meerschaum.utils.sql.test_connection()
meerschaum.utils.sql.get_distinct_col_count()
meerschaum.utils.sql.sql_item_name()
meerschaum.utils.sql.pg_capital()
meerschaum.utils.sql.oracle_capital()
meerschaum.utils.sql.truncate_item_name()
meerschaum.utils.sql.table_exists()
meerschaum.utils.sql.get_table_cols_types()
meerschaum.utils.sql.get_update_queries()
meerschaum.utils.sql.get_null_replacement()
meerschaum.utils.sql.get_db_version()
meerschaum.utils.sql.get_rename_table_queries()
meerschaum.utils.sql.get_create_table_queries()
meerschaum.utils.sql.wrap_query_with_cte()
meerschaum.utils.sql.format_cte_subquery()
meerschaum.utils.sql.session_execute()
meerschaum.utils.sql.get_reset_autoincrement_queries()
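For example, `sql_item_name()` quotes an identifier for a given flavor; its (item, flavor, schema) usage appears in the `Pipe.cache_pipe` source below, though the exact quoting output here is an assumption:

from meerschaum.utils.sql import sql_item_name

print(sql_item_name('my table', 'postgresql'))
# "my table" (assumed double-quoted for PostgreSQL)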
meerschaum.utils.venv
Manage virtual environments.
meerschaum.utils.venv.Venv
meerschaum.utils.venv.activate_venv()
meerschaum.utils.venv.deactivate_venv()
meerschaum.utils.venv.get_module_venv()
meerschaum.utils.venv.get_venvs()
meerschaum.utils.venv.init_venv()
meerschaum.utils.venv.inside_venv()
meerschaum.utils.venv.is_venv_active()
meerschaum.utils.venv.venv_exec()
meerschaum.utils.venv.venv_executable()
meerschaum.utils.venv.venv_exists()
meerschaum.utils.venv.venv_target_path()
meerschaum.utils.venv.verify_venv()
meerschaum.utils.warnings
Print warnings, errors, info, and debug messages.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

"""
Copyright 2023 Bennett Meares

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import atexit
from meerschaum.utils.typing import SuccessTuple
from meerschaum.utils.packages import attempt_import
from meerschaum.core.Pipe import Pipe
from meerschaum.plugins import Plugin
from meerschaum.utils.venv import Venv
from meerschaum.jobs import Job, make_executor
from meerschaum.connectors import get_connector, Connector, make_connector
from meerschaum.utils import get_pipes
from meerschaum.utils.formatting import pprint
from meerschaum._internal.docs import index as __doc__
from meerschaum.config import __version__, get_config
from meerschaum._internal.entry import entry
from meerschaum.__main__ import _close_pools

atexit.register(_close_pools)

__pdoc__ = {'gui': False, 'api': False, 'core': False, '_internal': False}
__all__ = (
    "get_pipes",
    "get_connector",
    "get_config",
    "Pipe",
    "Plugin",
    "Venv",
    "Plugin",
    "Job",
    "pprint",
    "attempt_import",
    "actions",
    "config",
    "connectors",
    "jobs",
    "plugins",
    "utils",
    "SuccessTuple",
    "Connector",
    "make_connector",
    "entry",
)
def get_pipes(
    connector_keys: Union[str, List[str], None] = None,
    metric_keys: Union[str, List[str], None] = None,
    location_keys: Union[str, List[str], None] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    mrsm_instance: Union[str, InstanceConnector, None] = None,
    instance: Union[str, InstanceConnector, None] = None,
    as_list: bool = False,
    method: str = 'registered',
    debug: bool = False,
    **kw: Any
) -> Union[PipesDict, List[mrsm.Pipe]]:
    """
    Return a dictionary or list of `meerschaum.Pipe` objects.

    Parameters
    ----------
    connector_keys: Union[str, List[str], None], default None
        String or list of connector keys.
        If omitted or is `'*'`, fetch all possible keys.
        If a string begins with `'_'`, select keys that do NOT match the string.

    metric_keys: Union[str, List[str], None], default None
        String or list of metric keys. See `connector_keys` for formatting.

    location_keys: Union[str, List[str], None], default None
        String or list of location keys. See `connector_keys` for formatting.

    tags: Optional[List[str]], default None
        If provided, only include pipes with these tags.

    params: Optional[Dict[str, Any]], default None
        Dictionary of additional parameters to search by.
        Params are parsed into a SQL WHERE clause.
        E.g. `{'a': 1, 'b': 2}` equates to `'WHERE a = 1 AND b = 2'`

    mrsm_instance: Union[str, InstanceConnector, None], default None
        Connector keys for the Meerschaum instance of the pipes.
        Must be a `meerschaum.connectors.sql.SQLConnector.SQLConnector` or
        `meerschaum.connectors.api.APIConnector.APIConnector`.

    as_list: bool, default False
        If `True`, return pipes in a list instead of a hierarchical dictionary.
        `False` : `{connector_keys: {metric_key: {location_key: Pipe}}}`
        `True`  : `[Pipe]`

    method: str, default 'registered'
        Available options: `['registered', 'explicit', 'all']`
        If `'registered'` (default), create pipes based on registered keys in the connector's pipes table
        (API or SQL connector, depending on mrsm_instance).
        If `'explicit'`, create pipes from provided connector_keys, metric_keys, and location_keys
        instead of consulting the pipes table. Useful for creating non-existent pipes.
        If `'all'`, create pipes from predefined metrics and locations. Requires `connector_keys`.
        **NOTE:** Method `'all'` is not implemented!

    **kw: Any
        Keyword arguments to pass to the `meerschaum.Pipe` constructor.


    Returns
    -------
    A dictionary of dictionaries and `meerschaum.Pipe` objects
    in the connector, metric, location hierarchy.
    If `as_list` is `True`, return a list of `meerschaum.Pipe` objects.

    Examples
    --------
    ```
    >>> ### Manual definition:
    >>> pipes = {
    ...     <connector_keys>: {
    ...         <metric_key>: {
    ...             <location_key>: Pipe(
    ...                 <connector_keys>,
    ...                 <metric_key>,
    ...                 <location_key>,
    ...             ),
    ...         },
    ...     },
    ... },
    >>> ### Accessing a single pipe:
    >>> pipes['sql:main']['weather'][None]
    >>> ### Return a list instead:
    >>> get_pipes(as_list=True)
    [sql_main_weather]
    >>>
    ```
    """

    from meerschaum.config import get_config
    from meerschaum.utils.warnings import error
    from meerschaum.utils.misc import filter_keywords

    if connector_keys is None:
        connector_keys = []
    if metric_keys is None:
        metric_keys = []
    if location_keys is None:
        location_keys = []
    if params is None:
        params = {}
    if tags is None:
        tags = []

    if isinstance(connector_keys, str):
        connector_keys = [connector_keys]
    if isinstance(metric_keys, str):
        metric_keys = [metric_keys]
    if isinstance(location_keys, str):
        location_keys = [location_keys]

    ### Get SQL or API connector (keys come from `connector.fetch_pipes_keys()`).
    if mrsm_instance is None:
        mrsm_instance = instance
    if mrsm_instance is None:
        mrsm_instance = get_config('meerschaum', 'instance', patch=True)
    if isinstance(mrsm_instance, str):
        from meerschaum.connectors.parse import parse_instance_keys
        connector = parse_instance_keys(keys=mrsm_instance, debug=debug)
    else:
        from meerschaum.connectors import instance_types
        valid_connector = False
        if hasattr(mrsm_instance, 'type'):
            if mrsm_instance.type in instance_types:
                valid_connector = True
        if not valid_connector:
            error(f"Invalid instance connector: {mrsm_instance}")
        connector = mrsm_instance
    if debug:
        from meerschaum.utils.debug import dprint
        dprint(f"Using instance connector: {connector}")
    if not connector:
        error(f"Could not create connector from keys: '{mrsm_instance}'")

    ### Get a list of tuples for the keys needed to build pipes.
    result = fetch_pipes_keys(
        method,
        connector,
        connector_keys = connector_keys,
        metric_keys = metric_keys,
        location_keys = location_keys,
        tags = tags,
        params = params,
        debug = debug
    )
    if result is None:
        error(f"Unable to build pipes!")

    ### Populate the `pipes` dictionary with Pipes based on the keys
    ### obtained from the chosen `method`.
    from meerschaum import Pipe
    pipes = {}
    for ck, mk, lk in result:
        if ck not in pipes:
            pipes[ck] = {}

        if mk not in pipes[ck]:
            pipes[ck][mk] = {}

        pipes[ck][mk][lk] = Pipe(
            ck, mk, lk,
            mrsm_instance = connector,
            debug = debug,
            **filter_keywords(Pipe, **kw)
        )

    if not as_list:
        return pipes
    from meerschaum.utils.misc import flatten_pipes_dict
    return flatten_pipes_dict(pipes)
Return a dictionary or list of `meerschaum.Pipe` objects.
Parameters
- connector_keys (Union[str, List[str], None], default None):
  String or list of connector keys.
  If omitted or `'*'`, fetch all possible keys.
  If a string begins with `'_'`, select keys that do NOT match the string.
- metric_keys (Union[str, List[str], None], default None):
  String or list of metric keys. See `connector_keys` for formatting.
- location_keys (Union[str, List[str], None], default None):
  String or list of location keys. See `connector_keys` for formatting.
- tags (Optional[List[str]], default None):
  If provided, only include pipes with these tags.
- params (Optional[Dict[str, Any]], default None):
  Dictionary of additional parameters to search by.
  Params are parsed into a SQL WHERE clause.
  E.g. `{'a': 1, 'b': 2}` equates to `'WHERE a = 1 AND b = 2'`.
- mrsm_instance (Union[str, InstanceConnector, None], default None):
  Connector keys for the Meerschaum instance of the pipes.
  Must be a `meerschaum.connectors.sql.SQLConnector.SQLConnector` or `meerschaum.connectors.api.APIConnector.APIConnector`.
- as_list (bool, default False):
  If `True`, return pipes in a list instead of a hierarchical dictionary.
  `False`: `{connector_keys: {metric_key: {location_key: Pipe}}}`
  `True`: `[Pipe]`
- method (str, default 'registered'):
  Available options: `['registered', 'explicit', 'all']`.
  If `'registered'` (default), create pipes based on registered keys in the connector's pipes table (API or SQL connector, depending on `mrsm_instance`).
  If `'explicit'`, create pipes from the provided `connector_keys`, `metric_keys`, and `location_keys` instead of consulting the pipes table. Useful for creating non-existent pipes.
  If `'all'`, create pipes from predefined metrics and locations. Requires `connector_keys`.
  NOTE: Method `'all'` is not implemented!
- **kw (Any):
  Keyword arguments to pass to the `meerschaum.Pipe` constructor.
Returns
- A dictionary of dictionaries and `meerschaum.Pipe` objects in the connector, metric, location hierarchy.
- If `as_list` is `True`, return a list of `meerschaum.Pipe` objects.
Examples
>>> ### Manual definition:
>>> pipes = {
... <connector_keys>: {
... <metric_key>: {
... <location_key>: Pipe(
... <connector_keys>,
... <metric_key>,
... <location_key>,
... ),
... },
... },
... },
>>> ### Accessing a single pipe:
>>> pipes['sql:main']['weather'][None]
>>> ### Return a list instead:
>>> get_pipes(as_list=True)
[sql_main_weather]
>>>
def get_connector(
    type: str = None,
    label: str = None,
    refresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.


    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    with _locks['_loaded_plugin_connectors']:
        if not _loaded_plugin_connectors:
            load_plugin_connectors()
            _load_builtin_custom_connectors()
            _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector "
                            + f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n"
                        + f"  - Keyword value: '{value}'\n"
                        + f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else: ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack = False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack = False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]
Return existing connector or create new connection and store for reuse.
You can create new connectors if enough parameters are provided for the given type and flavor.
Parameters
- type (Optional[str], default None):
  Connector type (sql, api, etc.).
  Defaults to the type of the configured `instance_connector`.
- label (Optional[str], default None):
  Connector label (e.g. main). Defaults to `'main'`.
- refresh (bool, default False):
  Refresh the Connector instance / construct new object. Defaults to `False`.
- kw (Any):
  Other arguments to pass to the Connector constructor.
  If the Connector has already been constructed and new arguments are provided, `refresh` is set to `True` and the old Connector is replaced.
Returns
- A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`, `meerschaum.connectors.sql.SQLConnector`).
Examples
The following parameters would create a new `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.
>>> conn = get_connector(
... type = 'sql',
... label = 'newlabel',
... flavor = 'sqlite',
... database = '/file/path/to/database.db'
... )
>>>
def get_config(
    *keys: str,
    patch: bool = True,
    substitute: bool = True,
    sync_files: bool = True,
    write_missing: bool = True,
    as_tuple: bool = False,
    warn: bool = True,
    debug: bool = False
) -> Any:
    """
    Return the Meerschaum configuration dictionary.
    If positional arguments are provided, index by the keys.
    Raises a warning if invalid keys are provided.

    Parameters
    ----------
    keys: str
        List of strings to index.

    patch: bool, default True
        If `True`, patch missing default keys into the config directory.
        Defaults to `True`.

    sync_files: bool, default True
        If `True`, sync files if needed.
        Defaults to `True`.

    write_missing: bool, default True
        If `True`, write default values when the main config files are missing.
        Defaults to `True`.

    substitute: bool, default True
        If `True`, substitute 'MRSM{}' values.
        Defaults to `True`.

    as_tuple: bool, default False
        If `True`, return a tuple of type (success, value).
        Defaults to `False`.

    Returns
    -------
    The value in the configuration directory, indexed by the provided keys.

    Examples
    --------
    >>> get_config('meerschaum', 'instance')
    'sql:main'
    >>> get_config('does', 'not', 'exist')
    UserWarning: Invalid keys in config: ('does', 'not', 'exist')
    """
    import json

    symlinks_key = STATIC_CONFIG['config']['symlinks_key']
    if debug:
        from meerschaum.utils.debug import dprint
        dprint(f"Indexing keys: {keys}", color=False)

    if len(keys) == 0:
        _rc = _config(substitute=substitute, sync_files=sync_files, write_missing=write_missing)
        if as_tuple:
            return True, _rc
        return _rc

    ### Weird threading issues, only import if substitute is True.
    if substitute:
        from meerschaum.config._read_config import search_and_substitute_config
    ### Invalidate the cache if it was read before with substitute=False
    ### but there still exist substitutions.
    if (
        config is not None and substitute and keys[0] != symlinks_key
        and 'MRSM{' in json.dumps(config.get(keys[0]))
    ):
        try:
            _subbed = search_and_substitute_config({keys[0]: config[keys[0]]})
        except Exception as e:
            import traceback
            traceback.print_exc()
        config[keys[0]] = _subbed[keys[0]]
        if symlinks_key in _subbed:
            if symlinks_key not in config:
                config[symlinks_key] = {}
            if keys[0] not in config[symlinks_key]:
                config[symlinks_key][keys[0]] = {}
            config[symlinks_key][keys[0]] = apply_patch_to_config(
                _subbed,
                config[symlinks_key][keys[0]]
            )

    from meerschaum.config._sync import sync_files as _sync_files
    if config is None:
        _config(*keys, sync_files=sync_files)

    invalid_keys = False
    if keys[0] not in config and keys[0] != symlinks_key:
        single_key_config = read_config(
            keys=[keys[0]], substitute=substitute, write_missing=write_missing
        )
        if keys[0] not in single_key_config:
            invalid_keys = True
        else:
            config[keys[0]] = single_key_config.get(keys[0], None)
            if symlinks_key in single_key_config and keys[0] in single_key_config[symlinks_key]:
                if symlinks_key not in config:
                    config[symlinks_key] = {}
                config[symlinks_key][keys[0]] = single_key_config[symlinks_key][keys[0]]

        if sync_files:
            _sync_files(keys=[keys[0]])

    c = config
    if len(keys) > 0:
        for k in keys:
            try:
                c = c[k]
            except Exception as e:
                invalid_keys = True
                break
        if invalid_keys:
            ### Check if the keys are in the default configuration.
            from meerschaum.config._default import default_config
            in_default = True
            patched_default_config = (
                search_and_substitute_config(default_config)
                if substitute else copy.deepcopy(default_config)
            )
            _c = patched_default_config
            for k in keys:
                try:
                    _c = _c[k]
                except Exception as e:
                    in_default = False
            if in_default:
                c = _c
                invalid_keys = False
            warning_msg = f"Invalid keys in config: {keys}"
            if not in_default:
                try:
                    if warn:
                        from meerschaum.utils.warnings import warn as _warn
                        _warn(warning_msg, stacklevel=3, color=False)
                except Exception as e:
                    if warn:
                        print(warning_msg)
                if as_tuple:
                    return False, None
                return None

            ### Don't write keys that we haven't yet loaded into memory.
            not_loaded_keys = [k for k in patched_default_config if k not in config]
            for k in not_loaded_keys:
                patched_default_config.pop(k, None)

            set_config(
                apply_patch_to_config(
                    patched_default_config,
                    config,
                )
            )
            if patch and keys[0] != symlinks_key:
                if write_missing:
                    write_config(config, debug=debug)

    if as_tuple:
        return (not invalid_keys), c
    return c
Return the Meerschaum configuration dictionary. If positional arguments are provided, index by the keys. Raises a warning if invalid keys are provided.
Parameters
- keys (str): List of strings to index.
- patch (bool, default True):
  If `True`, patch missing default keys into the config directory. Defaults to `True`.
- sync_files (bool, default True):
  If `True`, sync files if needed. Defaults to `True`.
- write_missing (bool, default True):
  If `True`, write default values when the main config files are missing. Defaults to `True`.
- substitute (bool, default True):
  If `True`, substitute 'MRSM{}' values. Defaults to `True`.
- as_tuple (bool, default False):
  If `True`, return a tuple of type (success, value). Defaults to `False`.
Returns
- The value in the configuration directory, indexed by the provided keys.
Examples
>>> get_config('meerschaum', 'instance')
'sql:main'
>>> get_config('does', 'not', 'exist')
UserWarning: Invalid keys in config: ('does', 'not', 'exist')
class Pipe:
    """
    Access Meerschaum pipes via Pipe objects.

    Pipes are identified by the following:

    1. Connector keys (e.g. `'sql:main'`)
    2. Metric key (e.g. `'weather'`)
    3. Location (optional; e.g. `None`)

    A pipe's connector keys correspond to a data source, and when the pipe is synced,
    its `fetch` definition is evaluated and executed to produce new data.

    Alternatively, new data may be directly synced via `pipe.sync()`:

    ```
    >>> from meerschaum import Pipe
    >>> pipe = Pipe('csv', 'weather')
    >>>
    >>> import pandas as pd
    >>> df = pd.read_csv('weather.csv')
    >>> pipe.sync(df)
    ```
    """

    from ._fetch import (
        fetch,
        get_backtrack_interval,
    )
    from ._data import (
        get_data,
        get_backtrack_data,
        get_rowcount,
        _get_data_as_iterator,
        get_chunk_interval,
        get_chunk_bounds,
        get_chunk_bounds_batches,
        parse_date_bounds,
    )
    from ._register import register
    from ._attributes import (
        attributes,
        parameters,
        columns,
        indices,
        indexes,
        dtypes,
        autoincrement,
        upsert,
        static,
        tzinfo,
        enforce,
        null_indices,
        get_columns,
        get_columns_types,
        get_columns_indices,
        get_indices,
        tags,
        get_id,
        id,
        get_val_column,
        parents,
        parent,
        children,
        target,
        _target_legacy,
        guess_datetime,
    )
    from ._show import show
    from ._edit import edit, edit_definition, update
    from ._sync import (
        sync,
        get_sync_time,
        exists,
        filter_existing,
        _get_chunk_label,
        get_num_workers,
        _persist_new_json_columns,
        _persist_new_numeric_columns,
        _persist_new_uuid_columns,
        _persist_new_bytes_columns,
        _persist_new_geometry_columns,
    )
    from ._verify import (
        verify,
        get_bound_interval,
        get_bound_time,
    )
    from ._delete import delete
    from ._drop import drop, drop_indices
    from ._index import create_indices
    from ._clear import clear
    from ._deduplicate import deduplicate
    from ._bootstrap import bootstrap
    from ._dtypes import enforce_dtypes, infer_dtypes
    from ._copy import copy_to

    def __init__(
        self,
        connector: str = '',
        metric: str = '',
        location: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
        columns: Union[Dict[str, str], List[str], None] = None,
        indices: Optional[Dict[str, Union[str, List[str]]]] = None,
        tags: Optional[List[str]] = None,
        target: Optional[str] = None,
        dtypes: Optional[Dict[str, str]] = None,
        instance: Optional[Union[str, InstanceConnector]] = None,
        temporary: bool = False,
        upsert: Optional[bool] = None,
        autoincrement: Optional[bool] = None,
        static: Optional[bool] = None,
        enforce: Optional[bool] = None,
        null_indices: Optional[bool] = None,
        mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
        cache: bool = False,
        debug: bool = False,
        connector_keys: Optional[str] = None,
        metric_key: Optional[str] = None,
        location_key: Optional[str] = None,
        instance_keys: Optional[str] = None,
        indexes: Union[Dict[str, str], List[str], None] = None,
    ):
        """
        Parameters
        ----------
        connector: str
            Keys for the pipe's source connector, e.g. `'sql:main'`.

        metric: str
            Label for the pipe's contents, e.g. `'weather'`.

        location: str, default None
            Label for the pipe's location. Defaults to `None`.

        parameters: Optional[Dict[str, Any]], default None
            Optionally set a pipe's parameters from the constructor,
            e.g. columns and other attributes.
            You can edit these parameters with `edit pipes`.

        columns: Union[Dict[str, str], List[str], None], default None
            Set the `columns` dictionary of `parameters`.
            If `parameters` is also provided, this dictionary is added under the `'columns'` key.

        indices: Optional[Dict[str, Union[str, List[str]]]], default None
            Set the `indices` dictionary of `parameters`.
            If `parameters` is also provided, this dictionary is added under the `'indices'` key.

        tags: Optional[List[str]], default None
            A list of strings to be added under the `'tags'` key of `parameters`.
            You can select pipes with certain tags using `--tags`.

        dtypes: Optional[Dict[str, str]], default None
            Set the `dtypes` dictionary of `parameters`.
            If `parameters` is also provided, this dictionary is added under the `'dtypes'` key.

        mrsm_instance: Optional[Union[str, InstanceConnector]], default None
            Connector for the Meerschaum instance where the pipe resides.
            Defaults to the preconfigured default instance (`'sql:main'`).

        instance: Optional[Union[str, InstanceConnector]], default None
            Alias for `mrsm_instance`. If `mrsm_instance` is supplied, this value is ignored.

        upsert: Optional[bool], default None
            If `True`, set `upsert` to `True` in the parameters.

        autoincrement: Optional[bool], default None
            If `True`, set `autoincrement` in the parameters.

        static: Optional[bool], default None
            If `True`, set `static` in the parameters.

        enforce: Optional[bool], default None
            If `False`, skip data type enforcement.
            Default behavior is `True`.

        null_indices: Optional[bool], default None
            Set to `False` if there will be no null values in the index columns.
            Defaults to `True`.

        temporary: bool, default False
            If `True`, prevent instance tables (pipes, users, plugins) from being created.

        cache: bool, default False
            If `True`, cache fetched data into a local database file.
            Defaults to `False`.
        """
        from meerschaum.utils.warnings import error, warn
        if (not connector and not connector_keys) or (not metric and not metric_key):
            error(
                "Please provide strings for the connector and metric\n    "
                + "(first two positional arguments)."
            )

        ### Fall back to legacy `location_key` just in case.
        if not location:
            location = location_key

        if not connector:
            connector = connector_keys

        if not metric:
            metric = metric_key

        if location in ('[None]', 'None'):
            location = None

        from meerschaum.config.static import STATIC_CONFIG
        negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix']
        for k in (connector, metric, location, *(tags or [])):
            if str(k).startswith(negation_prefix):
                error(f"A pipe's keys and tags cannot start with the prefix '{negation_prefix}'.")

        self.connector_keys = str(connector)
        self.connector_key = self.connector_keys ### Alias
        self.metric_key = metric
        self.location_key = location
        self.temporary = temporary

        self._attributes = {
            'connector_keys': self.connector_keys,
            'metric_key': self.metric_key,
            'location_key': self.location_key,
            'parameters': {},
        }

        ### only set parameters if values are provided
        if isinstance(parameters, dict):
            self._attributes['parameters'] = parameters
        else:
            if parameters is not None:
                warn(f"The provided parameters are of invalid type '{type(parameters)}'.")
            self._attributes['parameters'] = {}

        columns = columns or self._attributes.get('parameters', {}).get('columns', {})
        if isinstance(columns, list):
            columns = {str(col): str(col) for col in columns}
        if isinstance(columns, dict):
            self._attributes['parameters']['columns'] = columns
        elif columns is not None:
            warn(f"The provided columns are of invalid type '{type(columns)}'.")

        indices = (
            indices
            or indexes
            or self._attributes.get('parameters', {}).get('indices', None)
            or self._attributes.get('parameters', {}).get('indexes', None)
        )
        if isinstance(indices, dict):
            indices_key = (
                'indexes'
                if 'indexes' in self._attributes['parameters']
                else 'indices'
            )
            self._attributes['parameters'][indices_key] = indices

        if isinstance(tags, (list, tuple)):
            self._attributes['parameters']['tags'] = tags
        elif tags is not None:
            warn(f"The provided tags are of invalid type '{type(tags)}'.")

        if isinstance(target, str):
            self._attributes['parameters']['target'] = target
        elif target is not None:
            warn(f"The provided target is of invalid type '{type(target)}'.")

        if isinstance(dtypes, dict):
            self._attributes['parameters']['dtypes'] = dtypes
        elif dtypes is not None:
            warn(f"The provided dtypes are of invalid type '{type(dtypes)}'.")

        if isinstance(upsert, bool):
            self._attributes['parameters']['upsert'] = upsert

        if isinstance(autoincrement, bool):
            self._attributes['parameters']['autoincrement'] = autoincrement

        if isinstance(static, bool):
            self._attributes['parameters']['static'] = static

        if isinstance(enforce, bool):
            self._attributes['parameters']['enforce'] = enforce

        if isinstance(null_indices, bool):
            self._attributes['parameters']['null_indices'] = null_indices

        ### NOTE: The parameters dictionary is {} by default.
        ### A Pipe may be registered without parameters, then edited,
        ### or a Pipe may be registered with parameters set in-memory first.
        _mrsm_instance = mrsm_instance if mrsm_instance is not None else (instance or instance_keys)
        if _mrsm_instance is None:
            _mrsm_instance = get_config('meerschaum', 'instance', patch=True)

        if not isinstance(_mrsm_instance, str):
            self._instance_connector = _mrsm_instance
            self.instance_keys = str(_mrsm_instance)
        else: ### NOTE: must be SQL or API Connector for this to work
            self.instance_keys = _mrsm_instance

        self._cache = cache and get_config('system', 'experimental', 'cache')

    @property
    def meta(self):
        """
        Return the four keys needed to reconstruct this pipe.
        """
        return {
            'connector_keys': self.connector_keys,
            'metric_key': self.metric_key,
            'location_key': self.location_key,
            'instance_keys': self.instance_keys,
        }

    def keys(self) -> List[str]:
        """
        Return the ordered keys for this pipe.
        """
        return {
            key: val
            for key, val in self.meta.items()
            if key != 'instance'
        }

    @property
    def instance_connector(self) -> Union[InstanceConnector, None]:
        """
        The connector to where this pipe resides.
        May either be of type `meerschaum.connectors.sql.SQLConnector` or
        `meerschaum.connectors.api.APIConnector`.
        """
        if '_instance_connector' not in self.__dict__:
            from meerschaum.connectors.parse import parse_instance_keys
            conn = parse_instance_keys(self.instance_keys)
            if conn:
                self._instance_connector = conn
            else:
                return None
        return self._instance_connector

    @property
    def connector(self) -> Union[meerschaum.connectors.Connector, None]:
        """
        The connector to the data source.
        """
        if '_connector' not in self.__dict__:
            from meerschaum.connectors.parse import parse_instance_keys
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                try:
                    conn = parse_instance_keys(self.connector_keys)
                except Exception:
                    conn = None
            if conn:
                self._connector = conn
            else:
                return None
        return self._connector

    @property
    def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
        """
        If the pipe was created with `cache=True`, return the connector to the pipe's
        SQLite database for caching.
        """
        if not self._cache:
            return None

        if '_cache_connector' not in self.__dict__:
            from meerschaum.connectors import get_connector
            from meerschaum.config._paths import DUCKDB_RESOURCES_PATH, SQLITE_RESOURCES_PATH
            _resources_path = SQLITE_RESOURCES_PATH
            self._cache_connector = get_connector(
                'sql', '_cache_' + str(self),
                flavor='sqlite',
                database=str(_resources_path / ('_cache_' + str(self) + '.db')),
            )

        return self._cache_connector

    @property
    def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
        """
        If the pipe was created with `cache=True`, return another `meerschaum.Pipe` used to
        manage the local data.
        """
        if self.cache_connector is None:
            return None
        if '_cache_pipe' not in self.__dict__:
            from meerschaum.config._patch import apply_patch_to_config
            from meerschaum.utils.sql import sql_item_name
            _parameters = copy.deepcopy(self.parameters)
            _fetch_patch = {
                'fetch': ({
                    'definition': (
                        "SELECT * FROM "
                        + sql_item_name(
                            str(self.target),
                            self.instance_connector.flavor,
                            self.instance_connector.get_pipe_schema(self),
                        )
                    ),
                }) if self.instance_connector.type == 'sql' else ({
                    'connector_keys': self.connector_keys,
                    'metric_key': self.metric_key,
                    'location_key': self.location_key,
                })
            }
            _parameters = apply_patch_to_config(_parameters, _fetch_patch)
            self._cache_pipe = Pipe(
                self.instance_keys,
                (self.connector_keys + '_' + self.metric_key + '_cache'),
                self.location_key,
                mrsm_instance = self.cache_connector,
                parameters = _parameters,
                cache = False,
                temporary = True,
            )

        return self._cache_pipe

    def __str__(self, ansi: bool=False):
        return pipe_repr(self, ansi=ansi)

    def __eq__(self, other):
        try:
            return (
                isinstance(self, type(other))
                and self.connector_keys == other.connector_keys
                and self.metric_key == other.metric_key
                and self.location_key == other.location_key
                and self.instance_keys == other.instance_keys
            )
        except Exception:
            return False

    def __hash__(self):
        ### Using an esoteric separator to avoid collisions.
        sep = "[\"']"
        return hash(
            str(self.connector_keys) + sep
            + str(self.metric_key) + sep
            + str(self.location_key) + sep
            + str(self.instance_keys) + sep
        )

    def __repr__(self, ansi: bool=True, **kw) -> str:
        if not hasattr(sys, 'ps1'):
            ansi = False

        return pipe_repr(self, ansi=ansi, **kw)

    def __pt_repr__(self):
        from meerschaum.utils.packages import attempt_import
        prompt_toolkit_formatted_text = attempt_import('prompt_toolkit.formatted_text', lazy=False)
        return prompt_toolkit_formatted_text.ANSI(pipe_repr(self, ansi=True))

    def __getstate__(self) -> Dict[str, Any]:
        """
        Define the state dictionary (pickling).
        """
        return {
            'connector_keys': self.connector_keys,
            'metric_key': self.metric_key,
            'location_key': self.location_key,
            'parameters': self.parameters,
            'instance_keys': self.instance_keys,
        }

    def __setstate__(self, _state: Dict[str, Any]):
        """
        Read the state (unpickling).
        """
        self.__init__(**_state)

    def __getitem__(self, key: str) -> Any:
        """
        Index the pipe's attributes.
        If the `key` cannot be found, return `None`.
        """
        if key in self.attributes:
            return self.attributes.get(key, None)

        aliases = {
            'connector': 'connector_keys',
            'connector_key': 'connector_keys',
            'metric': 'metric_key',
            'location': 'location_key',
        }
        aliased_key = aliases.get(key, None)
        if aliased_key is not None:
            return self.attributes.get(aliased_key, None)

        property_aliases = {
            'instance': 'instance_keys',
            'instance_key': 'instance_keys',
        }
        aliased_key = property_aliases.get(key, None)
        if aliased_key is not None:
            key = aliased_key
        return getattr(self, key, None)
Access Meerschaum pipes via Pipe objects.
Pipes are identified by the following:
1. Connector keys (e.g. `'sql:main'`)
2. Metric key (e.g. `'weather'`)
3. Location (optional; e.g. `None`)
A pipe's connector keys correspond to a data source, and when the pipe is synced, its `fetch` definition is evaluated and executed to produce new data.
Alternatively, new data may be directly synced via `pipe.sync()`:
>>> from meerschaum import Pipe
>>> pipe = Pipe('csv', 'weather')
>>>
>>> import pandas as pd
>>> df = pd.read_csv('weather.csv')
>>> pipe.sync(df)
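For a pipe whose source is a SQL query, the fetch definition lives under `parameters['fetch']['definition']` (the same structure the `cache_pipe` property patches together below); a minimal sketch with an illustrative metric and query:

import meerschaum as mrsm

# Sketch: a pipe which fetches from a SQL query on its source connector.
sql_pipe = mrsm.Pipe(
    'sql:main', 'weather_avg',
    instance='sql:main',
    columns={'datetime': 'dt'},
    parameters={
        'fetch': {
            'definition': "SELECT dt, AVG(temp) AS temp FROM weather GROUP BY dt",
        },
    },
)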
Pipe(
    connector: str = '',
    metric: str = '',
    location: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    columns: Union[Dict[str, str], List[str], None] = None,
    indices: Optional[Dict[str, Union[str, List[str]]]] = None,
    tags: Optional[List[str]] = None,
    target: Optional[str] = None,
    dtypes: Optional[Dict[str, str]] = None,
    instance: Optional[Union[str, InstanceConnector]] = None,
    temporary: bool = False,
    upsert: Optional[bool] = None,
    autoincrement: Optional[bool] = None,
    static: Optional[bool] = None,
    enforce: Optional[bool] = None,
    null_indices: Optional[bool] = None,
    mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
    cache: bool = False,
    debug: bool = False,
    connector_keys: Optional[str] = None,
    metric_key: Optional[str] = None,
    location_key: Optional[str] = None,
    instance_keys: Optional[str] = None,
    indexes: Union[Dict[str, str], List[str], None] = None,
)
Parameters
- connector (str): Keys for the pipe's source connector, e.g. 'sql:main'.
- metric (str): Label for the pipe's contents, e.g. 'weather'.
- location (str, default None): Label for the pipe's location. Defaults to None.
- parameters (Optional[Dict[str, Any]], default None): Optionally set a pipe's parameters from the constructor, e.g. columns and other attributes. You can edit these parameters with edit pipes.
- columns (Union[Dict[str, str], List[str], None], default None): Set the columns dictionary of parameters. If parameters is also provided, this dictionary is added under the 'columns' key.
- indices (Optional[Dict[str, Union[str, List[str]]]], default None): Set the indices dictionary of parameters. If parameters is also provided, this dictionary is added under the 'indices' key.
- tags (Optional[List[str]], default None): A list of strings to be added under the 'tags' key of parameters. You can select pipes with certain tags using --tags.
- dtypes (Optional[Dict[str, str]], default None): Set the dtypes dictionary of parameters. If parameters is also provided, this dictionary is added under the 'dtypes' key.
- mrsm_instance (Optional[Union[str, InstanceConnector]], default None): Connector for the Meerschaum instance where the pipe resides. Defaults to the preconfigured default instance ('sql:main').
- instance (Optional[Union[str, InstanceConnector]], default None): Alias for mrsm_instance. If mrsm_instance is supplied, this value is ignored.
- upsert (Optional[bool], default None): If True, set upsert to True in the parameters.
- autoincrement (Optional[bool], default None): If True, set autoincrement in the parameters.
- static (Optional[bool], default None): If True, set static in the parameters.
- enforce (Optional[bool], default None): If False, skip data type enforcement. Default behavior is True.
- null_indices (Optional[bool], default None): Set to False if there will be no null values in the index columns. Defaults to True.
- temporary (bool, default False): If True, prevent instance tables (pipes, users, plugins) from being created.
- cache (bool, default False): If True, cache fetched data into a local database file. Defaults to False.
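The keyword arguments above are folded into the parameters dictionary. A minimal sketch (the 'sql:temp' instance, connector keys, and column names are illustrative):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'plugin:noaa', 'weather', 'atlanta',
    instance='sql:temp',
    columns={'datetime': 'timestamp', 'id': 'station'},
    tags=['weather'],
    dtypes={'temperature': 'float'},
    upsert=True,
    temporary=True,
)
print(pipe.parameters)
# e.g. {'columns': {'datetime': 'timestamp', 'id': 'station'}, 'tags': ['weather'],
#       'dtypes': {'temperature': 'float'}, 'upsert': True}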
@property
def meta(self):
    """
    Return the four keys needed to reconstruct this pipe.
    """
    return {
        'connector_keys': self.connector_keys,
        'metric_key': self.metric_key,
        'location_key': self.location_key,
        'instance_keys': self.instance_keys,
    }
Return the four keys needed to reconstruct this pipe.
def keys(self) -> List[str]:
    """
    Return the ordered keys for this pipe.
    """
    return {
        key: val
        for key, val in self.meta.items()
        if key != 'instance'
    }
Return the ordered keys for this pipe.
@property
def instance_connector(self) -> Union[InstanceConnector, None]:
    """
    The connector to where this pipe resides.
    May either be of type `meerschaum.connectors.sql.SQLConnector` or
    `meerschaum.connectors.api.APIConnector`.
    """
    if '_instance_connector' not in self.__dict__:
        from meerschaum.connectors.parse import parse_instance_keys
        conn = parse_instance_keys(self.instance_keys)
        if conn:
            self._instance_connector = conn
        else:
            return None
    return self._instance_connector
The connector to where this pipe resides. May either be of type meerschaum.connectors.sql.SQLConnector or meerschaum.connectors.api.APIConnector.
@property
def connector(self) -> Union[meerschaum.connectors.Connector, None]:
    """
    The connector to the data source.
    """
    if '_connector' not in self.__dict__:
        from meerschaum.connectors.parse import parse_instance_keys
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            try:
                conn = parse_instance_keys(self.connector_keys)
            except Exception:
                conn = None
        if conn:
            self._connector = conn
        else:
            return None
    return self._connector
The connector to the data source.
@property
def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
    """
    If the pipe was created with `cache=True`, return the connector to the pipe's
    SQLite database for caching.
    """
    if not self._cache:
        return None

    if '_cache_connector' not in self.__dict__:
        from meerschaum.connectors import get_connector
        from meerschaum.config._paths import DUCKDB_RESOURCES_PATH, SQLITE_RESOURCES_PATH
        _resources_path = SQLITE_RESOURCES_PATH
        self._cache_connector = get_connector(
            'sql', '_cache_' + str(self),
            flavor='sqlite',
            database=str(_resources_path / ('_cache_' + str(self) + '.db')),
        )

    return self._cache_connector
If the pipe was created with cache=True, return the connector to the pipe's SQLite database for caching.
@property
def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
    """
    If the pipe was created with `cache=True`, return another `meerschaum.Pipe` used to
    manage the local data.
    """
    if self.cache_connector is None:
        return None
    if '_cache_pipe' not in self.__dict__:
        from meerschaum.config._patch import apply_patch_to_config
        from meerschaum.utils.sql import sql_item_name
        _parameters = copy.deepcopy(self.parameters)
        _fetch_patch = {
            'fetch': ({
                'definition': (
                    "SELECT * FROM "
                    + sql_item_name(
                        str(self.target),
                        self.instance_connector.flavor,
                        self.instance_connector.get_pipe_schema(self),
                    )
                ),
            }) if self.instance_connector.type == 'sql' else ({
                'connector_keys': self.connector_keys,
                'metric_key': self.metric_key,
                'location_key': self.location_key,
            })
        }
        _parameters = apply_patch_to_config(_parameters, _fetch_patch)
        self._cache_pipe = Pipe(
            self.instance_keys,
            (self.connector_keys + '_' + self.metric_key + '_cache'),
            self.location_key,
            mrsm_instance=self.cache_connector,
            parameters=_parameters,
            cache=False,
            temporary=True,
        )

    return self._cache_pipe
If the pipe was created with cache=True, return another meerschaum.Pipe used to manage the local data.
def fetch(
    self,
    begin: Union[datetime, int, str, None] = '',
    end: Union[datetime, int, None] = None,
    check_existing: bool = True,
    sync_chunks: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union['pd.DataFrame', Iterator['pd.DataFrame'], None]:
    """
    Fetch a Pipe's latest data from its connector.

    Parameters
    ----------
    begin: Union[datetime, int, str, None], default ''
        If provided, only fetch data newer than or equal to `begin`.

    end: Union[datetime, int, None], default None
        If provided, only fetch data older than or equal to `end`.

    check_existing: bool, default True
        If `False`, do not apply the backtrack interval.

    sync_chunks: bool, default False
        If `True` and the pipe's connector is of type `'sql'`, begin syncing chunks
        as the fetch loads them into memory.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `pd.DataFrame` of the newest unseen data.
    """
    if 'fetch' not in dir(self.connector):
        warn(f"No `fetch()` function defined for connector '{self.connector}'")
        return None

    from meerschaum.connectors import get_connector_plugin
    from meerschaum.utils.misc import filter_arguments

    _chunk_hook = kw.pop('chunk_hook', None)
    kw['workers'] = self.get_num_workers(kw.get('workers', None))
    if sync_chunks and _chunk_hook is None:

        def _chunk_hook(chunk, **_kw) -> SuccessTuple:
            """
            Wrap `Pipe.sync()` with a custom chunk label prepended to the message.
            """
            from meerschaum.config._patch import apply_patch_to_config
            kwargs = apply_patch_to_config(kw, _kw)
            chunk_success, chunk_message = self.sync(chunk, **kwargs)
            chunk_label = self._get_chunk_label(chunk, self.columns.get('datetime', None))
            if chunk_label:
                chunk_message = '\n' + chunk_label + '\n' + chunk_message
            return chunk_success, chunk_message

    begin, end = self.parse_date_bounds(begin, end)

    with mrsm.Venv(get_connector_plugin(self.connector)):
        _args, _kwargs = filter_arguments(
            self.connector.fetch,
            self,
            begin=_determine_begin(
                self,
                begin,
                end,
                check_existing=check_existing,
                debug=debug,
            ),
            end=end,
            chunk_hook=_chunk_hook,
            debug=debug,
            **kw
        )
        df = self.connector.fetch(*_args, **_kwargs)
    return df
Fetch a Pipe's latest data from its connector.
Parameters
- begin (Union[datetime, int, str, None], default ''): If provided, only fetch data newer than or equal to begin.
- end (Union[datetime, int, None], default None): If provided, only fetch data older than or equal to end.
- check_existing (bool, default True): If False, do not apply the backtrack interval.
- sync_chunks (bool, default False): If True and the pipe's connector is of type 'sql', begin syncing chunks as the fetch loads them into memory.
- debug (bool, default False): Verbosity toggle.
Returns
- A pd.DataFrame of the newest unseen data.
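As a sketch, fetch() returns the connector's new rows without syncing them (this assumes a pipe whose source connector implements fetch(), e.g. a plugin connector):

from datetime import datetime, timezone
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
df = pipe.fetch(begin=datetime(2024, 1, 1, tzinfo=timezone.utc))
# `df` holds the unseen rows; pass it to `pipe.sync(df)` to persist them.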
def get_backtrack_interval(
    self,
    check_existing: bool = True,
    debug: bool = False,
) -> Union[timedelta, int]:
    """
    Get the backtrack interval to use for this pipe.

    Parameters
    ----------
    check_existing: bool, default True
        If `False`, return a backtrack interval of 0 minutes.

    Returns
    -------
    The backtrack interval (`timedelta` or `int`) to use with this pipe's `datetime` axis.
    """
    default_backtrack_minutes = get_config('pipes', 'parameters', 'fetch', 'backtrack_minutes')
    configured_backtrack_minutes = self.parameters.get('fetch', {}).get('backtrack_minutes', None)
    backtrack_minutes = (
        configured_backtrack_minutes
        if configured_backtrack_minutes is not None
        else default_backtrack_minutes
    ) if check_existing else 0

    backtrack_interval = timedelta(minutes=backtrack_minutes)
    dt_col = self.columns.get('datetime', None)
    if dt_col is None:
        return backtrack_interval

    dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
    if 'int' in dt_dtype.lower():
        return backtrack_minutes

    return backtrack_interval
Get the backtrack interval to use for this pipe.
Parameters
- check_existing (bool, default True): If False, return a backtrack interval of 0 minutes.
Returns
- The backtrack interval (timedelta or int) to use with this pipe's datetime axis.
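For example, with a configured backtrack window of 1440 minutes, a sketch (the 'demo' keys are illustrative, and this assumes the pipe's datetime axis is a timestamp):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'energy',
    instance='sql:main',
    columns={'datetime': 'ts'},
    parameters={'fetch': {'backtrack_minutes': 1440}},
    temporary=True,
)
print(pipe.get_backtrack_interval())
# datetime.timedelta(days=1)
print(pipe.get_backtrack_interval(check_existing=False))
# datetime.timedelta(0)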
def get_data(
    self,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, str, None] = None,
    end: Union[datetime, int, str, None] = None,
    params: Optional[Dict[str, Any]] = None,
    as_iterator: bool = False,
    as_chunks: bool = False,
    as_dask: bool = False,
    chunk_interval: Union[timedelta, int, None] = None,
    order: Optional[str] = 'asc',
    limit: Optional[int] = None,
    fresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union['pd.DataFrame', Iterator['pd.DataFrame'], None]:
    """
    Get a pipe's data from the instance connector.

    Parameters
    ----------
    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, int, str, None], default None
        Lower bound datetime to begin searching for data (inclusive).
        Translates to a `WHERE` clause like `WHERE datetime >= begin`.
        Defaults to `None`.

    end: Union[datetime, int, str, None], default None
        Upper bound datetime to stop searching for data (inclusive).
        Translates to a `WHERE` clause like `WHERE datetime < end`.
        Defaults to `None`.

    params: Optional[Dict[str, Any]], default None
        Filter the retrieved data by a dictionary of parameters.
        See `meerschaum.utils.sql.build_where` for more details.

    as_iterator: bool, default False
        If `True`, return a generator of chunks of pipe data.

    as_chunks: bool, default False
        Alias for `as_iterator`.

    as_dask: bool, default False
        If `True`, return a `dask.DataFrame`
        (which may be loaded into a Pandas DataFrame with `df.compute()`).

    chunk_interval: Union[timedelta, int, None], default None
        If `as_iterator`, then return chunks with `begin` and `end` separated by this interval.
        This may be set under `pipe.parameters['chunk_minutes']`.
        By default, use a timedelta of 1440 minutes (1 day).
        If `chunk_interval` is an integer and the `datetime` axis a timestamp,
        then use a timedelta with this many minutes.
        If the `datetime` axis is an integer, default to the configured chunksize.
        If `chunk_interval` is a `timedelta` and the `datetime` axis an integer,
        use the number of minutes in the `timedelta`.

    order: Optional[str], default 'asc'
        If `order` is not `None`, sort the resulting dataframe by indices.

    limit: Optional[int], default None
        If provided, cap the dataframe to this many rows.

    fresh: bool, default False
        If `True`, skip local cache and directly query the instance connector.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `pd.DataFrame` for the pipe's data corresponding to the provided parameters.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.utils.misc import iterate_chunks, items_str
    from meerschaum.utils.dtypes import to_pandas_dtype, coerce_timezone
    from meerschaum.utils.dataframe import add_missing_cols_to_df, df_is_chunk_generator
    from meerschaum.utils.packages import attempt_import
    dd = attempt_import('dask.dataframe') if as_dask else None
    dask = attempt_import('dask') if as_dask else None
    dateutil_parser = attempt_import('dateutil.parser')

    if select_columns == '*':
        select_columns = None
    elif isinstance(select_columns, str):
        select_columns = [select_columns]

    if isinstance(omit_columns, str):
        omit_columns = [omit_columns]

    begin, end = self.parse_date_bounds(begin, end)
    as_iterator = as_iterator or as_chunks
    dt_col = self.columns.get('datetime', None)

    def _sort_df(_df):
        if df_is_chunk_generator(_df):
            return _df
        indices = [] if dt_col not in _df.columns else [dt_col]
        non_dt_cols = [
            col
            for col_ix, col in self.columns.items()
            if col_ix != 'datetime' and col in _df.columns
        ]
        indices.extend(non_dt_cols)
        if 'dask' not in _df.__module__:
            _df.sort_values(
                by=indices,
                inplace=True,
                ascending=(str(order).lower() == 'asc'),
            )
            _df.reset_index(drop=True, inplace=True)
        else:
            _df = _df.sort_values(
                by=indices,
                ascending=(str(order).lower() == 'asc'),
            )
            _df = _df.reset_index(drop=True)
        if limit is not None and len(_df) > limit:
            return _df.head(limit)
        return _df

    if as_iterator or as_chunks:
        df = self._get_data_as_iterator(
            select_columns=select_columns,
            omit_columns=omit_columns,
            begin=begin,
            end=end,
            params=params,
            chunk_interval=chunk_interval,
            limit=limit,
            order=order,
            fresh=fresh,
            debug=debug,
        )
        return _sort_df(df)

    if as_dask:
        from multiprocessing.pool import ThreadPool
        dask_pool = ThreadPool(self.get_num_workers())
        dask.config.set(pool=dask_pool)
        chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
        bounds = self.get_chunk_bounds(
            begin=begin,
            end=end,
            bounded=False,
            chunk_interval=chunk_interval,
            debug=debug,
        )
        dask_chunks = [
            dask.delayed(self.get_data)(
                select_columns=select_columns,
                omit_columns=omit_columns,
                begin=chunk_begin,
                end=chunk_end,
                params=params,
                chunk_interval=chunk_interval,
                order=order,
                limit=limit,
                fresh=fresh,
                debug=debug,
            )
            for (chunk_begin, chunk_end) in bounds
        ]
        dask_meta = {
            col: to_pandas_dtype(typ)
            for col, typ in self.dtypes.items()
        }
        return _sort_df(dd.from_delayed(dask_chunks, meta=dask_meta))

    if not self.exists(debug=debug):
        return None

    if self.cache_pipe is not None:
        if not fresh:
            _sync_cache_tuple = self.cache_pipe.sync(
                begin=begin,
                end=end,
                params=params,
                debug=debug,
                **kw
            )
            if not _sync_cache_tuple[0]:
                warn(f"Failed to sync cache for {self}:\n" + _sync_cache_tuple[1])
                fresh = True
            else: ### Successfully synced cache.
                return self.enforce_dtypes(
                    self.cache_pipe.get_data(
                        select_columns=select_columns,
                        omit_columns=omit_columns,
                        begin=begin,
                        end=end,
                        params=params,
                        order=order,
                        limit=limit,
                        debug=debug,
                        fresh=True,
                        **kw
                    ),
                    debug=debug,
                )

    with Venv(get_connector_plugin(self.instance_connector)):
        df = self.instance_connector.get_pipe_data(
            pipe=self,
            select_columns=select_columns,
            omit_columns=omit_columns,
            begin=begin,
            end=end,
            params=params,
            limit=limit,
            order=order,
            debug=debug,
            **kw
        )
        if df is None:
            return df

        if not select_columns:
            select_columns = [col for col in df.columns]

        cols_to_omit = [
            col
            for col in df.columns
            if (
                col in (omit_columns or [])
                or
                col not in (select_columns or [])
            )
        ]
        cols_to_add = [
            col
            for col in select_columns
            if col not in df.columns
        ]
        if cols_to_omit:
            warn(
                (
                    f"Received {len(cols_to_omit)} omitted column"
                    + ('s' if len(cols_to_omit) != 1 else '')
                    + f" for {self}. "
                    + "Consider adding `select_columns` and `omit_columns` support to "
                    + f"'{self.instance_connector.type}' connectors to improve performance."
                ),
                stack=False,
            )
            _cols_to_select = [col for col in df.columns if col not in cols_to_omit]
            df = df[_cols_to_select]

        if cols_to_add:
            warn(
                (
                    f"Specified columns {items_str(cols_to_add)} were not found on {self}. "
                    + "Adding these to the DataFrame as null columns."
                ),
                stack=False,
            )
            df = add_missing_cols_to_df(df, {col: 'string' for col in cols_to_add})

        enforced_df = self.enforce_dtypes(df, debug=debug)

        if order:
            return _sort_df(enforced_df)
        return enforced_df
Get a pipe's data from the instance connector.
Parameters
- select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
- omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, int, str, None], default None): Lower bound datetime to begin searching for data (inclusive). Translates to a WHERE clause like WHERE datetime >= begin. Defaults to None.
- end (Union[datetime, int, str, None], default None): Upper bound datetime to stop searching for data (inclusive). Translates to a WHERE clause like WHERE datetime < end. Defaults to None.
- params (Optional[Dict[str, Any]], default None): Filter the retrieved data by a dictionary of parameters. See meerschaum.utils.sql.build_where for more details.
- as_iterator (bool, default False): If True, return a generator of chunks of pipe data.
- as_chunks (bool, default False): Alias for as_iterator.
- as_dask (bool, default False): If True, return a dask.DataFrame (which may be loaded into a Pandas DataFrame with df.compute()).
- chunk_interval (Union[timedelta, int, None], default None): If as_iterator, then return chunks with begin and end separated by this interval. This may be set under pipe.parameters['chunk_minutes']. By default, use a timedelta of 1440 minutes (1 day). If chunk_interval is an integer and the datetime axis a timestamp, then use a timedelta with this many minutes. If the datetime axis is an integer, default to the configured chunksize. If chunk_interval is a timedelta and the datetime axis an integer, use the number of minutes in the timedelta.
- order (Optional[str], default 'asc'): If order is not None, sort the resulting dataframe by indices.
- limit (Optional[int], default None): If provided, cap the dataframe to this many rows.
- fresh (bool, default False): If True, skip local cache and directly query the instance connector.
- debug (bool, default False): Verbosity toggle.
Returns
- A pd.DataFrame for the pipe's data corresponding to the provided parameters.
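A sketch of common patterns (the connector keys, column names, and params are illustrative):

from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')

# Bounded, filtered select.
df = pipe.get_data(
    select_columns=['timestamp', 'station', 'temperature'],
    begin=datetime(2024, 1, 1),
    end=datetime(2024, 2, 1),
    params={'station': 'KATL'},
    limit=100,
)

# Iterate over chunks instead of loading everything at once.
for chunk in pipe.get_data(as_iterator=True):
    print(len(chunk))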
def get_backtrack_data(
    self,
    backtrack_minutes: Optional[int] = None,
    begin: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    limit: Optional[int] = None,
    fresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Optional['pd.DataFrame']:
    """
    Get the most recent data from the instance connector as a Pandas DataFrame.

    Parameters
    ----------
    backtrack_minutes: Optional[int], default None
        How many minutes from `begin` to select from.
        If `None`, use `pipe.parameters['fetch']['backtrack_minutes']`.

    begin: Optional[datetime], default None
        The starting point to search for data.
        If begin is `None` (default), use the most recent observed datetime
        (AKA the sync time).

        ```
        E.g. begin = 02:00

        Search this region.          Ignore this, even if there's data.
        /  /  /  /  /  /  /  /  /  |
        -----|----------|----------|----------|----------|----------|
           00:00      01:00      02:00      03:00      04:00      05:00
        ```

    params: Optional[Dict[str, Any]], default None
        The standard Meerschaum `params` query dictionary.

    limit: Optional[int], default None
        If provided, cap the number of rows to be returned.

    fresh: bool, default False
        If `True`, ignore local cache and pull directly from the instance connector.
        Only comes into effect if a pipe was created with `cache=True`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `pd.DataFrame` for the pipe's data corresponding to the provided parameters. Backtrack data
    is a convenient way to get a pipe's data "backtracked" from the most recent datetime.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    if not self.exists(debug=debug):
        return None

    begin = self.parse_date_bounds(begin)

    backtrack_interval = self.get_backtrack_interval(debug=debug)
    if backtrack_minutes is None:
        backtrack_minutes = (
            (backtrack_interval.total_seconds() / 60)
            if isinstance(backtrack_interval, timedelta)
            else backtrack_interval
        )

    if self.cache_pipe is not None:
        if not fresh:
            _sync_cache_tuple = self.cache_pipe.sync(begin=begin, params=params, debug=debug, **kw)
            if not _sync_cache_tuple[0]:
                warn(f"Failed to sync cache for {self}:\n" + _sync_cache_tuple[1])
                fresh = True
            else: ### Successfully synced cache.
                return self.enforce_dtypes(
                    self.cache_pipe.get_backtrack_data(
                        fresh=True,
                        begin=begin,
                        backtrack_minutes=backtrack_minutes,
                        params=params,
                        limit=limit,
                        order=kw.get('order', 'desc'),
                        debug=debug,
                        **kw
                    ),
                    debug=debug,
                )

    if hasattr(self.instance_connector, 'get_backtrack_data'):
        with Venv(get_connector_plugin(self.instance_connector)):
            return self.enforce_dtypes(
                self.instance_connector.get_backtrack_data(
                    pipe=self,
                    begin=begin,
                    backtrack_minutes=backtrack_minutes,
                    params=params,
                    limit=limit,
                    debug=debug,
                    **kw
                ),
                debug=debug,
            )

    if begin is None:
        begin = self.get_sync_time(params=params, debug=debug)

    backtrack_interval = (
        timedelta(minutes=backtrack_minutes)
        if isinstance(begin, datetime)
        else backtrack_minutes
    )
    if begin is not None:
        begin = begin - backtrack_interval

    return self.get_data(
        begin=begin,
        params=params,
        debug=debug,
        limit=limit,
        order=kw.get('order', 'desc'),
        **kw
    )
Get the most recent data from the instance connector as a Pandas DataFrame.
Parameters
- backtrack_minutes (Optional[int], default None): How many minutes from begin to select from. If None, use pipe.parameters['fetch']['backtrack_minutes'].
- begin (Optional[datetime], default None): The starting point to search for data. If begin is None (default), use the most recent observed datetime (AKA the sync time).

      E.g. begin = 02:00

      Search this region.          Ignore this, even if there's data.
      /  /  /  /  /  /  /  /  /  |
      -----|----------|----------|----------|----------|----------|
         00:00      01:00      02:00      03:00      04:00      05:00

- params (Optional[Dict[str, Any]], default None): The standard Meerschaum params query dictionary.
- limit (Optional[int], default None): If provided, cap the number of rows to be returned.
- fresh (bool, default False): If True, ignore local cache and pull directly from the instance connector. Only comes into effect if a pipe was created with cache=True.
- debug (bool, default False): Verbosity toggle.
Returns
- A pd.DataFrame for the pipe's data corresponding to the provided parameters. Backtrack data is a convenient way to get a pipe's data "backtracked" from the most recent datetime.
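For example, to re-examine the trailing 90 minutes before the latest sync time (a sketch; assumes the pipe exists on 'sql:main'):

import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
recent_df = pipe.get_backtrack_data(backtrack_minutes=90)
# Rows from (sync_time - 90 minutes) onward, newest first by default.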
def get_rowcount(
    self,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    remote: bool = False,
    debug: bool = False
) -> int:
    """
    Get a Pipe's instance or remote rowcount.

    Parameters
    ----------
    begin: Optional[datetime], default None
        Count rows where datetime > begin.

    end: Optional[datetime], default None
        Count rows where datetime < end.

    remote: bool, default False
        Count rows from a pipe's remote source.
        **NOTE**: This is experimental!

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    An `int` of the number of rows in the pipe corresponding to the provided parameters.
    Returns 0 if the pipe does not exist.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.utils.misc import filter_keywords

    begin, end = self.parse_date_bounds(begin, end)
    connector = self.instance_connector if not remote else self.connector
    try:
        with Venv(get_connector_plugin(connector)):
            if not hasattr(connector, 'get_pipe_rowcount'):
                warn(
                    f"Connectors of type '{connector.type}' "
                    "do not implement `get_pipe_rowcount()`.",
                    stack=False,
                )
                return 0
            kwargs = filter_keywords(
                connector.get_pipe_rowcount,
                begin=begin,
                end=end,
                params=params,
                remote=remote,
                debug=debug,
            )
            if remote and 'remote' not in kwargs:
                warn(
                    f"Connectors of type '{connector.type}' do not support remote rowcounts.",
                    stack=False,
                )
                return 0
            rowcount = connector.get_pipe_rowcount(
                self,
                begin=begin,
                end=end,
                params=params,
                remote=remote,
                debug=debug,
            )
            if rowcount is None:
                return 0
            return rowcount
    except AttributeError as e:
        warn(e)
        if remote:
            return 0
        warn(f"Failed to get a rowcount for {self}.")
        return 0
Get a Pipe's instance or remote rowcount.
Parameters
- begin (Optional[datetime], default None): Count rows where datetime > begin.
- end (Optional[datetime], default None): Count rows where datetime < end.
- remote (bool, default False): Count rows from a pipe's remote source. NOTE: This is experimental!
- debug (bool, default False): Verbosity toggle.
Returns
- An int of the number of rows in the pipe corresponding to the provided parameters. Returns 0 if the pipe does not exist.
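A sketch of a bounded rowcount (the keys are illustrative):

from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
print(pipe.get_rowcount(begin=datetime(2024, 1, 1), end=datetime(2024, 2, 1)))
# e.g. 744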
def get_chunk_interval(
    self,
    chunk_interval: Union[timedelta, int, None] = None,
    debug: bool = False,
) -> Union[timedelta, int]:
    """
    Get the chunk interval to use for this pipe.

    Parameters
    ----------
    chunk_interval: Union[timedelta, int, None], default None
        If provided, coerce this value into the correct type.
        For example, if the datetime axis is an integer, then
        return the number of minutes.

    Returns
    -------
    The chunk interval (`timedelta` or `int`) to use with this pipe's `datetime` axis.
    """
    default_chunk_minutes = get_config('pipes', 'parameters', 'verify', 'chunk_minutes')
    configured_chunk_minutes = self.parameters.get('verify', {}).get('chunk_minutes', None)
    chunk_minutes = (
        (configured_chunk_minutes or default_chunk_minutes)
        if chunk_interval is None
        else (
            chunk_interval
            if isinstance(chunk_interval, int)
            else int(chunk_interval.total_seconds() / 60)
        )
    )

    dt_col = self.columns.get('datetime', None)
    if dt_col is None:
        return timedelta(minutes=chunk_minutes)

    dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
    if 'int' in dt_dtype.lower():
        return chunk_minutes
    return timedelta(minutes=chunk_minutes)
Get the chunk interval to use for this pipe.
Parameters
- chunk_interval (Union[timedelta, int, None], default None): If provided, coerce this value into the correct type. For example, if the datetime axis is an integer, then return the number of minutes.
Returns
- The chunk interval (timedelta or int) to use with this pipe's datetime axis.
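A sketch of the coercion behavior for a pipe with a timestamp datetime axis (the keys are illustrative):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'energy',
    instance='sql:main',
    columns={'datetime': 'ts'},
    temporary=True,
)
print(pipe.get_chunk_interval())
# datetime.timedelta(days=1)  (the default of 1440 minutes)
print(pipe.get_chunk_interval(90))
# datetime.timedelta(seconds=5400)  (an integer becomes 90 minutes)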
def get_chunk_bounds(
    self,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    bounded: bool = False,
    chunk_interval: Union[timedelta, int, None] = None,
    debug: bool = False,
) -> List[
    Tuple[
        Union[datetime, int, None],
        Union[datetime, int, None],
    ]
]:
    """
    Return a list of datetime bounds for iterating over the pipe's `datetime` axis.

    Parameters
    ----------
    begin: Union[datetime, int, None], default None
        If provided, do not select less than this value.
        Otherwise the first chunk will be unbounded.

    end: Union[datetime, int, None], default None
        If provided, do not select greater than or equal to this value.
        Otherwise the last chunk will be unbounded.

    bounded: bool, default False
        If `True`, do not include `None` in the first chunk.

    chunk_interval: Union[timedelta, int, None], default None
        If provided, use this interval for the size of chunk boundaries.
        The default value for this pipe may be set
        under `pipe.parameters['verify']['chunk_minutes']`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of chunk bounds (datetimes or integers).
    If unbounded, the first and last chunks will include `None`.
    """
    from datetime import timedelta
    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum.utils.misc import interval_str
    include_less_than_begin = not bounded and begin is None
    include_greater_than_end = not bounded and end is None
    if begin is None:
        begin = self.get_sync_time(newest=False, debug=debug)
    consolidate_end_chunk = False
    if end is None:
        end = self.get_sync_time(newest=True, debug=debug)
        if end is not None and hasattr(end, 'tzinfo'):
            end += timedelta(minutes=1)
            consolidate_end_chunk = True
        elif are_dtypes_equal(str(type(end)), 'int'):
            end += 1
            consolidate_end_chunk = True
    if begin is None and end is None:
        return [(None, None)]

    begin, end = self.parse_date_bounds(begin, end)

    ### Set the chunk interval under `pipe.parameters['verify']['chunk_minutes']`.
    chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)

    ### Build a list of tuples containing the chunk boundaries
    ### so that we can sync multiple chunks in parallel.
    ### Run `verify pipes --workers 1` to sync chunks in series.
    chunk_bounds = []
    begin_cursor = begin
    num_chunks = 0
    max_chunks = 1_000_000
    while begin_cursor < end:
        end_cursor = begin_cursor + chunk_interval
        chunk_bounds.append((begin_cursor, end_cursor))
        begin_cursor = end_cursor
        num_chunks += 1
        if num_chunks >= max_chunks:
            raise ValueError(
                f"Too many chunks of size '{interval_str(chunk_interval)}' "
                f"between '{begin}' and '{end}'."
            )

    if num_chunks > 1 and consolidate_end_chunk:
        last_bounds, second_last_bounds = chunk_bounds[-1], chunk_bounds[-2]
        chunk_bounds = chunk_bounds[:-2]
        chunk_bounds.append((second_last_bounds[0], last_bounds[1]))

    ### The chunk interval might be too large.
    if not chunk_bounds and end >= begin:
        chunk_bounds = [(begin, end)]

    ### Truncate the last chunk to the end timestamp.
    if chunk_bounds[-1][1] > end:
        chunk_bounds[-1] = (chunk_bounds[-1][0], end)

    ### Pop the last chunk if its bounds are equal.
    if chunk_bounds[-1][0] == chunk_bounds[-1][1]:
        chunk_bounds = chunk_bounds[:-1]

    if include_less_than_begin:
        chunk_bounds = [(None, begin)] + chunk_bounds
    if include_greater_than_end:
        chunk_bounds = chunk_bounds + [(end, None)]

    return chunk_bounds
Return a list of datetime bounds for iterating over the pipe's datetime axis.
Parameters
- begin (Union[datetime, int, None], default None): If provided, do not select less than this value. Otherwise the first chunk will be unbounded.
- end (Union[datetime, int, None], default None): If provided, do not select greater than or equal to this value. Otherwise the last chunk will be unbounded.
- bounded (bool, default False): If True, do not include None in the first chunk.
- chunk_interval (Union[timedelta, int, None], default None): If provided, use this interval for the size of chunk boundaries. The default value for this pipe may be set under pipe.parameters['verify']['chunk_minutes'].
- debug (bool, default False): Verbosity toggle.
Returns
- A list of chunk bounds (datetimes or integers). If unbounded, the first and last chunks will include None.
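For example, to walk a date range in daily chunks (a sketch; timezones may be coerced in the output):

from datetime import datetime, timedelta
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'energy',
    instance='sql:main',
    columns={'datetime': 'ts'},
    temporary=True,
)
bounds = pipe.get_chunk_bounds(
    begin=datetime(2024, 1, 1),
    end=datetime(2024, 1, 4),
    bounded=True,
    chunk_interval=timedelta(days=1),
)
# e.g. [(2024-01-01, 2024-01-02), (2024-01-02, 2024-01-03), (2024-01-03, 2024-01-04)]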
def get_chunk_bounds_batches(
    self,
    chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]],
    batchsize: Optional[int] = None,
    workers: Optional[int] = None,
    debug: bool = False,
) -> List[
    Tuple[
        Tuple[
            Union[datetime, int, None],
            Union[datetime, int, None],
        ], ...
    ]
]:
    """
    Return a list of tuples of chunk bounds of size `batchsize`.

    Parameters
    ----------
    chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]]
        A list of chunk bounds (see `Pipe.get_chunk_bounds()`).

    batchsize: Optional[int], default None
        How many chunks to include in a batch. Defaults to `Pipe.get_num_workers()`.

    workers: Optional[int], default None
        If `batchsize` is `None`, use this as the desired number of workers.
        Passed to `Pipe.get_num_workers()`.

    Returns
    -------
    A list of tuples of chunk bound tuples.
    """
    from meerschaum.utils.misc import iterate_chunks

    if batchsize is None:
        batchsize = self.get_num_workers(workers=workers)

    return [
        tuple(
            _batch_chunk_bounds
            for _batch_chunk_bounds in batch
            if _batch_chunk_bounds is not None
        )
        for batch in iterate_chunks(chunk_bounds, batchsize)
        if batch
    ]
Return a list of tuples of chunk bounds of size batchsize.
Parameters
- chunk_bounds (List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]]): A list of chunk bounds (see Pipe.get_chunk_bounds()).
- batchsize (Optional[int], default None): How many chunks to include in a batch. Defaults to Pipe.get_num_workers().
- workers (Optional[int], default None): If batchsize is None, use this as the desired number of workers. Passed to Pipe.get_num_workers().
Returns
- A list of tuples of chunk bound tuples.
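Continuing the get_chunk_bounds() sketch above, the three daily chunks group into batches for parallel workers:

batches = pipe.get_chunk_bounds_batches(bounds, batchsize=2)
print(len(batches))
# 2  (two chunk bounds in the first batch, one in the second)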
def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
    datetime,
    int,
    str,
    None,
    Tuple[Union[datetime, int, str, None]]
]:
    """
    Given a date bound (begin, end), coerce a timezone if necessary.
    """
    from meerschaum.utils.misc import is_int
    from meerschaum.utils.dtypes import coerce_timezone
    from meerschaum.utils.warnings import warn
    dateutil_parser = mrsm.attempt_import('dateutil.parser')

    def _parse_date_bound(dt_val):
        if dt_val is None:
            return None

        if isinstance(dt_val, int):
            return dt_val

        if dt_val == '':
            return ''

        if is_int(dt_val):
            return int(dt_val)

        if isinstance(dt_val, str):
            try:
                dt_val = dateutil_parser.parse(dt_val)
            except Exception as e:
                warn(f"Could not parse '{dt_val}' as datetime:\n{e}")
                return None

        dt_col = self.columns.get('datetime', None)
        dt_typ = str(self.dtypes.get(dt_col, 'datetime64[ns, UTC]'))
        if dt_typ == 'datetime':
            dt_typ = 'datetime64[ns, UTC]'
        return coerce_timezone(dt_val, strip_utc=('utc' not in dt_typ.lower()))

    bounds = tuple(_parse_date_bound(dt_val) for dt_val in dt_vals)
    if len(bounds) == 1:
        return bounds[0]
    return bounds
Given a date bound (begin, end), coerce a timezone if necessary.
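A sketch (the exact output depends on the pipe's datetime dtype, which is timezone-aware by default):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'energy',
    instance='sql:main',
    columns={'datetime': 'ts'},
    temporary=True,
)
begin, end = pipe.parse_date_bounds('2024-01-01', '2024-02-01')
# e.g. begin == datetime.datetime(2024, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
print(pipe.parse_date_bounds(1609459200))
# 1609459200  (integers pass through unchanged)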
def register(
    self,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Register a new Pipe along with its attributes.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle.

    kw: Any
        Keyword arguments to pass to `instance_connector.register_pipe()`.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    if self.temporary:
        return False, "Cannot register pipes created with `temporary=True` (read-only)."

    from meerschaum.utils.formatting import get_console
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin, custom_types
    from meerschaum.config._patch import apply_patch_to_config

    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        try:
            _conn = self.connector
        except Exception as e:
            _conn = None

    if (
        _conn is not None
        and
        (_conn.type == 'plugin' or _conn.type in custom_types)
        and
        getattr(_conn, 'register', None) is not None
    ):
        try:
            with Venv(get_connector_plugin(_conn), debug=debug):
                params = self.connector.register(self)
        except Exception as e:
            get_console().print_exception()
            params = None
        params = {} if params is None else params
        if not isinstance(params, dict):
            from meerschaum.utils.warnings import warn
            warn(
                f"Invalid parameters returned from `register()` in connector {self.connector}:\n"
                + f"{params}"
            )
        else:
            self.parameters = apply_patch_to_config(params, self.parameters)

    if not self.parameters:
        cols = self.columns if self.columns else {'datetime': None, 'id': None}
        self.parameters = {
            'columns': cols,
        }

    with Venv(get_connector_plugin(self.instance_connector)):
        return self.instance_connector.register_pipe(self, debug=debug, **kw)
Register a new Pipe along with its attributes.
Parameters
- debug (bool, default False): Verbosity toggle.
- kw (Any): Keyword arguments to pass to instance_connector.register_pipe().
Returns
- A SuccessTuple of success, message.
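A sketch of registering an in-memory pipe (assumes the 'sql:main' instance is reachable; the keys are illustrative):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'energy',
    instance='sql:main',
    columns={'datetime': 'ts'},
)
success, msg = pipe.register()
print(success, msg)
# The message text varies by instance connector.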
@property
def attributes(self) -> Dict[str, Any]:
    """
    Return a dictionary of a pipe's keys and parameters.
    These values are reflected directly from the pipes table of the instance.
    """
    import time
    from meerschaum.config import get_config
    from meerschaum.config._patch import apply_patch_to_config
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    timeout_seconds = get_config('pipes', 'attributes', 'local_cache_timeout_seconds')

    if '_attributes' not in self.__dict__:
        self._attributes = {}

    now = time.perf_counter()
    last_refresh = self.__dict__.get('_attributes_sync_time', None)
    timed_out = (
        last_refresh is None
        or
        (timeout_seconds is not None and (now - last_refresh) >= timeout_seconds)
    )
    if not self.temporary and timed_out:
        self._attributes_sync_time = now
        local_attributes = self.__dict__.get('_attributes', {})
        with Venv(get_connector_plugin(self.instance_connector)):
            instance_attributes = self.instance_connector.get_pipe_attributes(self)
        self._attributes = apply_patch_to_config(instance_attributes, local_attributes)
    return self._attributes
Return a dictionary of a pipe's keys and parameters. These values are reflected directly from the pipes table of the instance.
@property
def parameters(self) -> Optional[Dict[str, Any]]:
    """
    Return the parameters dictionary of the pipe.
    """
    if 'parameters' not in self.attributes:
        self.attributes['parameters'] = {}
    _parameters = self.attributes['parameters']
    dt_col = _parameters.get('columns', {}).get('datetime', None)
    dt_typ = _parameters.get('dtypes', {}).get(dt_col, None) if dt_col else None
    if dt_col and not dt_typ:
        if 'dtypes' not in _parameters:
            self.attributes['parameters']['dtypes'] = {}
        self.attributes['parameters']['dtypes'][dt_col] = 'datetime'
    return self.attributes['parameters']
Return the parameters dictionary of the pipe.
@property
def columns(self) -> Union[Dict[str, str], None]:
    """
    Return the `columns` dictionary defined in `meerschaum.Pipe.parameters`.
    """
    if 'columns' not in self.parameters:
        self.parameters['columns'] = {}
    cols = self.parameters['columns']
    if not isinstance(cols, dict):
        cols = {}
        self.parameters['columns'] = cols
    return {col_ix: col for col_ix, col in cols.items() if col}
Return the columns dictionary defined in meerschaum.Pipe.parameters.
@property
def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
    """
    Return the `indices` dictionary defined in `meerschaum.Pipe.parameters`.
    """
    indices_key = (
        'indexes'
        if 'indexes' in self.parameters
        else 'indices'
    )
    if indices_key not in self.parameters:
        self.parameters[indices_key] = {}
    _indices = self.parameters[indices_key]
    _columns = self.columns
    dt_col = _columns.get('datetime', None)
    if not isinstance(_indices, dict):
        _indices = {}
        self.parameters[indices_key] = _indices
    unique_cols = list(set((
        [dt_col]
        if dt_col
        else []
    ) + [
        col
        for col_ix, col in _columns.items()
        if col and col_ix != 'datetime'
    ]))
    return {
        **({'unique': unique_cols} if len(unique_cols) > 1 else {}),
        **{col_ix: col for col_ix, col in _columns.items() if col},
        **_indices
    }
Return the indices dictionary defined in meerschaum.Pipe.parameters.
@property
def indexes(self) -> Union[Dict[str, Union[str, List[str]]], None]:
    """
    Alias for `meerschaum.Pipe.indices`.
    """
    return self.indices
Alias for meerschaum.Pipe.indices.
@property
def dtypes(self) -> Union[Dict[str, Any], None]:
    """
    If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
    """
    from meerschaum.config._patch import apply_patch_to_config
    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
    configured_dtypes = self.parameters.get('dtypes', {})
    remote_dtypes = self.infer_dtypes(persist=False)
    patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
    return {
        col: MRSM_ALIAS_DTYPES.get(typ, typ)
        for col, typ in patched_dtypes.items()
        if col and typ
    }
If defined, return the dtypes dictionary defined in meerschaum.Pipe.parameters.
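A sketch of how these parameter properties read back (the keys and exact output are illustrative):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'energy',
    instance='sql:main',
    columns={'datetime': 'ts', 'id': 'meter_id'},
    dtypes={'kwh': 'float'},
    temporary=True,
)
print(pipe.columns)
# {'datetime': 'ts', 'id': 'meter_id'}
print(pipe.indices)
# e.g. {'unique': ['ts', 'meter_id'], 'datetime': 'ts', 'id': 'meter_id'}
print(pipe.dtypes)
# e.g. {'kwh': 'float', 'ts': 'datetime'}  (configured dtypes patched with inferred ones)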
@property
def autoincrement(self) -> bool:
    """
    Return the `autoincrement` parameter for the pipe.
    """
    if 'autoincrement' not in self.parameters:
        self.parameters['autoincrement'] = False

    return self.parameters['autoincrement']
Return the autoincrement parameter for the pipe.
@property
def upsert(self) -> bool:
    """
    Return whether `upsert` is set for the pipe.
    """
    if 'upsert' not in self.parameters:
        self.parameters['upsert'] = False
    return self.parameters['upsert']
Return whether upsert is set for the pipe.
@property
def static(self) -> bool:
    """
    Return whether `static` is set for the pipe.
    """
    if 'static' not in self.parameters:
        self.parameters['static'] = False
    return self.parameters['static']
Return whether static is set for the pipe.
@property
def tzinfo(self) -> Union[None, timezone]:
    """
    Return `timezone.utc` if the pipe is timezone-aware.
    """
    dt_col = self.columns.get('datetime', None)
    if not dt_col:
        return None

    dt_typ = str(self.dtypes.get(dt_col, 'datetime64[ns, UTC]'))
    if 'utc' in dt_typ.lower() or dt_typ == 'datetime':
        return timezone.utc

    if dt_typ == 'datetime64[ns]':
        return None

    return None
Return timezone.utc if the pipe is timezone-aware.
@property
def enforce(self) -> bool:
    """
    Return the `enforce` parameter for the pipe.
    """
    if 'enforce' not in self.parameters:
        self.parameters['enforce'] = True

    return self.parameters['enforce']
Return the enforce parameter for the pipe.
@property
def null_indices(self) -> bool:
    """
    Return the `null_indices` parameter for the pipe.
    """
    if 'null_indices' not in self.parameters:
        self.parameters['null_indices'] = True

    return self.parameters['null_indices']
Return the null_indices parameter for the pipe.
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
    """
    Check if the requested columns are defined.

    Parameters
    ----------
    *args: str
        The column names to be retrieved.

    error: bool, default False
        If `True`, raise an `Exception` if the specified column is not defined.

    Returns
    -------
    A tuple of the same size as `args`, or a `str` if `args` is a single argument.

    Examples
    --------
    >>> pipe = mrsm.Pipe('test', 'test')
    >>> pipe.columns = {'datetime': 'dt', 'id': 'id'}
    >>> pipe.get_columns('datetime', 'id')
    ('dt', 'id')
    >>> pipe.get_columns('value', error=True)
    Exception: 🛑 Missing 'value' column for Pipe('test', 'test').
    """
    from meerschaum.utils.warnings import error as _error, warn
    if not args:
        args = tuple(self.columns.keys())
    col_names = []
    for col in args:
        col_name = None
        try:
            col_name = self.columns[col]
            if col_name is None and error:
                _error(f"Please define the name of the '{col}' column for {self}.")
        except Exception as e:
            col_name = None
        if col_name is None and error:
            _error(f"Missing '{col}'" + f" column for {self}.")
        col_names.append(col_name)
    if len(col_names) == 1:
        return col_names[0]
    return tuple(col_names)
Check if the requested columns are defined.
Parameters
- *args (str): The column names to be retrieved.
- error (bool, default False): If True, raise an Exception if the specified column is not defined.
Returns
- A tuple of the same size as args, or a str if args is a single argument.
Examples
>>> pipe = mrsm.Pipe('test', 'test')
>>> pipe.columns = {'datetime': 'dt', 'id': 'id'}
>>> pipe.get_columns('datetime', 'id')
('dt', 'id')
>>> pipe.get_columns('value', error=True)
Exception: 🛑 Missing 'value' column for Pipe('test', 'test').
def get_columns_types(
    self,
    refresh: bool = False,
    debug: bool = False,
) -> Union[Dict[str, str], None]:
    """
    Get a dictionary of a pipe's column names and their types.

    Parameters
    ----------
    refresh: bool, default False
        If `True`, invalidate the cache and fetch directly from the instance connector.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary of column names (`str`) to column types (`str`).

    Examples
    --------
    >>> pipe.get_columns_types()
    {
      'dt': 'TIMESTAMP WITH TIMEZONE',
      'id': 'BIGINT',
      'val': 'DOUBLE PRECISION',
    }
    >>>
    """
    import time
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import dprint

    now = time.perf_counter()
    cache_seconds = STATIC_CONFIG['pipes']['static_schema_cache_seconds']
    if not self.static:
        refresh = True
    if refresh:
        _ = self.__dict__.pop('_columns_types_timestamp', None)
        _ = self.__dict__.pop('_columns_types', None)
    _columns_types = self.__dict__.get('_columns_types', None)
    if _columns_types:
        columns_types_timestamp = self.__dict__.get('_columns_types_timestamp', None)
        if columns_types_timestamp is not None:
            delta = now - columns_types_timestamp
            if delta < cache_seconds:
                if debug:
                    dprint(
                        f"Returning cached `columns_types` for {self} "
                        f"({round(delta, 2)} seconds old)."
                    )
                return _columns_types

    with mrsm.Venv(get_connector_plugin(self.instance_connector)):
        _columns_types = (
            self.instance_connector.get_pipe_columns_types(self, debug=debug)
            if hasattr(self.instance_connector, 'get_pipe_columns_types')
            else None
        )

    self.__dict__['_columns_types'] = _columns_types
    self.__dict__['_columns_types_timestamp'] = now
    return _columns_types or {}
Get a dictionary of a pipe's column names and their types.
Parameters
- refresh (bool, default False): If True, invalidate the cache and fetch directly from the instance connector.
- debug (bool, default False): Verbosity toggle.
Returns
- A dictionary of column names (str) to column types (str).
Examples
>>> pipe.get_columns_types()
{
'dt': 'TIMESTAMP WITH TIMEZONE',
'id': 'BIGINT',
'val': 'DOUBLE PRECISION',
}
>>>
def get_columns_indices(
    self,
    debug: bool = False,
    refresh: bool = False,
) -> Dict[str, List[Dict[str, str]]]:
    """
    Return a dictionary mapping columns to index information.
    """
    import time
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import dprint

    now = time.perf_counter()
    cache_seconds = (
        STATIC_CONFIG['pipes']['static_schema_cache_seconds']
        if self.static
        else STATIC_CONFIG['pipes']['exists_timeout_seconds']
    )
    if refresh:
        _ = self.__dict__.pop('_columns_indices_timestamp', None)
        _ = self.__dict__.pop('_columns_indices', None)
    _columns_indices = self.__dict__.get('_columns_indices', None)
    if _columns_indices:
        columns_indices_timestamp = self.__dict__.get('_columns_indices_timestamp', None)
        if columns_indices_timestamp is not None:
            delta = now - columns_indices_timestamp
            if delta < cache_seconds:
                if debug:
                    dprint(
                        f"Returning cached `columns_indices` for {self} "
                        f"({round(delta, 2)} seconds old)."
                    )
                return _columns_indices

    with mrsm.Venv(get_connector_plugin(self.instance_connector)):
        _columns_indices = (
            self.instance_connector.get_pipe_columns_indices(self, debug=debug)
            if hasattr(self.instance_connector, 'get_pipe_columns_indices')
            else None
        )

    self.__dict__['_columns_indices'] = _columns_indices
    self.__dict__['_columns_indices_timestamp'] = now
    return {k: v for k, v in _columns_indices.items() if k and v} or {}
Return a dictionary mapping columns to index information.
def get_indices(self) -> Dict[str, str]:
    """
    Return a dictionary mapping index keys to their names in the database.

    Returns
    -------
    A dictionary of index keys to index names.
    """
    from meerschaum.connectors import get_connector_plugin
    with mrsm.Venv(get_connector_plugin(self.instance_connector)):
        if hasattr(self.instance_connector, 'get_pipe_index_names'):
            result = self.instance_connector.get_pipe_index_names(self)
        else:
            result = {}

    return result
Return a dictionary mapping index keys to their names in the database.
Returns
- A dictionary of index keys to index names.
def get_id(self, **kw: Any) -> Union[int, None]:
    """
    Fetch a pipe's ID from its instance connector.
    If the pipe does not exist, return `None`.
    """
    if self.temporary:
        return None
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    with Venv(get_connector_plugin(self.instance_connector)):
        if hasattr(self.instance_connector, 'get_pipe_id'):
            return self.instance_connector.get_pipe_id(self, **kw)

    return None
Fetch a pipe's ID from its instance connector. If the pipe does not exist, return None.
@property
def id(self) -> Union[int, None]:
    """
    Fetch and cache a pipe's ID.
    """
    if not ('_id' in self.__dict__ and self._id):
        self._id = self.get_id()
    return self._id
Fetch and cache a pipe's ID.
def get_val_column(self, debug: bool = False) -> Union[str, None]:
    """
    Return the name of the value column if it's defined, otherwise make an educated guess.
    If not set in the `columns` dictionary, return the first numeric column that is not
    an ID or datetime column.
    If none may be found, return `None`.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    Either a string or `None`.
    """
    from meerschaum.utils.debug import dprint
    if debug:
        dprint('Attempting to determine the value column...')
    try:
        val_name = self.get_columns('value')
    except Exception as e:
        val_name = None
    if val_name is not None:
        if debug:
            dprint(f"Value column: {val_name}")
        return val_name

    cols = self.columns
    if cols is None:
        if debug:
            dprint('No columns could be determined. Returning...')
        return None
    try:
        dt_name = self.get_columns('datetime', error=False)
    except Exception as e:
        dt_name = None
    try:
        id_name = self.get_columns('id', error=False)
    except Exception as e:
        id_name = None

    if debug:
        dprint(f"dt_name: {dt_name}")
        dprint(f"id_name: {id_name}")

    cols_types = self.get_columns_types(debug=debug)
    if cols_types is None:
        return None
    if debug:
        dprint(f"cols_types: {cols_types}")
    if dt_name is not None:
        cols_types.pop(dt_name, None)
    if id_name is not None:
        cols_types.pop(id_name, None)

    candidates = []
    candidate_keywords = {'float', 'double', 'precision', 'int', 'numeric'}
    for search_term in candidate_keywords:
        for col, typ in cols_types.items():
            if search_term in typ.lower():
                candidates.append(col)
                break
    if not candidates:
        if debug:
            dprint("No value column could be determined.")
        return None

    return candidates[0]
Return the name of the value column if it's defined, otherwise make an educated guess. If not set in the columns dictionary, return the first numeric column that is not an ID or datetime column. If none may be found, return None.
Parameters
- debug (bool, default False): Verbosity toggle.
Returns
- Either a string or None.
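A sketch, assuming a pipe whose table has a numeric column besides its datetime and ID columns (the keys are illustrative):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'plugin:noaa', 'weather',
    instance='sql:main',
    columns={'datetime': 'timestamp', 'id': 'station'},
)
print(pipe.get_val_column())
# e.g. 'temperature'  (the first numeric, non-index column found)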
@property
def parents(self) -> List[mrsm.Pipe]:
    """
    Return a list of `meerschaum.Pipe` objects to be designated as parents.
    """
    if 'parents' not in self.parameters:
        return []
    from meerschaum.utils.warnings import warn
    _parents_keys = self.parameters['parents']
    if not isinstance(_parents_keys, list):
        warn(
            f"Please ensure the parents for {self} are defined as a list of keys.",
            stacklevel=4
        )
        return []
    from meerschaum import Pipe
    _parents = []
    for keys in _parents_keys:
        try:
            p = Pipe(**keys)
        except Exception as e:
            warn(f"Unable to build parent with keys '{keys}' for {self}:\n{e}")
            continue
        _parents.append(p)
    return _parents
Return a list of meerschaum.Pipe objects to be designated as parents.
620@property 621def parent(self) -> Union[mrsm.Pipe, None]: 622 """ 623 Return the first pipe in `self.parents` or `None`. 624 """ 625 parents = self.parents 626 if not parents: 627 return None 628 return parents[0]
Return the first pipe in self.parents or None.
@property
def children(self) -> List[mrsm.Pipe]:
    """
    Return a list of `meerschaum.Pipe` objects to be designated as children.
    """
    if 'children' not in self.parameters:
        return []
    from meerschaum.utils.warnings import warn
    _children_keys = self.parameters['children']
    if not isinstance(_children_keys, list):
        warn(
            f"Please ensure the children for {self} are defined as a list of keys.",
            stacklevel=4
        )
        return []
    from meerschaum import Pipe
    _children = []
    for keys in _children_keys:
        try:
            p = Pipe(**keys)
        except Exception as e:
            warn(f"Unable to build child with keys '{keys}' for {self}:\n{e}")
            continue
        _children.append(p)
    return _children
Return a list of meerschaum.Pipe objects to be designated as children.
@property
def target(self) -> str:
    """
    The target table name.
    You can set the target name under one of the following keys
    (checked in this order):
    - `target`
    - `target_name`
    - `target_table`
    - `target_table_name`
    """
    if 'target' not in self.parameters:
        default_target = self._target_legacy()
        default_targets = {default_target}
        potential_keys = ('target_name', 'target_table', 'target_table_name')
        _target = None
        for k in potential_keys:
            if k in self.parameters:
                _target = self.parameters[k]
                break

        _target = _target or default_target

        if self.instance_connector.type == 'sql':
            from meerschaum.utils.sql import truncate_item_name
            truncated_target = truncate_item_name(_target, self.instance_connector.flavor)
            default_targets.add(truncated_target)
            warned_target = self.__dict__.get('_warned_target', False)
            if truncated_target != _target and not warned_target:
                warn(
                    f"The target '{_target}' is too long for '{self.instance_connector.flavor}', "
                    + f"will use {truncated_target} instead."
                )
                self.__dict__['_warned_target'] = True
                _target = truncated_target

        if _target in default_targets:
            return _target
        self.target = _target
    return self.parameters['target']
The target table name. You can set the target name under one of the following keys (checked in this order):
- target
- target_name
- target_table
- target_table_name
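A minimal sketch (the target may also be set after construction via pipe.parameters):

import meerschaum as mrsm

pipe = mrsm.Pipe(
    'foo:bar', 'demo',
    instance='sql:temp',
    target='demo_table',
)
print(pipe.target)
# 'demo_table'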
def guess_datetime(self) -> Union[str, None]:
    """
    Try to determine a pipe's datetime column.
    """
    _dtypes = self.dtypes

    ### Abort if the user explicitly disallows a datetime index.
    if 'datetime' in _dtypes:
        if _dtypes['datetime'] is None:
            return None

    from meerschaum.utils.dtypes import are_dtypes_equal
    dt_cols = [
        col
        for col, typ in _dtypes.items()
        if are_dtypes_equal(typ, 'datetime')
    ]
    if not dt_cols:
        return None
    return dt_cols[0]
Try to determine a pipe's datetime column.
def show(
    self,
    nopretty: bool = False,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    Show attributes of a Pipe.

    Parameters
    ----------
    nopretty: bool, default False
        If `True`, simply print the JSON of the pipe's attributes.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    import json
    from meerschaum.utils.formatting import (
        pprint, make_header, ANSI, highlight_pipes, fill_ansi, get_console,
    )
    from meerschaum.utils.packages import import_rich, attempt_import
    from meerschaum.utils.warnings import info
    attributes_json = json.dumps(self.attributes)
    if not nopretty:
        _to_print = f"Attributes for {self}:"
        if ANSI:
            _to_print = fill_ansi(highlight_pipes(make_header(_to_print)), 'magenta')
            print(_to_print)
            rich = import_rich()
            rich_json = attempt_import('rich.json')
            get_console().print(rich_json.JSON(attributes_json))
        else:
            print(_to_print)
    else:
        print(attributes_json)

    return True, "Success"
Show attributes of a Pipe.
Parameters
- nopretty (bool, default False): If True, simply print the JSON of the pipe's attributes.
- debug (bool, default False): Verbosity toggle.
Returns
- A SuccessTuple of success, message.
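A minimal sketch (assuming the demo pipe from above):

import meerschaum as mrsm

pipe = mrsm.Pipe('foo:bar', 'demo', instance='sql:temp')
success, msg = pipe.show(nopretty=True)
# Prints the attributes JSON, e.g. {"connector_keys": "foo:bar", "metric_key": "demo", ...}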
def edit(
    self,
    patch: bool = False,
    interactive: bool = False,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Edit a Pipe's configuration.

    Parameters
    ----------
    patch: bool, default False
        If `patch` is True, update parameters by cascading rather than overwriting.
    interactive: bool, default False
        If `True`, open an editor for the user to make changes to the pipe's YAML file.
    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    if self.temporary:
        return False, "Cannot edit pipes created with `temporary=True` (read-only)."

    if not interactive:
        with Venv(get_connector_plugin(self.instance_connector)):
            return self.instance_connector.edit_pipe(self, patch=patch, debug=debug, **kw)

    from meerschaum.config._paths import PIPES_CACHE_RESOURCES_PATH
    from meerschaum.utils.misc import edit_file
    parameters_filename = str(self) + '.yaml'
    parameters_path = PIPES_CACHE_RESOURCES_PATH / parameters_filename

    from meerschaum.utils.yaml import yaml

    edit_text = f"Edit the parameters for {self}"
    edit_top = '#' * (len(edit_text) + 4)
    edit_header = edit_top + f'\n# {edit_text} #\n' + edit_top + '\n\n'

    from meerschaum.config import get_config
    parameters = dict(get_config('pipes', 'parameters', patch=True))
    from meerschaum.config._patch import apply_patch_to_config
    parameters = apply_patch_to_config(parameters, self.parameters)

    ### Write the parameters to the YAML file.
    with open(parameters_path, 'w+') as f:
        f.write(edit_header)
        yaml.dump(parameters, stream=f, sort_keys=False)

    ### Only quit editing once the YAML is valid.
    editing = True
    while editing:
        edit_file(parameters_path)
        try:
            with open(parameters_path, 'r') as f:
                file_parameters = yaml.load(f.read())
        except Exception as e:
            from meerschaum.utils.warnings import warn
            warn(f"Invalid format defined for '{self}':\n\n{e}")
            input(f"Press [Enter] to correct the configuration for '{self}': ")
        else:
            editing = False

    self.parameters = file_parameters

    if debug:
        from meerschaum.utils.formatting import pprint
        pprint(self.parameters)

    with Venv(get_connector_plugin(self.instance_connector)):
        return self.instance_connector.edit_pipe(self, patch=patch, debug=debug, **kw)
Edit a Pipe's configuration.
Parameters
- patch (bool, default False): If patch is True, update parameters by cascading rather than overwriting.
- interactive (bool, default False): If True, open an editor for the user to make changes to the pipe's YAML file.
- debug (bool, default False): Verbosity toggle.
Returns
- A SuccessTuple of success, message.
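A non-interactive sketch: mutate the parameters in memory, then persist them to the instance with edit() (assumes the registered demo pipe from above):

import meerschaum as mrsm

pipe = mrsm.Pipe('foo:bar', 'demo', instance='sql:temp')
pipe.parameters['tags'] = ['production', 'demo']
success, msg = pipe.edit()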
def edit_definition(
    self,
    yes: bool = False,
    noask: bool = False,
    force: bool = False,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Edit a pipe's definition file and update its configuration.
    **NOTE:** This function is interactive and should not be used in automated scripts!

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    if self.temporary:
        return False, "Cannot edit pipes created with `temporary=True` (read-only)."

    from meerschaum.connectors import instance_types
    if (self.connector is None) or self.connector.type not in instance_types:
        return self.edit(interactive=True, debug=debug, **kw)

    import json
    from meerschaum.utils.warnings import info, warn
    from meerschaum.utils.debug import dprint
    from meerschaum.config._patch import apply_patch_to_config
    from meerschaum.utils.misc import edit_file

    _parameters = self.parameters
    if 'fetch' not in _parameters:
        _parameters['fetch'] = {}

    def _edit_api():
        from meerschaum.utils.prompt import prompt, yes_no
        info(
            f"Please enter the keys of the source pipe from '{self.connector}'.\n" +
            "Type 'None' for None, or empty when there is no default. Press [CTRL+C] to skip."
        )

        _keys = {'connector_keys': None, 'metric_key': None, 'location_key': None}
        for k in _keys:
            _keys[k] = _parameters['fetch'].get(k, None)

        for k, v in _keys.items():
            try:
                _keys[k] = prompt(k.capitalize().replace('_', ' ') + ':', icon=True, default=v)
            except KeyboardInterrupt:
                continue
            if _keys[k] in ('', 'None', '\'None\'', '[None]'):
                _keys[k] = None

        _parameters['fetch'] = apply_patch_to_config(_parameters['fetch'], _keys)

        info("You may optionally specify additional filter parameters as JSON.")
        print("  Parameters are translated into a 'WHERE x AND y' clause, and lists are IN clauses.")
        print("  For example, the following JSON would correspond to 'WHERE x = 1 AND y IN (2, 3)':")
        print(json.dumps({'x': 1, 'y': [2, 3]}, indent=2, separators=(',', ': ')))
        if force or yes_no(
            "Would you like to add additional filter parameters?",
            yes=yes, noask=noask
        ):
            from meerschaum.config._paths import PIPES_CACHE_RESOURCES_PATH
            definition_filename = str(self) + '.json'
            definition_path = PIPES_CACHE_RESOURCES_PATH / definition_filename
            try:
                definition_path.touch()
                with open(definition_path, 'w+') as f:
                    json.dump(_parameters.get('fetch', {}).get('params', {}), f, indent=2)
            except Exception as e:
                return False, f"Failed writing file '{definition_path}':\n" + str(e)

            _params = None
            while True:
                edit_file(definition_path)
                try:
                    with open(definition_path, 'r') as f:
                        _params = json.load(f)
                except Exception as e:
                    warn(f'Failed to read parameters JSON:\n{e}', stack=False)
                    if force or yes_no(
                        "Would you like to try again?\n  "
                        + "If not, the parameters JSON file will be ignored.",
                        noask=noask, yes=yes
                    ):
                        continue
                    _params = None
                break
            if _params is not None:
                if 'fetch' not in _parameters:
                    _parameters['fetch'] = {}
                _parameters['fetch']['params'] = _params

        self.parameters = _parameters
        return True, "Success"

    def _edit_sql():
        import pathlib, os, textwrap
        from meerschaum.config._paths import PIPES_CACHE_RESOURCES_PATH
        from meerschaum.utils.misc import edit_file
        definition_filename = str(self) + '.sql'
        definition_path = PIPES_CACHE_RESOURCES_PATH / definition_filename

        sql_definition = _parameters['fetch'].get('definition', None)
        if sql_definition is None:
            sql_definition = ''
        sql_definition = textwrap.dedent(sql_definition).lstrip()

        try:
            definition_path.touch()
            with open(definition_path, 'w+') as f:
                f.write(sql_definition)
        except Exception as e:
            return False, f"Failed writing file '{definition_path}':\n" + str(e)

        edit_file(definition_path)
        try:
            with open(definition_path, 'r') as f:
                file_definition = f.read()
        except Exception as e:
            return False, f"Failed reading file '{definition_path}':\n" + str(e)

        if sql_definition == file_definition:
            return False, f"No changes made to definition for {self}."

        if ' ' not in file_definition:
            return False, f"Invalid SQL definition for {self}."

        if debug:
            dprint("Read SQL definition:\n\n" + file_definition)
        _parameters['fetch']['definition'] = file_definition
        self.parameters = _parameters
        return True, "Success"

    locals()['_edit_' + str(self.connector.type)]()
    return self.edit(interactive=False, debug=debug, **kw)
Edit a pipe's definition file and update its configuration. NOTE: This function is interactive and should not be used in automated scripts!
Returns
- A SuccessTuple of success, message.
def update(self, *args, **kw) -> SuccessTuple:
    """
    Update a pipe's parameters in its instance.
    """
    kw['interactive'] = False
    return self.edit(*args, **kw)
Update a pipe's parameters in its instance.
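Because update() is just edit(interactive=False), the same pattern applies. A sketch (the fetch key shown is an assumed example):

pipe.parameters['fetch'] = {'backtrack_minutes': 1440}
success, msg = pipe.update()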
def sync(
    self,
    df: Union[
        pd.DataFrame,
        Dict[str, List[Any]],
        List[Dict[str, Any]],
        InferFetch
    ] = InferFetch,
    begin: Union[datetime, int, str, None] = '',
    end: Union[datetime, int, None] = None,
    force: bool = False,
    retries: int = 10,
    min_seconds: int = 1,
    check_existing: bool = True,
    enforce_dtypes: bool = True,
    blocking: bool = True,
    workers: Optional[int] = None,
    callback: Optional[Callable[[Tuple[bool, str]], Any]] = None,
    error_callback: Optional[Callable[[Exception], Any]] = None,
    chunksize: Optional[int] = -1,
    sync_chunks: bool = True,
    debug: bool = False,
    _inplace: bool = True,
    **kw: Any
) -> SuccessTuple:
    """
    Fetch new data from the source and update the pipe's table with new data.

    Get new remote data via fetch, get existing data in the same time period,
    and merge the two, only keeping the unseen data.

    Parameters
    ----------
    df: Union[None, pd.DataFrame, Dict[str, List[Any]]], default None
        An optional DataFrame to sync into the pipe. Defaults to `None`.

    begin: Union[datetime, int, str, None], default ''
        Optionally specify the earliest datetime to search for data.

    end: Union[datetime, int, str, None], default None
        Optionally specify the latest datetime to search for data.

    force: bool, default False
        If `True`, keep trying to sync until `retries` attempts.

    retries: int, default 10
        If `force`, how many attempts to try syncing before declaring failure.

    min_seconds: Union[int, float], default 1
        If `force`, how many seconds to sleep between retries. Defaults to `1`.

    check_existing: bool, default True
        If `True`, pull and diff with existing data from the pipe.

    enforce_dtypes: bool, default True
        If `True`, enforce dtypes on incoming data.
        Set this to `False` if the incoming rows are expected to be of the correct dtypes.

    blocking: bool, default True
        If `True`, wait for the sync to finish and return its result;
        otherwise sync asynchronously and return success. Defaults to `True`.
        Only intended for specific scenarios.

    workers: Optional[int], default None
        If provided and the instance connector is thread-safe
        (`pipe.instance_connector.IS_THREAD_SAFE is True`),
        limit concurrent sync to this many threads.

    callback: Optional[Callable[[Tuple[bool, str]], Any]], default None
        Callback function which expects a SuccessTuple as input.
        Only applies when `blocking=False`.

    error_callback: Optional[Callable[[Exception], Any]], default None
        Callback function which expects an Exception as input.
        Only applies when `blocking=False`.

    chunksize: int, default -1
        Specify the number of rows to sync per chunk.
        If `-1`, resort to the system configuration (default is `900`).
        A `chunksize` of `None` will sync all rows in one transaction.

    sync_chunks: bool, default True
        If possible, sync chunks while fetching them into memory.

    debug: bool, default False
        Verbosity toggle. Defaults to False.

    Returns
    -------
    A `SuccessTuple` of success (`bool`) and message (`str`).
    """
    from meerschaum.utils.debug import dprint, _checkpoint
    from meerschaum.utils.formatting import get_console
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.utils.misc import df_is_chunk_generator, filter_keywords, filter_arguments
    from meerschaum.utils.pool import get_pool
    from meerschaum.config import get_config

    if (callback is not None or error_callback is not None) and blocking:
        warn("Callback functions are only executed when blocking = False. Ignoring...")

    _checkpoint(_total=2, **kw)

    if chunksize == 0:
        chunksize = None
        sync_chunks = False

    begin, end = self.parse_date_bounds(begin, end)
    kw.update({
        'begin': begin,
        'end': end,
        'force': force,
        'retries': retries,
        'min_seconds': min_seconds,
        'check_existing': check_existing,
        'blocking': blocking,
        'workers': workers,
        'callback': callback,
        'error_callback': error_callback,
        'sync_chunks': sync_chunks,
        'chunksize': chunksize,
        'safe_copy': True,
    })

    ### NOTE: Invalidate `_exists` cache before and after syncing.
    self._exists = None

    def _sync(
        p: mrsm.Pipe,
        df: Union[
            'pd.DataFrame',
            Dict[str, List[Any]],
            List[Dict[str, Any]],
            InferFetch
        ] = InferFetch,
    ) -> SuccessTuple:
        if df is None:
            p._exists = None
            return (
                False,
                f"You passed `None` instead of data into `sync()` for {p}.\n"
                + "Omit the DataFrame to infer fetching.",
            )
        ### Ensure that the Pipe is registered.
        if not p.temporary and p.get_id(debug=debug) is None:
            ### NOTE: This may trigger an interactive session for plugins!
            register_success, register_msg = p.register(debug=debug)
            if not register_success:
                if 'already' not in register_msg:
                    p._exists = None
                    return register_success, register_msg

        ### If the connector is a plugin with a `sync()` method, return that instead.
        ### If the plugin does not have a `sync()` method but does have a `fetch()` method,
        ### use that instead.
        ### NOTE: The DataFrame must be omitted for the plugin sync method to apply.
        ### If a DataFrame is provided, continue as expected.
        if hasattr(df, 'MRSM_INFER_FETCH'):
            try:
                if p.connector is None:
                    if ':' not in p.connector_keys:
                        return True, f"{p} does not support fetching; nothing to do."

                    msg = f"{p} does not have a valid connector."
                    if p.connector_keys.startswith('plugin:'):
                        msg += f"\n    Perhaps {p.connector_keys} has a syntax error?"
                    p._exists = None
                    return False, msg
            except Exception:
                p._exists = None
                return False, f"Unable to create the connector for {p}."

            ### Sync in place if this is a SQL pipe.
            if (
                str(self.connector) == str(self.instance_connector)
                and hasattr(self.instance_connector, 'sync_pipe_inplace')
                and _inplace
                and get_config('system', 'experimental', 'inplace_sync')
            ):
                with Venv(get_connector_plugin(self.instance_connector)):
                    p._exists = None
                    _args, _kwargs = filter_arguments(
                        p.instance_connector.sync_pipe_inplace,
                        p,
                        debug=debug,
                        **kw
                    )
                    return self.instance_connector.sync_pipe_inplace(
                        *_args,
                        **_kwargs
                    )

            ### Activate and invoke `sync(pipe)` for plugin connectors with `sync` methods.
            try:
                if getattr(p.connector, 'sync', None) is not None:
                    with Venv(get_connector_plugin(p.connector), debug=debug):
                        _args, _kwargs = filter_arguments(
                            p.connector.sync,
                            p,
                            debug=debug,
                            **kw
                        )
                        return_tuple = p.connector.sync(*_args, **_kwargs)
                    p._exists = None
                    if not isinstance(return_tuple, tuple):
                        return_tuple = (
                            False,
                            f"Plugin '{p.connector.label}' returned non-tuple value: {return_tuple}"
                        )
                    return return_tuple
            except Exception as e:
                get_console().print_exception()
                msg = f"Failed to sync {p} with exception: '" + str(e) + "'"
                if debug:
                    error(msg, silent=False)
                p._exists = None
                return False, msg

            ### Fetch the dataframe from the connector's `fetch()` method.
            try:
                with Venv(get_connector_plugin(p.connector), debug=debug):
                    df = p.fetch(
                        **filter_keywords(
                            p.fetch,
                            debug=debug,
                            **kw
                        )
                    )
                kw['safe_copy'] = False
            except Exception as e:
                get_console().print_exception(
                    suppress=[
                        'meerschaum/core/Pipe/_sync.py',
                        'meerschaum/core/Pipe/_fetch.py',
                    ]
                )
                msg = f"Failed to fetch data from {p.connector}:\n    {e}"
                df = None

            if df is None:
                p._exists = None
                return False, f"No data were fetched for {p}."

            if isinstance(df, list):
                if len(df) == 0:
                    return True, f"No new rows were returned for {p}."

                ### May be a chunk hook results list.
                if isinstance(df[0], tuple):
                    success = all([_success for _success, _ in df])
                    message = '\n'.join([_message for _, _message in df])
                    return success, message

            if df is True:
                p._exists = None
                return True, f"{p} is being synced in parallel."

        ### CHECKPOINT: Retrieved the DataFrame.
        _checkpoint(**kw)

        ### Allow for dataframe generators or iterables.
        if df_is_chunk_generator(df):
            kw['workers'] = p.get_num_workers(kw.get('workers', None))
            dt_col = p.columns.get('datetime', None)
            pool = get_pool(workers=kw.get('workers', 1))
            if debug:
                dprint(f"Received {type(df)}. Attempting to sync first chunk...")

            try:
                chunk = next(df)
            except StopIteration:
                return True, "Received an empty generator; nothing to do."

            chunk_success, chunk_msg = _sync(p, chunk)
            chunk_msg = '\n' + self._get_chunk_label(chunk, dt_col) + '\n' + chunk_msg
            if not chunk_success:
                return chunk_success, f"Unable to sync initial chunk for {p}:\n{chunk_msg}"
            if debug:
                dprint("Successfully synced the first chunk, attempting the rest...")

            def _process_chunk(_chunk):
                _chunk_attempts = 0
                _max_chunk_attempts = 3
                while _chunk_attempts < _max_chunk_attempts:
                    try:
                        _chunk_success, _chunk_msg = _sync(p, _chunk)
                    except Exception as e:
                        _chunk_success, _chunk_msg = False, str(e)
                    if _chunk_success:
                        break
                    _chunk_attempts += 1
                    _sleep_seconds = _chunk_attempts ** 2
                    warn(
                        (
                            f"Failed to sync chunk to {self} "
                            + f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
                            + f"Sleeping for {_sleep_seconds} second"
                            + ('s' if _sleep_seconds != 1 else '')
                            + f":\n{_chunk_msg}"
                        ),
                        stack=False,
                    )
                    time.sleep(_sleep_seconds)

                num_rows_str = (
                    f"{num_rows:,} rows"
                    if (num_rows := len(_chunk)) != 1
                    else f"{num_rows} row"
                )
                _chunk_msg = (
                    (
                        "Synced"
                        if _chunk_success
                        else "Failed to sync"
                    ) + f" a chunk ({num_rows_str}) to {p}:\n"
                    + self._get_chunk_label(_chunk, dt_col)
                    + '\n'
                    + _chunk_msg
                )

                mrsm.pprint((_chunk_success, _chunk_msg), calm=True)
                return _chunk_success, _chunk_msg

            results = sorted(
                [(chunk_success, chunk_msg)] + (
                    list(pool.imap(_process_chunk, df))
                    if (
                        not df_is_chunk_generator(chunk)  # Handle nested generators.
                        and kw.get('workers', 1) != 1
                    )
                    else list(
                        _process_chunk(_child_chunks)
                        for _child_chunks in df
                    )
                )
            )
            chunk_messages = [chunk_msg for _, chunk_msg in results]
            success_bools = [chunk_success for chunk_success, _ in results]
            num_successes = len([chunk_success for chunk_success, _ in results if chunk_success])
            num_failures = len([chunk_success for chunk_success, _ in results if not chunk_success])
            success = all(success_bools)
            msg = (
                'Synced '
                + f'{len(chunk_messages):,} chunk'
                + ('s' if len(chunk_messages) != 1 else '')
                + f' to {p}\n({num_successes} succeeded, {num_failures} failed):\n\n'
                + '\n\n'.join(chunk_messages).lstrip().rstrip()
            ).lstrip().rstrip()
            return success, msg

        ### Cast to a dataframe and ensure datatypes are what we expect.
        df = self.enforce_dtypes(
            df,
            chunksize=chunksize,
            enforce=enforce_dtypes,
            debug=debug,
        )

        ### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
        self._persist_new_json_columns(df, debug=debug)
        self._persist_new_numeric_columns(df, debug=debug)
        self._persist_new_uuid_columns(df, debug=debug)
        self._persist_new_bytes_columns(df, debug=debug)
        self._persist_new_geometry_columns(df, debug=debug)

        if debug:
            dprint(
                "DataFrame to sync:\n"
                + (
                    str(df)[:255]
                    + '...'
                    if len(str(df)) >= 256
                    else str(df)
                ),
                **kw
            )

        ### If `force`, continue to sync until success.
        return_tuple = False, f"Did not sync {p}."
        run = True
        _retries = 1
        while run:
            with Venv(get_connector_plugin(self.instance_connector)):
                return_tuple = p.instance_connector.sync_pipe(
                    pipe=p,
                    df=df,
                    debug=debug,
                    **kw
                )
            _retries += 1
            run = (not return_tuple[0]) and force and _retries <= retries
            if run and debug:
                dprint(f"Syncing failed for {p}. Attempt ( {_retries} / {retries} )", **kw)
                dprint(f"Sleeping for {min_seconds} seconds...", **kw)
                time.sleep(min_seconds)
            if _retries > retries:
                warn(
                    f"Unable to sync {p} within {retries} attempt" +
                    ("s" if retries != 1 else "") + "!"
                )

        ### CHECKPOINT: Finished syncing. Handle caching.
        _checkpoint(**kw)
        if self.cache_pipe is not None:
            if debug:
                dprint("Caching retrieved dataframe.", **kw)
            _sync_cache_tuple = self.cache_pipe.sync(df, debug=debug, **kw)
            if not _sync_cache_tuple[0]:
                warn(f"Failed to sync local cache for {self}.")

        self._exists = None
        return return_tuple

    if blocking:
        self._exists = None
        return _sync(self, df=df)

    from meerschaum.utils.threading import Thread
    def default_callback(result_tuple: SuccessTuple):
        dprint(f"Asynchronous result from {self}: {result_tuple}", **kw)

    def default_error_callback(x: Exception):
        dprint(f"Error received for {self}: {x}", **kw)

    if callback is None and debug:
        callback = default_callback
    if error_callback is None and debug:
        error_callback = default_error_callback
    try:
        thread = Thread(
            target=_sync,
            args=(self,),
            kwargs={'df': df},
            daemon=False,
            callback=callback,
            error_callback=error_callback,
        )
        thread.start()
    except Exception as e:
        self._exists = None
        return False, str(e)

    self._exists = None
    return True, f"Spawned asynchronous sync for {self}."
Fetch new data from the source and update the pipe's table with new data.
Get new remote data via fetch, get existing data in the same time period, and merge the two, only keeping the unseen data.
Parameters
- df (Union[None, pd.DataFrame, Dict[str, List[Any]]], default None): An optional DataFrame to sync into the pipe. Defaults to None.
- begin (Union[datetime, int, str, None], default ''): Optionally specify the earliest datetime to search for data.
- end (Union[datetime, int, str, None], default None): Optionally specify the latest datetime to search for data.
- force (bool, default False): If True, keep trying to sync until retries attempts.
- retries (int, default 10): If force, how many attempts to try syncing before declaring failure.
- min_seconds (Union[int, float], default 1): If force, how many seconds to sleep between retries. Defaults to 1.
- check_existing (bool, default True): If True, pull and diff with existing data from the pipe.
- enforce_dtypes (bool, default True): If True, enforce dtypes on incoming data. Set this to False if the incoming rows are expected to be of the correct dtypes.
- blocking (bool, default True): If True, wait for the sync to finish and return its result; otherwise sync asynchronously and return success. Defaults to True. Only intended for specific scenarios.
- workers (Optional[int], default None): If provided and the instance connector is thread-safe (pipe.instance_connector.IS_THREAD_SAFE is True), limit concurrent sync to this many threads.
- callback (Optional[Callable[[Tuple[bool, str]], Any]], default None): Callback function which expects a SuccessTuple as input. Only applies when blocking=False.
- error_callback (Optional[Callable[[Exception], Any]], default None): Callback function which expects an Exception as input. Only applies when blocking=False.
- chunksize (int, default -1): Specify the number of rows to sync per chunk. If -1, resort to the system configuration (default is 900). A chunksize of None will sync all rows in one transaction.
- sync_chunks (bool, default True): If possible, sync chunks while fetching them into memory.
- debug (bool, default False): Verbosity toggle. Defaults to False.
Returns
- A SuccessTuple of success (bool) and message (str).
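Both modes from the examples above apply here; a short sketch (the rows are illustrative):

from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe('foo:bar', 'demo', instance='sql:temp')

### Omit the DataFrame to infer fetching from the connector.
success, msg = pipe.sync(begin=datetime(2024, 1, 1))

### Or sync explicit rows (a list of dicts, a dict of lists, or a DataFrame).
success, msg = pipe.sync(
    [{'ts': datetime(2024, 1, 2), 'id': 1, 'vl': 42}],
    check_existing=False,
)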
def get_sync_time(
    self,
    params: Optional[Dict[str, Any]] = None,
    newest: bool = True,
    apply_backtrack_interval: bool = False,
    remote: bool = False,
    round_down: bool = False,
    debug: bool = False
) -> Union['datetime', int, None]:
    """
    Get the most recent datetime value for a Pipe.

    Parameters
    ----------
    params: Optional[Dict[str, Any]], default None
        Dictionary to build a WHERE clause for a specific column.
        See `meerschaum.utils.sql.build_where`.

    newest: bool, default True
        If `True`, get the most recent datetime (honoring `params`).
        If `False`, get the oldest datetime (`ASC` instead of `DESC`).

    apply_backtrack_interval: bool, default False
        If `True`, subtract the backtrack interval from the sync time.

    remote: bool, default False
        If `True` and the instance connector supports it, return the sync time
        for the remote table definition.

    round_down: bool, default False
        If `True`, round down the datetime value to the nearest minute.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `datetime` or int, if the pipe exists, otherwise `None`.
    """
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.utils.misc import round_time, filter_keywords
    from meerschaum.utils.warnings import warn

    if not self.columns.get('datetime', None):
        return None

    connector = self.instance_connector if not remote else self.connector
    with Venv(get_connector_plugin(connector)):
        if not hasattr(connector, 'get_sync_time'):
            warn(
                f"Connectors of type '{connector.type}' "
                "do not implement `get_sync_time()`.",
                stack=False,
            )
            return None
        sync_time = connector.get_sync_time(
            self,
            **filter_keywords(
                connector.get_sync_time,
                params=params,
                newest=newest,
                remote=remote,
                debug=debug,
            )
        )

    if round_down and isinstance(sync_time, datetime):
        sync_time = round_time(sync_time, timedelta(minutes=1))

    if apply_backtrack_interval and sync_time is not None:
        backtrack_interval = self.get_backtrack_interval(debug=debug)
        try:
            sync_time -= backtrack_interval
        except Exception as e:
            warn(f"Failed to apply backtrack interval:\n{e}")

    return self.parse_date_bounds(sync_time)
Get the most recent datetime value for a Pipe.
Parameters
- params (Optional[Dict[str, Any]], default None): Dictionary to build a WHERE clause for a specific column. See meerschaum.utils.sql.build_where.
- newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
- apply_backtrack_interval (bool, default False): If True, subtract the backtrack interval from the sync time.
- remote (bool, default False): If True and the instance connector supports it, return the sync time for the remote table definition.
- round_down (bool, default False): If True, round down the datetime value to the nearest minute.
- debug (bool, default False): Verbosity toggle.
Returns
- A datetime or int, if the pipe exists, otherwise None.
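A short sketch (assuming the demo pipe has been synced):

newest = pipe.get_sync_time()
oldest = pipe.get_sync_time(newest=False)

### A common pattern: resume from the last sync time minus the backtrack interval.
begin = pipe.get_sync_time(apply_backtrack_interval=True)
success, msg = pipe.sync(begin=begin)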
def exists(
    self,
    debug: bool = False
) -> bool:
    """
    See if a Pipe's table exists.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `bool` corresponding to whether a pipe's underlying table exists.
    """
    import time
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin
    from meerschaum.config import STATIC_CONFIG
    from meerschaum.utils.debug import dprint
    now = time.perf_counter()
    exists_timeout_seconds = STATIC_CONFIG['pipes']['exists_timeout_seconds']

    _exists = self.__dict__.get('_exists', None)
    if _exists:
        exists_timestamp = self.__dict__.get('_exists_timestamp', None)
        if exists_timestamp is not None:
            delta = now - exists_timestamp
            if delta < exists_timeout_seconds:
                if debug:
                    dprint(f"Returning cached `exists` for {self} ({round(delta, 2)} seconds old).")
                return _exists

    with Venv(get_connector_plugin(self.instance_connector)):
        _exists = (
            self.instance_connector.pipe_exists(pipe=self, debug=debug)
            if hasattr(self.instance_connector, 'pipe_exists')
            else False
        )

    self.__dict__['_exists'] = _exists
    self.__dict__['_exists_timestamp'] = now
    return _exists
See if a Pipe's table exists.
Parameters
- debug (bool, default False): Verbosity toggle.
Returns
- A bool corresponding to whether a pipe's underlying table exists.
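Since results are cached for a few seconds, exists() is cheap to call when deciding whether to seed a table (a sketch):

from datetime import datetime

if not pipe.exists():
    pipe.sync([{'ts': datetime(2024, 1, 1), 'id': 1, 'vl': 1}])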
def filter_existing(
    self,
    df: 'pd.DataFrame',
    safe_copy: bool = True,
    date_bound_only: bool = False,
    include_unchanged_columns: bool = False,
    enforce_dtypes: bool = False,
    chunksize: Optional[int] = -1,
    debug: bool = False,
    **kw
) -> Tuple['pd.DataFrame', 'pd.DataFrame', 'pd.DataFrame']:
    """
    Inspect a dataframe and filter out rows which already exist in the pipe.

    Parameters
    ----------
    df: 'pd.DataFrame'
        The dataframe to inspect and filter.

    safe_copy: bool, default True
        If `True`, create a copy before comparing and modifying the dataframes.
        Setting to `False` may mutate the DataFrames.
        See `meerschaum.utils.dataframe.filter_unseen_df`.

    date_bound_only: bool, default False
        If `True`, only use the datetime index to fetch the sample dataframe.

    include_unchanged_columns: bool, default False
        If `True`, include the backtrack columns which haven't changed in the update dataframe.
        This is useful if you can't update individual keys.

    enforce_dtypes: bool, default False
        If `True`, ensure the given and intermediate dataframes are enforced to the correct dtypes.
        Setting `enforce_dtypes=True` may impact performance.

    chunksize: Optional[int], default -1
        The `chunksize` used when fetching existing data.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A tuple of three pandas DataFrames: unseen, update, and delta.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import, import_pandas
    from meerschaum.utils.misc import round_time
    from meerschaum.utils.dataframe import (
        filter_unseen_df,
        add_missing_cols_to_df,
        get_unhashable_cols,
    )
    from meerschaum.utils.dtypes import (
        to_pandas_dtype,
        none_if_null,
        to_datetime,
        are_dtypes_equal,
        value_is_null,
    )
    from meerschaum.config import get_config
    pd = import_pandas()
    pandas = attempt_import('pandas')
    if enforce_dtypes or 'dataframe' not in str(type(df)).lower():
        df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
    is_dask = hasattr(df, '__module__') and 'dask' in df.__module__
    if is_dask:
        dd = attempt_import('dask.dataframe')
        merge = dd.merge
        NA = pandas.NA
    else:
        merge = pd.merge
        NA = pd.NA

    primary_key = self.columns.get('primary', None)
    autoincrement = self.parameters.get('autoincrement', False)
    pipe_columns = self.columns.copy()

    if primary_key and autoincrement and df is not None and primary_key in df.columns:
        if safe_copy:
            df = df.copy()
            safe_copy = False
        if df[primary_key].isnull().all():
            del df[primary_key]
            _ = self.columns.pop(primary_key, None)

    def get_empty_df():
        empty_df = pd.DataFrame([])
        dtypes = dict(df.dtypes) if df is not None else {}
        dtypes.update(self.dtypes)
        pd_dtypes = {
            col: to_pandas_dtype(str(typ))
            for col, typ in dtypes.items()
        }
        return add_missing_cols_to_df(empty_df, pd_dtypes)

    if df is None:
        empty_df = get_empty_df()
        return empty_df, empty_df, empty_df

    if (df.empty if not is_dask else len(df) == 0):
        return df, df, df

    ### `begin` is the oldest data in the new dataframe.
    begin, end = None, None
    dt_col = pipe_columns.get('datetime', None)
    primary_key = pipe_columns.get('primary', None)
    dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None

    if autoincrement and primary_key == dt_col and dt_col not in df.columns:
        if enforce_dtypes:
            df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
        return df, get_empty_df(), df

    try:
        min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
        if is_dask and min_dt_val is not None:
            min_dt_val = min_dt_val.compute()
        min_dt = (
            to_datetime(min_dt_val, as_pydatetime=True)
            if min_dt_val is not None and are_dtypes_equal(dt_type, 'datetime')
            else min_dt_val
        )
    except Exception:
        min_dt = None

    if not are_dtypes_equal('datetime', str(type(min_dt))) or value_is_null(min_dt):
        if not are_dtypes_equal('int', str(type(min_dt))):
            min_dt = None

    if isinstance(min_dt, datetime):
        rounded_min_dt = round_time(min_dt, to='down')
        try:
            begin = rounded_min_dt - timedelta(minutes=1)
        except OverflowError:
            begin = rounded_min_dt
    elif dt_type and 'int' in dt_type.lower():
        begin = min_dt
    elif dt_col is None:
        begin = None

    ### `end` is the newest data in the new dataframe.
    try:
        max_dt_val = df[dt_col].max(skipna=True) if dt_col and dt_col in df.columns else None
        if is_dask and max_dt_val is not None:
            max_dt_val = max_dt_val.compute()
        max_dt = (
            to_datetime(max_dt_val, as_pydatetime=True)
            if max_dt_val is not None and 'datetime' in str(dt_type)
            else max_dt_val
        )
    except Exception:
        import traceback
        traceback.print_exc()
        max_dt = None

    if not are_dtypes_equal('datetime', str(type(max_dt))) or value_is_null(max_dt):
        if not are_dtypes_equal('int', str(type(max_dt))):
            max_dt = None

    if isinstance(max_dt, datetime):
        end = (
            round_time(
                max_dt,
                to='down'
            ) + timedelta(minutes=1)
        )
    elif dt_type and 'int' in dt_type.lower() and max_dt is not None:
        end = max_dt + 1

    if max_dt is not None and min_dt is not None and min_dt > max_dt:
        warn("Detected minimum datetime greater than maximum datetime.")

    if begin is not None and end is not None and begin > end:
        if isinstance(begin, datetime):
            begin = end - timedelta(minutes=1)
        ### We might be using integers for the datetime axis.
        else:
            begin = end - 1

    unique_index_vals = {
        col: df[col].unique()
        for col in (pipe_columns if not primary_key else [primary_key])
        if col in df.columns and col != dt_col
    } if not date_bound_only else {}
    filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
    _ = kw.pop('params', None)
    params = {
        col: [
            none_if_null(val)
            for val in unique_vals
        ]
        for col, unique_vals in unique_index_vals.items()
        if len(unique_vals) <= filter_params_index_limit
    } if not date_bound_only else {}

    if debug:
        dprint(f"Looking at data between '{begin}' and '{end}':", **kw)

    backtrack_df = self.get_data(
        begin=begin,
        end=end,
        chunksize=chunksize,
        params=params,
        debug=debug,
        **kw
    )
    if backtrack_df is None:
        if debug:
            dprint(f"No backtrack data was found for {self}.")
        return df, get_empty_df(), df

    if enforce_dtypes:
        backtrack_df = self.enforce_dtypes(backtrack_df, chunksize=chunksize, debug=debug)

    if debug:
        dprint(f"Existing data for {self}:\n" + str(backtrack_df), **kw)
        dprint(f"Existing dtypes for {self}:\n" + str(backtrack_df.dtypes))

    ### Separate new rows from changed ones.
    on_cols = [
        col
        for col_key, col in pipe_columns.items()
        if (
            col
            and col_key != 'value'
            and col in backtrack_df.columns
        )
    ] if not primary_key else [primary_key]
    self_dtypes = self.dtypes
    on_cols_dtypes = {
        col: to_pandas_dtype(typ)
        for col, typ in self_dtypes.items()
        if col in on_cols
    }

    ### Detect changes between the old target and new source dataframes.
    delta_df = add_missing_cols_to_df(
        filter_unseen_df(
            backtrack_df,
            df,
            dtypes={
                col: to_pandas_dtype(typ)
                for col, typ in self_dtypes.items()
            },
            safe_copy=safe_copy,
            coerce_mixed_numerics=(not self.static),
            debug=debug
        ),
        on_cols_dtypes,
    )
    if enforce_dtypes:
        delta_df = self.enforce_dtypes(delta_df, chunksize=chunksize, debug=debug)

    ### Cast dicts or lists to strings so we can merge.
    serializer = functools.partial(json.dumps, sort_keys=True, separators=(',', ':'), default=str)

    def deserializer(x):
        return json.loads(x) if isinstance(x, str) else x

    unhashable_delta_cols = get_unhashable_cols(delta_df)
    unhashable_backtrack_cols = get_unhashable_cols(backtrack_df)
    for col in unhashable_delta_cols:
        delta_df[col] = delta_df[col].apply(serializer)
    for col in unhashable_backtrack_cols:
        backtrack_df[col] = backtrack_df[col].apply(serializer)
    casted_cols = set(unhashable_delta_cols + unhashable_backtrack_cols)

    joined_df = merge(
        delta_df.infer_objects(copy=False).fillna(NA),
        backtrack_df.infer_objects(copy=False).fillna(NA),
        how='left',
        on=on_cols,
        indicator=True,
        suffixes=('', '_old'),
    ) if on_cols else delta_df
    for col in casted_cols:
        if col in joined_df.columns:
            joined_df[col] = joined_df[col].apply(deserializer)
        if col in delta_df.columns:
            delta_df[col] = delta_df[col].apply(deserializer)

    ### Determine which rows are completely new.
    new_rows_mask = (joined_df['_merge'] == 'left_only') if on_cols else None
    cols = list(delta_df.columns)

    unseen_df = (
        joined_df
        .where(new_rows_mask)
        .dropna(how='all')[cols]
        .reset_index(drop=True)
    ) if on_cols else delta_df

    ### Rows that have already been inserted but whose values have changed.
    update_df = (
        joined_df
        .where(~new_rows_mask)
        .dropna(how='all')[cols]
        .reset_index(drop=True)
    ) if on_cols else get_empty_df()

    if include_unchanged_columns and on_cols:
        unchanged_backtrack_cols = [
            col
            for col in backtrack_df.columns
            if col in on_cols or col not in update_df.columns
        ]
        if enforce_dtypes:
            update_df = self.enforce_dtypes(update_df, chunksize=chunksize, debug=debug)
        update_df = merge(
            backtrack_df[unchanged_backtrack_cols],
            update_df,
            how='inner',
            on=on_cols,
        )

    return unseen_df, update_df, delta_df
Inspect a dataframe and filter out rows which already exist in the pipe.
Parameters
- df (pd.DataFrame): The dataframe to inspect and filter.
- safe_copy (bool, default True): If True, create a copy before comparing and modifying the dataframes. Setting to False may mutate the DataFrames. See meerschaum.utils.dataframe.filter_unseen_df.
- date_bound_only (bool, default False): If True, only use the datetime index to fetch the sample dataframe.
- include_unchanged_columns (bool, default False): If True, include the backtrack columns which haven't changed in the update dataframe. This is useful if you can't update individual keys.
- enforce_dtypes (bool, default False): If True, ensure the given and intermediate dataframes are enforced to the correct dtypes. Setting enforce_dtypes=True may impact performance.
- chunksize (Optional[int], default -1): The chunksize used when fetching existing data.
- debug (bool, default False): Verbosity toggle.
Returns
- A tuple of three pandas DataFrames: unseen, update, and delta.
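A sketch of the three returned frames, assuming the demo pipe synced earlier (the exact split depends on what's already in the table):

import pandas as pd
from datetime import datetime

df = pd.DataFrame([
    {'ts': datetime(2024, 1, 1), 'id': 1, 'vl': 50},  # existing index keys, new value
    {'ts': datetime(2024, 1, 2), 'id': 4, 'vl': 12},  # unseen index keys
])
unseen_df, update_df, delta_df = pipe.filter_existing(df)
# unseen_df: brand-new rows; update_df: changed rows to update in-place;
# delta_df: all rows which differ from the existing data.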
def get_num_workers(self, workers: Optional[int] = None) -> int:
    """
    Get the number of workers to use for concurrent syncs.

    Parameters
    ----------
    workers: Optional[int], default None
        The number of workers passed via `--workers`.

    Returns
    -------
    The number of workers, capped for safety.
    """
    is_thread_safe = getattr(self.instance_connector, 'IS_THREAD_SAFE', False)
    if not is_thread_safe:
        return 1

    engine_pool_size = (
        self.instance_connector.engine.pool.size()
        if self.instance_connector.type == 'sql'
        else None
    )
    current_num_threads = threading.active_count()
    current_num_connections = (
        self.instance_connector.engine.pool.checkedout()
        if engine_pool_size is not None
        else current_num_threads
    )
    desired_workers = (
        min(workers or engine_pool_size, engine_pool_size)
        if engine_pool_size is not None
        else workers
    )
    if desired_workers is None:
        desired_workers = (multiprocessing.cpu_count() if is_thread_safe else 1)

    return max(
        (desired_workers - current_num_connections),
        1,
    )
Get the number of workers to use for concurrent syncs.
Parameters
- workers (Optional[int], default None): The number of workers passed via --workers.
Returns
- The number of workers, capped for safety.
def verify(
    self,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    chunk_interval: Union[timedelta, int, None] = None,
    bounded: Optional[bool] = None,
    deduplicate: bool = False,
    workers: Optional[int] = None,
    batchsize: Optional[int] = None,
    skip_chunks_with_greater_rowcounts: bool = False,
    check_rowcounts_only: bool = False,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Verify the contents of the pipe by resyncing its interval.

    Parameters
    ----------
    begin: Union[datetime, int, None], default None
        If specified, only verify rows greater than or equal to this value.

    end: Union[datetime, int, None], default None
        If specified, only verify rows less than this value.

    chunk_interval: Union[timedelta, int, None], default None
        If provided, use this as the size of the chunk boundaries.
        Defaults to the value set in `pipe.parameters['chunk_minutes']` (1440).

    bounded: Optional[bool], default None
        If `True`, do not verify older than the oldest sync time or newer than the newest.
        If `False`, verify unbounded syncs outside of the new and old sync times.
        The default behavior (`None`) is to bound only if a bound interval is set
        (e.g. `pipe.parameters['verify']['bound_days']`).

    deduplicate: bool, default False
        If `True`, deduplicate the pipe's table after the verification syncs.

    workers: Optional[int], default None
        If provided, limit the verification to this many threads.
        Use a value of `1` to sync chunks in series.

    batchsize: Optional[int], default None
        If provided, sync this many chunks in parallel.
        Defaults to `Pipe.get_num_workers()`.

    skip_chunks_with_greater_rowcounts: bool, default False
        If `True`, compare the rowcounts for a chunk and skip syncing if the pipe's
        chunk rowcount equals or exceeds the remote's rowcount.

    check_rowcounts_only: bool, default False
        If `True`, only compare rowcounts and print chunks which are out-of-sync.

    debug: bool, default False
        Verbosity toggle.

    kwargs: Any
        All keyword arguments are passed to `pipe.sync()`.

    Returns
    -------
    A SuccessTuple indicating whether the pipe was successfully resynced.
    """
    from meerschaum.utils.pool import get_pool
    from meerschaum.utils.formatting import make_header
    from meerschaum.utils.misc import interval_str
    workers = self.get_num_workers(workers)
    check_rowcounts = skip_chunks_with_greater_rowcounts or check_rowcounts_only

    ### Skip configured bounding in parameters
    ### if `bounded` is explicitly `False`.
    bound_time = (
        self.get_bound_time(debug=debug)
        if bounded is not False
        else None
    )
    if bounded is None:
        bounded = bound_time is not None

    if bounded and begin is None:
        begin = (
            bound_time
            if bound_time is not None
            else self.get_sync_time(newest=False, debug=debug)
        )
        if begin is None:
            remote_oldest_sync_time = self.get_sync_time(newest=False, remote=True, debug=debug)
            begin = remote_oldest_sync_time
    if bounded and end is None:
        end = self.get_sync_time(newest=True, debug=debug)
        if end is None:
            remote_newest_sync_time = self.get_sync_time(newest=True, remote=True, debug=debug)
            end = remote_newest_sync_time
        if end is not None:
            end += (
                timedelta(minutes=1)
                if hasattr(end, 'tzinfo')
                else 1
            )

    begin, end = self.parse_date_bounds(begin, end)
    cannot_determine_bounds = bounded and begin is None and end is None

    if cannot_determine_bounds and not check_rowcounts_only:
        warn(f"Cannot determine sync bounds for {self}. Syncing instead...", stack=False)
        sync_success, sync_msg = self.sync(
            begin=begin,
            end=end,
            params=params,
            workers=workers,
            debug=debug,
            **kwargs
        )
        if not sync_success:
            return sync_success, sync_msg

        if deduplicate:
            return self.deduplicate(
                begin=begin,
                end=end,
                params=params,
                workers=workers,
                debug=debug,
                **kwargs
            )
        return sync_success, sync_msg

    chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
    chunk_bounds = self.get_chunk_bounds(
        begin=begin,
        end=end,
        chunk_interval=chunk_interval,
        bounded=bounded,
        debug=debug,
    )

    ### Consider it a success if no chunks need to be verified.
    if not chunk_bounds:
        if deduplicate:
            return self.deduplicate(
                begin=begin,
                end=end,
                params=params,
                workers=workers,
                debug=debug,
                **kwargs
            )
        return True, f"Could not determine chunks between '{begin}' and '{end}'; nothing to do."

    begin_to_print = (
        begin
        if begin is not None
        else (
            chunk_bounds[0][0]
            if bounded
            else chunk_bounds[0][1]
        )
    )
    end_to_print = (
        end
        if end is not None
        else (
            chunk_bounds[-1][1]
            if bounded
            else chunk_bounds[-1][0]
        )
    )
    message_header = f"{begin_to_print} - {end_to_print}"
    max_chunks_syncs = mrsm.get_config('pipes', 'verify', 'max_chunks_syncs')

    info(
        f"Verifying {self}:\n    "
        + ("Syncing" if not check_rowcounts_only else "Checking")
        + f" {len(chunk_bounds)} chunk"
        + ('s' if len(chunk_bounds) != 1 else '')
        + f" ({'un' if not bounded else ''}bounded)"
        + f" of size '{interval_str(chunk_interval)}'"
        + f" between '{begin_to_print}' and '{end_to_print}'.\n"
    )

    ### Dictionary of the form bounds -> success_tuple, e.g.:
    ### {
    ###     (2023-01-01, 2023-01-02): (True, "Success")
    ### }
    bounds_success_tuples = {}
    def process_chunk_bounds(
        chunk_begin_and_end: Tuple[
            Union[int, datetime],
            Union[int, datetime]
        ],
        _workers: Optional[int] = 1,
    ):
        if chunk_begin_and_end in bounds_success_tuples:
            return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]

        chunk_begin, chunk_end = chunk_begin_and_end
        do_sync = True
        chunk_success, chunk_msg = False, "Did not sync chunk."
        if check_rowcounts:
            existing_rowcount = self.get_rowcount(begin=chunk_begin, end=chunk_end, debug=debug)
            remote_rowcount = self.get_rowcount(
                begin=chunk_begin,
                end=chunk_end,
                remote=True,
                debug=debug,
            )
            checked_rows_str = (
                f"checked {existing_rowcount:,} row"
                + ("s" if existing_rowcount != 1 else '')
                + f" vs {remote_rowcount:,} remote"
            )
            if (
                existing_rowcount is not None
                and remote_rowcount is not None
                and existing_rowcount >= remote_rowcount
            ):
                do_sync = False
                chunk_success, chunk_msg = True, (
                    "Row-count is up-to-date "
                    f"({checked_rows_str})."
                )
            elif check_rowcounts_only:
                do_sync = False
                chunk_success, chunk_msg = True, (
                    f"Row-counts are out-of-sync ({checked_rows_str})."
                )

        num_syncs = 0
        while num_syncs < max_chunks_syncs:
            chunk_success, chunk_msg = self.sync(
                begin=chunk_begin,
                end=chunk_end,
                params=params,
                workers=_workers,
                debug=debug,
                **kwargs
            ) if do_sync else (chunk_success, chunk_msg)
            if chunk_success:
                break
            num_syncs += 1
            time.sleep(num_syncs**2)
        chunk_msg = chunk_msg.strip()
        if ' - ' not in chunk_msg:
            chunk_label = f"{chunk_begin} - {chunk_end}"
            chunk_msg = f'Verified chunk for {self}:\n{chunk_label}\n{chunk_msg}'
        mrsm.pprint((chunk_success, chunk_msg))

        return chunk_begin_and_end, (chunk_success, chunk_msg)

    ### If we have more than one chunk, attempt to sync the first one and return if it fails.
    if len(chunk_bounds) > 1:
        first_chunk_bounds = chunk_bounds[0]
        first_label = f"{first_chunk_bounds[0]} - {first_chunk_bounds[1]}"
        info(f"Verifying first chunk for {self}:\n    {first_label}")
        (
            (first_begin, first_end),
            (first_success, first_msg)
        ) = process_chunk_bounds(first_chunk_bounds, _workers=workers)
        if not first_success:
            return (
                first_success,
                f"\n{first_label}\n"
                + f"Failed to sync first chunk:\n{first_msg}"
            )
        bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
        info(f"Completed first chunk for {self}:\n    {first_label}\n")
        chunk_bounds = chunk_bounds[1:]

    pool = get_pool(workers=workers)
    batches = self.get_chunk_bounds_batches(chunk_bounds, batchsize=batchsize, workers=workers)

    def process_batch(
        batch_chunk_bounds: Tuple[
            Tuple[Union[datetime, int, None], Union[datetime, int, None]],
            ...
        ]
    ):
        _batch_begin = batch_chunk_bounds[0][0]
        _batch_end = batch_chunk_bounds[-1][-1]
        batch_message_header = f"{_batch_begin} - {_batch_end}"

        if check_rowcounts_only:
            info(f"Checking row-counts for batch bounds:\n    {batch_message_header}")
            _, (batch_init_success, batch_init_msg) = process_chunk_bounds(
                (_batch_begin, _batch_end)
            )
            mrsm.pprint((batch_init_success, batch_init_msg))
            if batch_init_success and 'up-to-date' in batch_init_msg:
                info("Entire batch is up-to-date.")
                return batch_init_success, batch_init_msg

        batch_bounds_success_tuples = dict(pool.map(process_chunk_bounds, batch_chunk_bounds))
        bounds_success_tuples.update(batch_bounds_success_tuples)
        batch_bounds_success_bools = {
            bounds: tup[0]
            for bounds, tup in batch_bounds_success_tuples.items()
        }

        if all(batch_bounds_success_bools.values()):
            msg = get_chunks_success_message(
                batch_bounds_success_tuples,
                header=batch_message_header,
                check_rowcounts_only=check_rowcounts_only,
            )
            if deduplicate:
                deduplicate_success, deduplicate_msg = self.deduplicate(
                    begin=_batch_begin,
                    end=_batch_end,
                    params=params,
                    workers=workers,
                    debug=debug,
                    **kwargs
                )
                return deduplicate_success, msg + '\n\n' + deduplicate_msg
            return True, msg

        batch_chunk_bounds_to_resync = [
            bounds
            for bounds, success in zip(batch_chunk_bounds, batch_bounds_success_bools)
            if not success
        ]
        batch_bounds_to_print = [
            f"{bounds[0]} - {bounds[1]}"
            for bounds in batch_chunk_bounds_to_resync
        ]
        if batch_bounds_to_print:
            warn(
                "Will resync the following failed chunks:\n    "
                + '\n    '.join(batch_bounds_to_print),
                stack=False,
            )

        retry_bounds_success_tuples = dict(pool.map(
            process_chunk_bounds,
            batch_chunk_bounds_to_resync
        ))
        batch_bounds_success_tuples.update(retry_bounds_success_tuples)
        bounds_success_tuples.update(retry_bounds_success_tuples)
        retry_bounds_success_bools = {
            bounds: tup[0]
            for bounds, tup in retry_bounds_success_tuples.items()
        }

        if all(retry_bounds_success_bools.values()):
            chunks_message = (
                get_chunks_success_message(
                    batch_bounds_success_tuples,
                    header=batch_message_header,
                    check_rowcounts_only=check_rowcounts_only,
                ) + f"\nRetried {len(batch_chunk_bounds_to_resync)} chunk" + (
                    's'
                    if len(batch_chunk_bounds_to_resync) != 1
                    else ''
                ) + "."
            )
            if deduplicate:
                deduplicate_success, deduplicate_msg = self.deduplicate(
                    begin=_batch_begin,
                    end=_batch_end,
                    params=params,
                    workers=workers,
                    debug=debug,
                    **kwargs
                )
                return deduplicate_success, chunks_message + '\n\n' + deduplicate_msg
            return True, chunks_message

        batch_chunks_message = get_chunks_success_message(
            batch_bounds_success_tuples,
            header=batch_message_header,
            check_rowcounts_only=check_rowcounts_only,
        )
        if deduplicate:
            deduplicate_success, deduplicate_msg = self.deduplicate(
                begin=begin,
                end=end,
                params=params,
                workers=workers,
                debug=debug,
                **kwargs
            )
            return deduplicate_success, batch_chunks_message + '\n\n' + deduplicate_msg
        return False, batch_chunks_message

    num_batches = len(batches)
    for batch_i, batch in enumerate(batches):
        batch_begin = batch[0][0]
        batch_end = batch[-1][-1]
        batch_counter_str = f"({(batch_i + 1):,}/{num_batches:,})"
        batch_label = f"batch {batch_counter_str}:\n{batch_begin} - {batch_end}"
        retry_failed_batch = True
        try:
            for_self = 'for ' + str(self)
            batch_label_str = batch_label.replace(':\n', ' ' + for_self + '...\n    ')
            info(f"Verifying {batch_label_str}\n")
            batch_success, batch_msg = process_batch(batch)
        except (KeyboardInterrupt, Exception) as e:
            batch_success = False
            batch_msg = str(e)
            retry_failed_batch = False

        batch_msg_to_print = (
            f"{make_header('Completed batch ' + batch_counter_str + ':')}\n{batch_msg}"
        )
        mrsm.pprint((batch_success, batch_msg_to_print))

        if not batch_success and retry_failed_batch:
            info(f"Retrying batch {batch_counter_str}...")
            retry_batch_success, retry_batch_msg = process_batch(batch)
            retry_batch_msg_to_print = (
                f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
            )
            mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))

            batch_success = retry_batch_success
            batch_msg = retry_batch_msg

        if not batch_success:
            return False, f"Failed to verify {batch_label}:\n\n{batch_msg}"

    chunks_message = get_chunks_success_message(
        bounds_success_tuples,
        header=message_header,
        check_rowcounts_only=check_rowcounts_only,
    )
    return True, chunks_message
Verify the contents of the pipe by resyncing its interval.
Parameters
- begin (Union[datetime, int, None], default None): If specified, only verify rows greater than or equal to this value.
- end (Union[datetime, int, None], default None): If specified, only verify rows less than this value.
- chunk_interval (Union[timedelta, int, None], default None): If provided, use this as the size of the chunk boundaries. Defaults to the value set in `pipe.parameters['chunk_minutes']` (1440).
- bounded (Optional[bool], default None): If `True`, do not verify older than the oldest sync time or newer than the newest. If `False`, verify unbounded syncs outside of the oldest and newest sync times. The default behavior (`None`) is to bound only if a bound interval is set (e.g. `pipe.parameters['verify']['bound_days']`).
- deduplicate (bool, default False): If `True`, deduplicate the pipe's table after the verification syncs.
- workers (Optional[int], default None): If provided, limit the verification to this many threads. Use a value of `1` to sync chunks in series.
- batchsize (Optional[int], default None): If provided, sync this many chunks in parallel. Defaults to `Pipe.get_num_workers()`.
- skip_chunks_with_greater_rowcounts (bool, default False): If `True`, compare the rowcounts for a chunk and skip syncing if the pipe's chunk rowcount equals or exceeds the remote's rowcount.
- check_rowcounts_only (bool, default False): If `True`, only compare rowcounts and print chunks which are out-of-sync.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All keyword arguments are passed to `pipe.sync()`.
Returns
- A SuccessTuple indicating whether the pipe was successfully resynced.
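For example, here is a minimal sketch of verifying a recent interval (the pipe keys, instance, and worker count below are placeholders):
from datetime import datetime, timezone, timedelta
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')

# Resync the last 30 days in chunked, bounded syncs.
success, msg = pipe.verify(
    begin=datetime.now(timezone.utc) - timedelta(days=30),
    bounded=True,
    workers=4,
)
mrsm.pprint((success, msg))

# Or only compare rowcounts to report out-of-sync chunks.
success, msg = pipe.verify(check_rowcounts_only=True)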
546def get_bound_interval(self, debug: bool = False) -> Union[timedelta, int, None]: 547 """ 548 Return the interval used to determine the bound time (limit for verification syncs). 549 If the datetime axis is an integer, just return its value. 550 551 Below are the supported keys for the bound interval: 552 553 - `pipe.parameters['verify']['bound_minutes']` 554 - `pipe.parameters['verify']['bound_hours']` 555 - `pipe.parameters['verify']['bound_days']` 556 - `pipe.parameters['verify']['bound_weeks']` 557 - `pipe.parameters['verify']['bound_years']` 558 - `pipe.parameters['verify']['bound_seconds']` 559 560 If multiple keys are present, the first on this priority list will be used. 561 562 Returns 563 ------- 564 A `timedelta` or `int` value to be used to determine the bound time. 565 """ 566 verify_params = self.parameters.get('verify', {}) 567 prefix = 'bound_' 568 suffixes_to_check = ('minutes', 'hours', 'days', 'weeks', 'years', 'seconds') 569 keys_to_search = { 570 key: val 571 for key, val in verify_params.items() 572 if key.startswith(prefix) 573 } 574 bound_time_key, bound_time_value = None, None 575 for key, value in keys_to_search.items(): 576 for suffix in suffixes_to_check: 577 if key == prefix + suffix: 578 bound_time_key = key 579 bound_time_value = value 580 break 581 if bound_time_key is not None: 582 break 583 584 if bound_time_value is None: 585 return bound_time_value 586 587 dt_col = self.columns.get('datetime', None) 588 if not dt_col: 589 return bound_time_value 590 591 dt_typ = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') 592 if 'int' in dt_typ.lower(): 593 return int(bound_time_value) 594 595 interval_type = bound_time_key.replace(prefix, '') 596 return timedelta(**{interval_type: bound_time_value})
Return the interval used to determine the bound time (limit for verification syncs). If the datetime axis is an integer, just return its value.
Below are the supported keys for the bound interval:
- `pipe.parameters['verify']['bound_minutes']`
- `pipe.parameters['verify']['bound_hours']`
- `pipe.parameters['verify']['bound_days']`
- `pipe.parameters['verify']['bound_weeks']`
- `pipe.parameters['verify']['bound_years']`
- `pipe.parameters['verify']['bound_seconds']`
If multiple keys are present, the first on this priority list will be used.
Returns
- A `timedelta` or `int` value to be used to determine the bound time.
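For example, a sketch of reading the bound interval from a hypothetical pipe with `bound_days` set:
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'bounds',
    instance='sql:main',
    columns={'datetime': 'ts'},
    parameters={'verify': {'bound_days': 366}},
)
print(pipe.get_bound_interval())
# 366 days, 0:00:00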
599def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]: 600 """ 601 The bound time is the limit at which long-running verification syncs should stop. 602 A value of `None` means verification syncs should be unbounded. 603 604 Like deriving a backtrack time from `pipe.get_sync_time()`, 605 the bound time is the sync time minus a large window (e.g. 366 days). 606 607 Unbound verification syncs (i.e. `bound_time is None`) 608 if the oldest sync time is less than the bound interval. 609 610 Returns 611 ------- 612 A `datetime` or `int` corresponding to the 613 `begin` bound for verification and deduplication syncs. 614 """ 615 bound_interval = self.get_bound_interval(debug=debug) 616 if bound_interval is None: 617 return None 618 619 sync_time = self.get_sync_time(debug=debug) 620 if sync_time is None: 621 return None 622 623 bound_time = sync_time - bound_interval 624 oldest_sync_time = self.get_sync_time(newest=False, debug=debug) 625 max_bound_time_days = STATIC_CONFIG['pipes']['max_bound_time_days'] 626 627 extreme_sync_times_delta = ( 628 hasattr(oldest_sync_time, 'tzinfo') 629 and (sync_time - oldest_sync_time) >= timedelta(days=max_bound_time_days) 630 ) 631 632 return ( 633 bound_time 634 if bound_time > oldest_sync_time or extreme_sync_times_delta 635 else None 636 )
The bound time is the limit at which long-running verification syncs should stop.
A value of `None` means verification syncs should be unbounded.
Like deriving a backtrack time from `pipe.get_sync_time()`, the bound time is the sync time minus a large window (e.g. 366 days).
Verification syncs are unbounded (i.e. `bound_time is None`) if the oldest sync time falls within the bound interval.
Returns
- A `datetime` or `int` corresponding to the `begin` bound for verification and deduplication syncs.
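Continuing the sketch above: when the bound time is not `None`, it equals the newest sync time minus the bound interval:
bound_interval = pipe.get_bound_interval()  # timedelta(days=366)
sync_time = pipe.get_sync_time()            # Newest datetime value in the pipe.
bound_time = pipe.get_bound_time()
if bound_time is not None:
    assert bound_time == sync_time - bound_interval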
12def delete( 13 self, 14 drop: bool = True, 15 debug: bool = False, 16 **kw 17 ) -> SuccessTuple: 18 """ 19 Call the Pipe's instance connector's `delete_pipe()` method. 20 21 Parameters 22 ---------- 23 drop: bool, default True 24 If `True`, drop the pipes' target table. 25 26 debug : bool, default False 27 Verbosity toggle. 28 29 Returns 30 ------- 31 A `SuccessTuple` of success (`bool`), message (`str`). 32 33 """ 34 import os, pathlib 35 from meerschaum.utils.warnings import warn 36 from meerschaum.utils.venv import Venv 37 from meerschaum.connectors import get_connector_plugin 38 39 if self.temporary: 40 return ( 41 False, 42 "Cannot delete pipes created with `temporary=True` (read-only). " 43 + "You may want to call `pipe.drop()` instead." 44 ) 45 46 if self.cache_pipe is not None: 47 _drop_cache_tuple = self.cache_pipe.drop(debug=debug, **kw) 48 if not _drop_cache_tuple[0]: 49 warn(_drop_cache_tuple[1]) 50 if getattr(self.cache_connector, 'flavor', None) == 'sqlite': 51 _cache_db_path = pathlib.Path(self.cache_connector.database) 52 try: 53 os.remove(_cache_db_path) 54 except Exception as e: 55 warn(f"Could not delete cache file '{_cache_db_path}' for {self}:\n{e}") 56 57 if drop: 58 drop_success, drop_msg = self.drop(debug=debug) 59 if not drop_success: 60 warn(f"Failed to drop {self}:\n{drop_msg}") 61 62 with Venv(get_connector_plugin(self.instance_connector)): 63 result = self.instance_connector.delete_pipe(self, debug=debug, **kw) 64 65 if not isinstance(result, tuple): 66 return False, f"Received an unexpected result from '{self.instance_connector}': {result}" 67 68 if result[0]: 69 to_delete = ['_id'] 70 for member in to_delete: 71 if member in self.__dict__: 72 del self.__dict__[member] 73 return result
Call the Pipe's instance connector's `delete_pipe()` method.
Parameters
- drop (bool, default True): If `True`, drop the pipe's target table.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` of success (`bool`), message (`str`).
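For example (assuming the placeholder pipe from the sketches above is registered):
success, msg = pipe.delete()            # Drop the target table and delete the registration.
success, msg = pipe.delete(drop=False)  # Delete the registration but keep the table.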
14def drop( 15 self, 16 debug: bool = False, 17 **kw: Any 18) -> SuccessTuple: 19 """ 20 Call the Pipe's instance connector's `drop_pipe()` method. 21 22 Parameters 23 ---------- 24 debug: bool, default False: 25 Verbosity toggle. 26 27 Returns 28 ------- 29 A `SuccessTuple` of success, message. 30 31 """ 32 self._exists = False 33 from meerschaum.utils.warnings import warn 34 from meerschaum.utils.venv import Venv 35 from meerschaum.connectors import get_connector_plugin 36 37 if self.cache_pipe is not None: 38 _drop_cache_tuple = self.cache_pipe.drop(debug=debug, **kw) 39 if not _drop_cache_tuple[0]: 40 warn(_drop_cache_tuple[1]) 41 42 with Venv(get_connector_plugin(self.instance_connector)): 43 if hasattr(self.instance_connector, 'drop_pipe'): 44 result = self.instance_connector.drop_pipe(self, debug=debug, **kw) 45 else: 46 result = ( 47 False, 48 ( 49 "Cannot drop pipes for instance connectors of type " 50 f"'{self.instance_connector.type}'." 51 ) 52 ) 53 54 55 _ = self.__dict__.pop('_exists', None) 56 _ = self.__dict__.pop('_exists_timestamp', None) 57 58 return result
Call the Pipe's instance connector's `drop_pipe()` method.
Parameters
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` of success, message.
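A quick sketch, reusing the placeholder pipe from above:
success, msg = pipe.drop()
print(pipe.exists())
# False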
61def drop_indices( 62 self, 63 columns: Optional[List[str]] = None, 64 debug: bool = False, 65 **kw: Any 66) -> SuccessTuple: 67 """ 68 Call the Pipe's instance connector's `drop_indices()` method. 69 70 Parameters 71 ---------- 72 columns: Optional[List[str]] = None 73 If provided, only drop indices in the given list. 74 75 debug: bool, default False: 76 Verbosity toggle. 77 78 Returns 79 ------- 80 A `SuccessTuple` of success, message. 81 82 """ 83 from meerschaum.utils.warnings import warn 84 from meerschaum.utils.venv import Venv 85 from meerschaum.connectors import get_connector_plugin 86 87 _ = self.__dict__.pop('_columns_indices', None) 88 _ = self.__dict__.pop('_columns_indices_timestamp', None) 89 _ = self.__dict__.pop('_columns_types_timestamp', None) 90 _ = self.__dict__.pop('_columns_types', None) 91 92 if self.cache_pipe is not None: 93 _drop_cache_tuple = self.cache_pipe.drop_indices(columns=columns, debug=debug, **kw) 94 if not _drop_cache_tuple[0]: 95 warn(_drop_cache_tuple[1]) 96 97 with Venv(get_connector_plugin(self.instance_connector)): 98 if hasattr(self.instance_connector, 'drop_pipe_indices'): 99 result = self.instance_connector.drop_pipe_indices( 100 self, 101 columns=columns, 102 debug=debug, 103 **kw 104 ) 105 else: 106 result = ( 107 False, 108 ( 109 "Cannot drop indices for instance connectors of type " 110 f"'{self.instance_connector.type}'." 111 ) 112 ) 113 114 _ = self.__dict__.pop('_columns_indices', None) 115 _ = self.__dict__.pop('_columns_indices_timestamp', None) 116 _ = self.__dict__.pop('_columns_types_timestamp', None) 117 _ = self.__dict__.pop('_columns_types', None) 118 119 return result
Call the Pipe's instance connector's `drop_pipe_indices()` method.
Parameters
- columns (Optional[List[str]], default None): If provided, only drop indices in the given list.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` of success, message.
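For example, drop all of a pipe's indices or only those for specific columns (the column name below is a placeholder):
success, msg = pipe.drop_indices()
success, msg = pipe.drop_indices(columns=['id'])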
14def create_indices( 15 self, 16 columns: Optional[List[str]] = None, 17 debug: bool = False, 18 **kw: Any 19) -> SuccessTuple: 20 """ 21 Call the Pipe's instance connector's `create_pipe_indices()` method. 22 23 Parameters 24 ---------- 25 debug: bool, default False: 26 Verbosity toggle. 27 28 Returns 29 ------- 30 A `SuccessTuple` of success, message. 31 32 """ 33 from meerschaum.utils.warnings import warn 34 from meerschaum.utils.venv import Venv 35 from meerschaum.connectors import get_connector_plugin 36 37 _ = self.__dict__.pop('_columns_indices', None) 38 _ = self.__dict__.pop('_columns_indices_timestamp', None) 39 _ = self.__dict__.pop('_columns_types_timestamp', None) 40 _ = self.__dict__.pop('_columns_types', None) 41 42 if self.cache_pipe is not None: 43 cache_success, cache_msg = self.cache_pipe.index(columns=columns, debug=debug, **kw) 44 if not cache_success: 45 warn(cache_msg) 46 47 with Venv(get_connector_plugin(self.instance_connector)): 48 if hasattr(self.instance_connector, 'create_pipe_indices'): 49 result = self.instance_connector.create_pipe_indices( 50 self, 51 columns=columns, 52 debug=debug, 53 **kw 54 ) 55 else: 56 result = ( 57 False, 58 ( 59 "Cannot create indices for instance connectors of type " 60 f"'{self.instance_connector.type}'." 61 ) 62 ) 63 64 _ = self.__dict__.pop('_columns_indices', None) 65 _ = self.__dict__.pop('_columns_indices_timestamp', None) 66 _ = self.__dict__.pop('_columns_types_timestamp', None) 67 _ = self.__dict__.pop('_columns_types', None) 68 69 return result
Call the Pipe's instance connector's `create_pipe_indices()` method.
Parameters
- columns (Optional[List[str]], default None): If provided, only create indices for the given columns.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` of success, message.
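For example, rebuild indices after dropping them (the column names are placeholders):
success, msg = pipe.create_indices()
success, msg = pipe.create_indices(columns=['ts', 'id'])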
16def clear( 17 self, 18 begin: Optional[datetime] = None, 19 end: Optional[datetime] = None, 20 params: Optional[Dict[str, Any]] = None, 21 debug: bool = False, 22 **kwargs: Any 23) -> SuccessTuple: 24 """ 25 Call the Pipe's instance connector's `clear_pipe` method. 26 27 Parameters 28 ---------- 29 begin: Optional[datetime], default None: 30 If provided, only remove rows newer than this datetime value. 31 32 end: Optional[datetime], default None: 33 If provided, only remove rows older than this datetime column (not including end). 34 35 params: Optional[Dict[str, Any]], default None 36 See `meerschaum.utils.sql.build_where`. 37 38 debug: bool, default False: 39 Verbositity toggle. 40 41 Returns 42 ------- 43 A `SuccessTuple` corresponding to whether this procedure completed successfully. 44 45 Examples 46 -------- 47 >>> pipe = mrsm.Pipe('test', 'test', columns={'datetime': 'dt'}, instance='sql:local') 48 >>> pipe.sync({'dt': [datetime(2020, 1, 1, 0, 0)]}) 49 >>> pipe.sync({'dt': [datetime(2021, 1, 1, 0, 0)]}) 50 >>> pipe.sync({'dt': [datetime(2022, 1, 1, 0, 0)]}) 51 >>> 52 >>> pipe.clear(begin=datetime(2021, 1, 1, 0, 0)) 53 >>> pipe.get_data() 54 dt 55 0 2020-01-01 56 57 """ 58 from meerschaum.utils.warnings import warn 59 from meerschaum.utils.venv import Venv 60 from meerschaum.connectors import get_connector_plugin 61 62 begin, end = self.parse_date_bounds(begin, end) 63 64 if self.cache_pipe is not None: 65 success, msg = self.cache_pipe.clear( 66 begin=begin, 67 end=end, 68 params=params, 69 debug=debug, 70 **kwargs 71 ) 72 if not success: 73 warn(msg) 74 75 with Venv(get_connector_plugin(self.instance_connector)): 76 return self.instance_connector.clear_pipe( 77 self, 78 begin=begin, 79 end=end, 80 params=params, 81 debug=debug, 82 **kwargs 83 )
Call the Pipe's instance connector's `clear_pipe()` method.
Parameters
- begin (Optional[datetime], default None): If provided, only remove rows newer than this datetime value.
- end (Optional[datetime], default None): If provided, only remove rows older than this datetime value (not including `end`).
- params (Optional[Dict[str, Any]], default None): See `meerschaum.utils.sql.build_where`.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` corresponding to whether this procedure completed successfully.
Examples
>>> pipe = mrsm.Pipe('test', 'test', columns={'datetime': 'dt'}, instance='sql:local')
>>> pipe.sync({'dt': [datetime(2020, 1, 1, 0, 0)]})
>>> pipe.sync({'dt': [datetime(2021, 1, 1, 0, 0)]})
>>> pipe.sync({'dt': [datetime(2022, 1, 1, 0, 0)]})
>>>
>>> pipe.clear(begin=datetime(2021, 1, 1, 0, 0))
>>> pipe.get_data()
dt
0 2020-01-01
15def deduplicate( 16 self, 17 begin: Union[datetime, int, None] = None, 18 end: Union[datetime, int, None] = None, 19 params: Optional[Dict[str, Any]] = None, 20 chunk_interval: Union[datetime, int, None] = None, 21 bounded: Optional[bool] = None, 22 workers: Optional[int] = None, 23 debug: bool = False, 24 _use_instance_method: bool = True, 25 **kwargs: Any 26) -> SuccessTuple: 27 """ 28 Call the Pipe's instance connector's `delete_duplicates` method to delete duplicate rows. 29 30 Parameters 31 ---------- 32 begin: Union[datetime, int, None], default None: 33 If provided, only deduplicate rows newer than this datetime value. 34 35 end: Union[datetime, int, None], default None: 36 If provided, only deduplicate rows older than this datetime column (not including end). 37 38 params: Optional[Dict[str, Any]], default None 39 Restrict deduplication to this filter (for multiplexed data streams). 40 See `meerschaum.utils.sql.build_where`. 41 42 chunk_interval: Union[timedelta, int, None], default None 43 If provided, use this for the chunk bounds. 44 Defaults to the value set in `pipe.parameters['chunk_minutes']` (1440). 45 46 bounded: Optional[bool], default None 47 Only check outside the oldest and newest sync times if bounded is explicitly `False`. 48 49 workers: Optional[int], default None 50 If the instance connector is thread-safe, limit concurrenct syncs to this many threads. 51 52 debug: bool, default False: 53 Verbositity toggle. 54 55 kwargs: Any 56 All other keyword arguments are passed to 57 `pipe.sync()`, `pipe.clear()`, and `pipe.get_data(). 58 59 Returns 60 ------- 61 A `SuccessTuple` corresponding to whether all of the chunks were successfully deduplicated. 62 """ 63 from meerschaum.utils.warnings import warn, info 64 from meerschaum.utils.misc import interval_str, items_str 65 from meerschaum.utils.venv import Venv 66 from meerschaum.connectors import get_connector_plugin 67 from meerschaum.utils.pool import get_pool 68 69 begin, end = self.parse_date_bounds(begin, end) 70 71 if self.cache_pipe is not None: 72 success, msg = self.cache_pipe.deduplicate( 73 begin=begin, 74 end=end, 75 params=params, 76 bounded=bounded, 77 debug=debug, 78 _use_instance_method=_use_instance_method, 79 **kwargs 80 ) 81 if not success: 82 warn(msg) 83 84 workers = self.get_num_workers(workers=workers) 85 pool = get_pool(workers=workers) 86 87 if _use_instance_method: 88 with Venv(get_connector_plugin(self.instance_connector)): 89 if hasattr(self.instance_connector, 'deduplicate_pipe'): 90 return self.instance_connector.deduplicate_pipe( 91 self, 92 begin=begin, 93 end=end, 94 params=params, 95 bounded=bounded, 96 debug=debug, 97 **kwargs 98 ) 99 100 ### Only unbound if explicitly False. 101 if bounded is None: 102 bounded = True 103 chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug) 104 105 bound_time = self.get_bound_time(debug=debug) 106 if bounded and begin is None: 107 begin = ( 108 bound_time 109 if bound_time is not None 110 else self.get_sync_time(newest=False, debug=debug) 111 ) 112 if bounded and end is None: 113 end = self.get_sync_time(newest=True, debug=debug) 114 if end is not None: 115 end += ( 116 timedelta(minutes=1) 117 if hasattr(end, 'tzinfo') 118 else 1 119 ) 120 121 chunk_bounds = self.get_chunk_bounds( 122 bounded=bounded, 123 begin=begin, 124 end=end, 125 chunk_interval=chunk_interval, 126 debug=debug, 127 ) 128 129 indices = [col for col in self.columns.values() if col] 130 if not indices: 131 return False, "Cannot deduplicate without index columns." 
132 133 def process_chunk_bounds(bounds) -> Tuple[ 134 Tuple[ 135 Union[datetime, int, None], 136 Union[datetime, int, None] 137 ], 138 SuccessTuple 139 ]: 140 ### Only selecting the index values here to keep bandwidth down. 141 chunk_begin, chunk_end = bounds 142 chunk_df = self.get_data( 143 select_columns=indices, 144 begin=chunk_begin, 145 end=chunk_end, 146 params=params, 147 debug=debug, 148 ) 149 if chunk_df is None: 150 return bounds, (True, "") 151 existing_chunk_len = len(chunk_df) 152 deduped_chunk_df = chunk_df.drop_duplicates(keep='last') 153 deduped_chunk_len = len(deduped_chunk_df) 154 155 if existing_chunk_len == deduped_chunk_len: 156 return bounds, (True, "") 157 158 chunk_msg_header = f"\n{chunk_begin} - {chunk_end}" 159 chunk_msg_body = "" 160 161 full_chunk = self.get_data( 162 begin=chunk_begin, 163 end=chunk_end, 164 params=params, 165 debug=debug, 166 ) 167 if full_chunk is None or len(full_chunk) == 0: 168 return bounds, (True, f"{chunk_msg_header}\nChunk is empty, skipping...") 169 170 chunk_indices = [ix for ix in indices if ix in full_chunk.columns] 171 if not chunk_indices: 172 return bounds, (False, f"None of {items_str(indices)} were present in chunk.") 173 try: 174 full_chunk = full_chunk.drop_duplicates( 175 subset=chunk_indices, 176 keep='last' 177 ).reset_index( 178 drop=True, 179 ) 180 except Exception as e: 181 return ( 182 bounds, 183 (False, f"Failed to deduplicate chunk on {items_str(chunk_indices)}:\n({e})") 184 ) 185 186 clear_success, clear_msg = self.clear( 187 begin=chunk_begin, 188 end=chunk_end, 189 params=params, 190 debug=debug, 191 ) 192 if not clear_success: 193 chunk_msg_body += f"Failed to clear chunk while deduplicating:\n{clear_msg}\n" 194 warn(chunk_msg_body) 195 196 sync_success, sync_msg = self.sync(full_chunk, debug=debug) 197 if not sync_success: 198 chunk_msg_body += f"Failed to sync chunk while deduplicating:\n{sync_msg}\n" 199 200 ### Finally check if the deduplication worked. 201 chunk_rowcount = self.get_rowcount( 202 begin=chunk_begin, 203 end=chunk_end, 204 params=params, 205 debug=debug, 206 ) 207 if chunk_rowcount != deduped_chunk_len: 208 return bounds, ( 209 False, ( 210 chunk_msg_header + "\n" 211 + chunk_msg_body + ("\n" if chunk_msg_body else '') 212 + "Chunk rowcounts still differ (" 213 + f"{chunk_rowcount} rowcount vs {deduped_chunk_len} chunk length)." 214 ) 215 ) 216 217 return bounds, ( 218 True, ( 219 chunk_msg_header + "\n" 220 + chunk_msg_body + ("\n" if chunk_msg_body else '') 221 + f"Deduplicated chunk from {existing_chunk_len} to {chunk_rowcount} rows." 222 ) 223 ) 224 225 info( 226 f"Deduplicating {len(chunk_bounds)} chunk" 227 + ('s' if len(chunk_bounds) != 1 else '') 228 + f" ({'un' if not bounded else ''}bounded)" 229 + f" of size '{interval_str(chunk_interval)}'" 230 + f" on {self}." 231 ) 232 bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds)) 233 bounds_successes = { 234 bounds: success_tuple 235 for bounds, success_tuple in bounds_success_tuples.items() 236 if success_tuple[0] 237 } 238 bounds_failures = { 239 bounds: success_tuple 240 for bounds, success_tuple in bounds_success_tuples.items() 241 if not success_tuple[0] 242 } 243 244 ### No need to retry if everything failed. 
245 if len(bounds_failures) > 0 and len(bounds_successes) == 0: 246 return ( 247 False, 248 ( 249 f"Failed to deduplicate {len(bounds_failures)} chunk" 250 + ('s' if len(bounds_failures) != 1 else '') 251 + ".\n" 252 + "\n".join([msg for _, (_, msg) in bounds_failures.items() if msg]) 253 ) 254 ) 255 256 retry_bounds = [bounds for bounds in bounds_failures] 257 if not retry_bounds: 258 return ( 259 True, 260 ( 261 f"Successfully deduplicated {len(bounds_successes)} chunk" 262 + ('s' if len(bounds_successes) != 1 else '') 263 + ".\n" 264 + "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg]) 265 ).rstrip('\n') 266 ) 267 268 info(f"Retrying {len(retry_bounds)} chunks for {self}...") 269 retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, retry_bounds)) 270 retry_bounds_successes = { 271 bounds: success_tuple 272 for bounds, success_tuple in bounds_success_tuples.items() 273 if success_tuple[0] 274 } 275 retry_bounds_failures = { 276 bounds: success_tuple 277 for bounds, success_tuple in bounds_success_tuples.items() 278 if not success_tuple[0] 279 } 280 281 bounds_successes.update(retry_bounds_successes) 282 if not retry_bounds_failures: 283 return ( 284 True, 285 ( 286 f"Successfully deduplicated {len(bounds_successes)} chunk" 287 + ('s' if len(bounds_successes) != 1 else '') 288 + f"({len(retry_bounds_successes)} retried):\n" 289 + "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg]) 290 ).rstrip('\n') 291 ) 292 293 return ( 294 False, 295 ( 296 f"Failed to deduplicate {len(bounds_failures)} chunk" 297 + ('s' if len(retry_bounds_failures) != 1 else '') 298 + ".\n" 299 + "\n".join([msg for _, (_, msg) in retry_bounds_failures.items() if msg]) 300 ).rstrip('\n') 301 )
Call the Pipe's instance connector's `deduplicate_pipe()` method to delete duplicate rows.
Parameters
- begin (Union[datetime, int, None], default None): If provided, only deduplicate rows newer than this datetime value.
- end (Union[datetime, int, None], default None): If provided, only deduplicate rows older than this datetime value (not including `end`).
- params (Optional[Dict[str, Any]], default None): Restrict deduplication to this filter (for multiplexed data streams). See `meerschaum.utils.sql.build_where`.
- chunk_interval (Union[timedelta, int, None], default None): If provided, use this for the chunk bounds. Defaults to the value set in `pipe.parameters['chunk_minutes']` (1440).
- bounded (Optional[bool], default None): Only check outside the oldest and newest sync times if bounded is explicitly `False`.
- workers (Optional[int], default None): If the instance connector is thread-safe, limit concurrent syncs to this many threads.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `pipe.sync()`, `pipe.clear()`, and `pipe.get_data()`.
Returns
- A `SuccessTuple` corresponding to whether all of the chunks were successfully deduplicated.
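For example, a sketch of deduplicating a single stream of a multiplexed pipe (the filter value and date are placeholders):
from datetime import datetime
import meerschaum as mrsm

success, msg = pipe.deduplicate(
    begin=datetime(2024, 1, 1),
    params={'id': 1},
    workers=4,
)
mrsm.pprint((success, msg))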
14def bootstrap( 15 self, 16 debug: bool = False, 17 yes: bool = False, 18 force: bool = False, 19 noask: bool = False, 20 shell: bool = False, 21 **kw 22) -> SuccessTuple: 23 """ 24 Prompt the user to create a pipe's requirements all from one method. 25 This method shouldn't be used in any automated scripts because it interactively 26 prompts the user and therefore may hang. 27 28 Parameters 29 ---------- 30 debug: bool, default False: 31 Verbosity toggle. 32 33 yes: bool, default False: 34 Print the questions and automatically agree. 35 36 force: bool, default False: 37 Skip the questions and agree anyway. 38 39 noask: bool, default False: 40 Print the questions but go with the default answer. 41 42 shell: bool, default False: 43 Used to determine if we are in the interactive shell. 44 45 Returns 46 ------- 47 A `SuccessTuple` corresponding to the success of this procedure. 48 49 """ 50 51 from meerschaum.utils.warnings import info 52 from meerschaum.utils.prompt import prompt, yes_no 53 from meerschaum.utils.formatting import pprint 54 from meerschaum.config import get_config 55 from meerschaum.utils.formatting._shell import clear_screen 56 from meerschaum.utils.formatting import print_tuple 57 from meerschaum.actions import actions 58 from meerschaum.utils.venv import Venv 59 from meerschaum.connectors import get_connector_plugin 60 61 _clear = get_config('shell', 'clear_screen', patch=True) 62 63 if self.get_id(debug=debug) is not None: 64 delete_tuple = self.delete(debug=debug) 65 if not delete_tuple[0]: 66 return delete_tuple 67 68 if _clear: 69 clear_screen(debug=debug) 70 71 _parameters = _get_parameters(self, debug=debug) 72 self.parameters = _parameters 73 pprint(self.parameters) 74 try: 75 prompt( 76 f"\n Press [Enter] to register {self} with the above configuration:", 77 icon = False 78 ) 79 except KeyboardInterrupt: 80 return False, f"Aborted bootstrapping {self}." 81 82 with Venv(get_connector_plugin(self.instance_connector)): 83 register_tuple = self.instance_connector.register_pipe(self, debug=debug) 84 85 if not register_tuple[0]: 86 return register_tuple 87 88 if _clear: 89 clear_screen(debug=debug) 90 91 try: 92 if yes_no( 93 f"Would you like to edit the definition for {self}?", 94 yes=yes, 95 noask=noask, 96 default='n', 97 ): 98 edit_tuple = self.edit_definition(debug=debug) 99 if not edit_tuple[0]: 100 return edit_tuple 101 102 if yes_no( 103 f"Would you like to try syncing {self} now?", 104 yes=yes, 105 noask=noask, 106 default='n', 107 ): 108 sync_tuple = actions['sync']( 109 ['pipes'], 110 connector_keys=[self.connector_keys], 111 metric_keys=[self.metric_key], 112 location_keys=[self.location_key], 113 mrsm_instance=str(self.instance_connector), 114 debug=debug, 115 shell=shell, 116 ) 117 if not sync_tuple[0]: 118 return sync_tuple 119 except Exception as e: 120 return False, f"Failed to bootstrap {self}:\n" + str(e) 121 122 print_tuple((True, f"Finished bootstrapping {self}!")) 123 info( 124 "You can edit this pipe later with `edit pipes` " 125 + "or set the definition with `edit pipes definition`.\n" 126 + " To sync data into your pipe, run `sync pipes`." 127 ) 128 129 return True, "Success"
Prompt the user to create a pipe's requirements all from one method. This method shouldn't be used in any automated scripts because it interactively prompts the user and therefore may hang.
Parameters
- debug (bool, default False): Verbosity toggle.
- yes (bool, default False): Print the questions and automatically agree.
- force (bool, default False): Skip the questions and agree anyway.
- noask (bool, default False): Print the questions but go with the default answer.
- shell (bool, default False): Used to determine if we are in the interactive shell.
Returns
- A `SuccessTuple` corresponding to the success of this procedure.
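Because `bootstrap()` prompts interactively, run it from a REPL or the Meerschaum shell rather than an automated script:
success, msg = pipe.bootstrap()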
20def enforce_dtypes( 21 self, 22 df: 'pd.DataFrame', 23 chunksize: Optional[int] = -1, 24 enforce: bool = True, 25 safe_copy: bool = True, 26 debug: bool = False, 27) -> 'pd.DataFrame': 28 """ 29 Cast the input dataframe to the pipe's registered data types. 30 If the pipe does not exist and dtypes are not set, return the dataframe. 31 """ 32 import traceback 33 from meerschaum.utils.warnings import warn 34 from meerschaum.utils.debug import dprint 35 from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes 36 from meerschaum.utils.dtypes import are_dtypes_equal 37 from meerschaum.utils.packages import import_pandas 38 pd = import_pandas(debug=debug) 39 if df is None: 40 if debug: 41 dprint( 42 "Received None instead of a DataFrame.\n" 43 + " Skipping dtype enforcement..." 44 ) 45 return df 46 47 if not self.enforce: 48 enforce = False 49 pipe_dtypes = self.dtypes if enforce else {} 50 51 try: 52 if isinstance(df, str): 53 df = parse_df_datetimes( 54 pd.read_json(StringIO(df)), 55 ignore_cols=[ 56 col 57 for col, dtype in pipe_dtypes.items() 58 if (not enforce or not are_dtypes_equal(dtype, 'datetime')) 59 ], 60 ignore_all=(not enforce), 61 strip_timezone=(self.tzinfo is None), 62 chunksize=chunksize, 63 debug=debug, 64 ) 65 elif isinstance(df, (dict, list)): 66 df = parse_df_datetimes( 67 df, 68 ignore_cols=[ 69 col 70 for col, dtype in pipe_dtypes.items() 71 if (not enforce or not are_dtypes_equal(str(dtype), 'datetime')) 72 ], 73 strip_timezone=(self.tzinfo is None), 74 chunksize=chunksize, 75 debug=debug, 76 ) 77 except Exception as e: 78 warn(f"Unable to cast incoming data as a DataFrame...:\n{e}\n\n{traceback.format_exc()}") 79 return None 80 81 if not pipe_dtypes: 82 if debug: 83 dprint( 84 f"Could not find dtypes for {self}.\n" 85 + " Skipping dtype enforcement..." 86 ) 87 return df 88 89 return _enforce_dtypes( 90 df, 91 pipe_dtypes, 92 safe_copy=safe_copy, 93 strip_timezone=(self.tzinfo is None), 94 coerce_timezone=enforce, 95 debug=debug, 96 )
Cast the input dataframe to the pipe's registered data types. If the pipe does not exist and dtypes are not set, return the dataframe.
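For example, a minimal sketch of casting incoming strings to a pipe's registered dtypes (the keys and dtypes below are assumptions for illustration):
import pandas as pd
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'dtypes',
    instance='sql:main',
    dtypes={'ts': 'datetime64[ns, UTC]', 'vl': 'float64'},
)
df = pd.DataFrame({'ts': ['2024-01-01'], 'vl': ['42.1']})
typed_df = pipe.enforce_dtypes(df)
# The string columns are now cast to the registered data types.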
99def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]: 100 """ 101 If `dtypes` is not set in `meerschaum.Pipe.parameters`, 102 infer the data types from the underlying table if it exists. 103 104 Parameters 105 ---------- 106 persist: bool, default False 107 If `True`, persist the inferred data types to `meerschaum.Pipe.parameters`. 108 109 Returns 110 ------- 111 A dictionary of strings containing the pandas data types for this Pipe. 112 """ 113 if not self.exists(debug=debug): 114 return {} 115 116 from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type 117 from meerschaum.utils.dtypes import to_pandas_dtype 118 119 ### NOTE: get_columns_types() may return either the types as 120 ### PostgreSQL- or Pandas-style. 121 columns_types = self.get_columns_types(debug=debug) 122 123 remote_pd_dtypes = { 124 c: ( 125 get_pd_type_from_db_type(t, allow_custom_dtypes=True) 126 if str(t).isupper() 127 else to_pandas_dtype(t) 128 ) 129 for c, t in columns_types.items() 130 } if columns_types else {} 131 if not persist: 132 return remote_pd_dtypes 133 134 dtypes = self.parameters.get('dtypes', {}) 135 dtypes.update({ 136 col: typ 137 for col, typ in remote_pd_dtypes.items() 138 if col not in dtypes 139 }) 140 self.dtypes = dtypes 141 self.edit(interactive=False, debug=debug) 142 return remote_pd_dtypes
If `dtypes` is not set in `meerschaum.Pipe.parameters`, infer the data types from the underlying table if it exists.
Parameters
- persist (bool, default False): If `True`, persist the inferred data types to `meerschaum.Pipe.parameters`.
Returns
- A dictionary of strings containing the pandas data types for this Pipe.
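For example (the output below is illustrative):
dtypes = pipe.infer_dtypes()
print(dtypes)
# {'ts': 'datetime64[ns, UTC]', 'id': 'int64', 'vl': 'float64'}

# Also write the inferred dtypes to the pipe's parameters.
pipe.infer_dtypes(persist=True)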
15def copy_to( 16 self, 17 instance_keys: str, 18 sync: bool = True, 19 begin: Union[datetime, int, None] = None, 20 end: Union[datetime, int, None] = None, 21 params: Optional[Dict[str, Any]] = None, 22 chunk_interval: Union[timedelta, int, None] = None, 23 debug: bool = False, 24 **kwargs: Any 25) -> SuccessTuple: 26 """ 27 Copy a pipe to another instance. 28 29 Parameters 30 ---------- 31 instance_keys: str 32 The instance to which to copy this pipe. 33 34 sync: bool, default True 35 If `True`, sync the source pipe's documents 36 37 begin: Union[datetime, int, None], default None 38 Beginning datetime value to pass to `Pipe.get_data()`. 39 40 end: Union[datetime, int, None], default None 41 End datetime value to pass to `Pipe.get_data()`. 42 43 params: Optional[Dict[str, Any]], default None 44 Parameters filter to pass to `Pipe.get_data()`. 45 46 chunk_interval: Union[timedelta, int, None], default None 47 The size of chunks to retrieve from `Pipe.get_data()` for syncing. 48 49 kwargs: Any 50 Additional flags to pass to `Pipe.get_data()` and `Pipe.sync()`, e.g. `workers`. 51 52 Returns 53 ------- 54 A SuccessTuple indicating success. 55 """ 56 if str(instance_keys) == self.instance_keys: 57 return False, f"Cannot copy {self} to instance '{instance_keys}'." 58 59 begin, end = self.parse_date_bounds(begin, end) 60 61 new_pipe = mrsm.Pipe( 62 self.connector_keys, 63 self.metric_key, 64 self.location_key, 65 parameters=self.parameters.copy(), 66 instance=instance_keys, 67 ) 68 69 new_pipe_is_registered = new_pipe.get_id() is not None 70 71 metadata_method = new_pipe.edit if new_pipe_is_registered else new_pipe.register 72 metadata_success, metadata_msg = metadata_method(debug=debug) 73 if not metadata_success: 74 return metadata_success, metadata_msg 75 76 if not self.exists(debug=debug): 77 return True, f"{self} does not exist; nothing to sync." 78 79 original_as_iterator = kwargs.get('as_iterator', None) 80 kwargs['as_iterator'] = True 81 82 chunk_generator = self.get_data( 83 begin=begin, 84 end=end, 85 params=params, 86 chunk_interval=chunk_interval, 87 debug=debug, 88 **kwargs 89 ) 90 91 if original_as_iterator is None: 92 _ = kwargs.pop('as_iterator', None) 93 else: 94 kwargs['as_iterator'] = original_as_iterator 95 96 sync_success, sync_msg = new_pipe.sync( 97 chunk_generator, 98 begin=begin, 99 end=end, 100 params=params, 101 debug=debug, 102 **kwargs 103 ) 104 msg = ( 105 f"Successfully synced {new_pipe}:\n{sync_msg}" 106 if sync_success 107 else f"Failed to sync {new_pipe}:\n{sync_msg}" 108 ) 109 return sync_success, msg
Copy a pipe to another instance.
Parameters
- instance_keys (str): The instance to which to copy this pipe.
- sync (bool, default True): If `True`, sync the source pipe's documents.
- begin (Union[datetime, int, None], default None): Beginning datetime value to pass to `Pipe.get_data()`.
- end (Union[datetime, int, None], default None): End datetime value to pass to `Pipe.get_data()`.
- params (Optional[Dict[str, Any]], default None): Parameters filter to pass to `Pipe.get_data()`.
- chunk_interval (Union[timedelta, int, None], default None): The size of chunks to retrieve from `Pipe.get_data()` for syncing.
- kwargs (Any): Additional flags to pass to `Pipe.get_data()` and `Pipe.sync()`, e.g. `workers`.
Returns
- A SuccessTuple indicating success.
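For example, a sketch of copying a pipe to a hypothetical backup instance:
from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
success, msg = pipe.copy_to('sql:backup')

# Only copy documents from 2024 onward.
success, msg = pipe.copy_to('sql:backup', begin=datetime(2024, 1, 1))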
37class Plugin: 38 """Handle packaging of Meerschaum plugins.""" 39 def __init__( 40 self, 41 name: str, 42 version: Optional[str] = None, 43 user_id: Optional[int] = None, 44 required: Optional[List[str]] = None, 45 attributes: Optional[Dict[str, Any]] = None, 46 archive_path: Optional[pathlib.Path] = None, 47 venv_path: Optional[pathlib.Path] = None, 48 repo_connector: Optional['mrsm.connectors.api.APIConnector'] = None, 49 repo: Union['mrsm.connectors.api.APIConnector', str, None] = None, 50 ): 51 from meerschaum.config.static import STATIC_CONFIG 52 sep = STATIC_CONFIG['plugins']['repo_separator'] 53 _repo = None 54 if sep in name: 55 try: 56 name, _repo = name.split(sep) 57 except Exception as e: 58 error(f"Invalid plugin name: '{name}'") 59 self._repo_in_name = _repo 60 61 if attributes is None: 62 attributes = {} 63 self.name = name 64 self.attributes = attributes 65 self.user_id = user_id 66 self._version = version 67 if required: 68 self._required = required 69 self.archive_path = ( 70 archive_path if archive_path is not None 71 else PLUGINS_ARCHIVES_RESOURCES_PATH / f"{self.name}.tar.gz" 72 ) 73 self.venv_path = ( 74 venv_path if venv_path is not None 75 else VIRTENV_RESOURCES_PATH / self.name 76 ) 77 self._repo_connector = repo_connector 78 self._repo_keys = repo 79 80 81 @property 82 def repo_connector(self): 83 """ 84 Return the repository connector for this plugin. 85 NOTE: This imports the `connectors` module, which imports certain plugin modules. 86 """ 87 if self._repo_connector is None: 88 from meerschaum.connectors.parse import parse_repo_keys 89 90 repo_keys = self._repo_keys or self._repo_in_name 91 if self._repo_in_name and self._repo_keys and self._repo_keys != self._repo_in_name: 92 error( 93 f"Received inconsistent repos: '{self._repo_in_name}' and '{self._repo_keys}'." 94 ) 95 repo_connector = parse_repo_keys(repo_keys) 96 self._repo_connector = repo_connector 97 return self._repo_connector 98 99 100 @property 101 def version(self): 102 """ 103 Return the plugin's module version is defined (`__version__`) if it's defined. 104 """ 105 if self._version is None: 106 try: 107 self._version = self.module.__version__ 108 except Exception as e: 109 self._version = None 110 return self._version 111 112 113 @property 114 def module(self): 115 """ 116 Return the Python module of the underlying plugin. 117 """ 118 if '_module' not in self.__dict__ or self.__dict__.get('_module', None) is None: 119 if self.__file__ is None: 120 return None 121 from meerschaum.plugins import import_plugins 122 self._module = import_plugins(str(self), warn=False) 123 return self._module 124 125 126 @property 127 def __file__(self) -> Union[str, None]: 128 """ 129 Return the file path (str) of the plugin if it exists, otherwise `None`. 130 """ 131 if self.__dict__.get('_module', None) is not None: 132 return self.module.__file__ 133 134 potential_dir = PLUGINS_RESOURCES_PATH / self.name 135 if ( 136 potential_dir.exists() 137 and potential_dir.is_dir() 138 and (potential_dir / '__init__.py').exists() 139 ): 140 return str((potential_dir / '__init__.py').as_posix()) 141 142 potential_file = PLUGINS_RESOURCES_PATH / (self.name + '.py') 143 if potential_file.exists() and not potential_file.is_dir(): 144 return str(potential_file.as_posix()) 145 146 return None 147 148 149 @property 150 def requirements_file_path(self) -> Union[pathlib.Path, None]: 151 """ 152 If a file named `requirements.txt` exists, return its path. 
153 """ 154 if self.__file__ is None: 155 return None 156 path = pathlib.Path(self.__file__).parent / 'requirements.txt' 157 if not path.exists(): 158 return None 159 return path 160 161 162 def is_installed(self, **kw) -> bool: 163 """ 164 Check whether a plugin is correctly installed. 165 166 Returns 167 ------- 168 A `bool` indicating whether a plugin exists and is successfully imported. 169 """ 170 return self.__file__ is not None 171 172 173 def make_tar(self, debug: bool = False) -> pathlib.Path: 174 """ 175 Compress the plugin's source files into a `.tar.gz` archive and return the archive's path. 176 177 Parameters 178 ---------- 179 debug: bool, default False 180 Verbosity toggle. 181 182 Returns 183 ------- 184 A `pathlib.Path` to the archive file's path. 185 186 """ 187 import tarfile, pathlib, subprocess, fnmatch 188 from meerschaum.utils.debug import dprint 189 from meerschaum.utils.packages import attempt_import 190 pathspec = attempt_import('pathspec', debug=debug) 191 192 if not self.__file__: 193 from meerschaum.utils.warnings import error 194 error(f"Could not find file for plugin '{self}'.") 195 if '__init__.py' in self.__file__ or os.path.isdir(self.__file__): 196 path = self.__file__.replace('__init__.py', '') 197 is_dir = True 198 else: 199 path = self.__file__ 200 is_dir = False 201 202 old_cwd = os.getcwd() 203 real_parent_path = pathlib.Path(os.path.realpath(path)).parent 204 os.chdir(real_parent_path) 205 206 default_patterns_to_ignore = [ 207 '.pyc', 208 '__pycache__/', 209 'eggs/', 210 '__pypackages__/', 211 '.git', 212 ] 213 214 def parse_gitignore() -> 'Set[str]': 215 gitignore_path = pathlib.Path(path) / '.gitignore' 216 if not gitignore_path.exists(): 217 return set(default_patterns_to_ignore) 218 with open(gitignore_path, 'r', encoding='utf-8') as f: 219 gitignore_text = f.read() 220 return set(pathspec.PathSpec.from_lines( 221 pathspec.patterns.GitWildMatchPattern, 222 default_patterns_to_ignore + gitignore_text.splitlines() 223 ).match_tree(path)) 224 225 patterns_to_ignore = parse_gitignore() if is_dir else set() 226 227 if debug: 228 dprint(f"Patterns to ignore:\n{patterns_to_ignore}") 229 230 with tarfile.open(self.archive_path, 'w:gz') as tarf: 231 if not is_dir: 232 tarf.add(f"{self.name}.py") 233 else: 234 for root, dirs, files in os.walk(self.name): 235 for f in files: 236 good_file = True 237 fp = os.path.join(root, f) 238 for pattern in patterns_to_ignore: 239 if pattern in str(fp) or f.startswith('.'): 240 good_file = False 241 break 242 if good_file: 243 if debug: 244 dprint(f"Adding '{fp}'...") 245 tarf.add(fp) 246 247 ### clean up and change back to old directory 248 os.chdir(old_cwd) 249 250 ### change to 775 to avoid permissions issues with the API in a Docker container 251 self.archive_path.chmod(0o775) 252 253 if debug: 254 dprint(f"Created archive '{self.archive_path}'.") 255 return self.archive_path 256 257 258 def install( 259 self, 260 skip_deps: bool = False, 261 force: bool = False, 262 debug: bool = False, 263 ) -> SuccessTuple: 264 """ 265 Extract a plugin's tar archive to the plugins directory. 266 267 This function checks if the plugin is already installed and if the version is equal or 268 greater than the existing installation. 269 270 Parameters 271 ---------- 272 skip_deps: bool, default False 273 If `True`, do not install dependencies. 274 275 force: bool, default False 276 If `True`, continue with installation, even if required packages fail to install. 277 278 debug: bool, default False 279 Verbosity toggle. 
280 281 Returns 282 ------- 283 A `SuccessTuple` of success (bool) and a message (str). 284 285 """ 286 if self.full_name in _ongoing_installations: 287 return True, f"Already installing plugin '{self}'." 288 _ongoing_installations.add(self.full_name) 289 from meerschaum.utils.warnings import warn, error 290 if debug: 291 from meerschaum.utils.debug import dprint 292 import tarfile 293 import re 294 import ast 295 from meerschaum.plugins import sync_plugins_symlinks 296 from meerschaum.utils.packages import attempt_import, determine_version, reload_meerschaum 297 from meerschaum.utils.venv import init_venv 298 from meerschaum.utils.misc import safely_extract_tar 299 old_cwd = os.getcwd() 300 old_version = '' 301 new_version = '' 302 temp_dir = PLUGINS_TEMP_RESOURCES_PATH / self.name 303 temp_dir.mkdir(exist_ok=True) 304 305 if not self.archive_path.exists(): 306 return False, f"Missing archive file for plugin '{self}'." 307 if self.version is not None: 308 old_version = self.version 309 if debug: 310 dprint(f"Found existing version '{old_version}' for plugin '{self}'.") 311 312 if debug: 313 dprint(f"Extracting '{self.archive_path}' to '{temp_dir}'...") 314 315 try: 316 with tarfile.open(self.archive_path, 'r:gz') as tarf: 317 safely_extract_tar(tarf, temp_dir) 318 except Exception as e: 319 warn(e) 320 return False, f"Failed to extract plugin '{self.name}'." 321 322 ### search for version information 323 files = os.listdir(temp_dir) 324 325 if str(files[0]) == self.name: 326 is_dir = True 327 elif str(files[0]) == self.name + '.py': 328 is_dir = False 329 else: 330 error(f"Unknown format encountered for plugin '{self}'.") 331 332 fpath = temp_dir / files[0] 333 if is_dir: 334 fpath = fpath / '__init__.py' 335 336 init_venv(self.name, debug=debug) 337 with open(fpath, 'r', encoding='utf-8') as f: 338 init_lines = f.readlines() 339 new_version = None 340 for line in init_lines: 341 if '__version__' not in line: 342 continue 343 version_match = re.search(r'__version__(\s?)=', line.lstrip().rstrip()) 344 if not version_match: 345 continue 346 new_version = ast.literal_eval(line.split('=')[1].lstrip().rstrip()) 347 break 348 if not new_version: 349 warn( 350 f"No `__version__` defined for plugin '{self}'. " 351 + "Assuming new version...", 352 stack = False, 353 ) 354 355 packaging_version = attempt_import('packaging.version') 356 try: 357 is_new_version = (not new_version and not old_version) or ( 358 packaging_version.parse(old_version) < packaging_version.parse(new_version) 359 ) 360 is_same_version = new_version and old_version and ( 361 packaging_version.parse(old_version) == packaging_version.parse(new_version) 362 ) 363 except Exception: 364 is_new_version, is_same_version = True, False 365 366 ### Determine where to permanently store the new plugin. 367 plugin_installation_dir_path = PLUGINS_DIR_PATHS[0] 368 for path in PLUGINS_DIR_PATHS: 369 files_in_plugins_dir = os.listdir(path) 370 if ( 371 self.name in files_in_plugins_dir 372 or 373 (self.name + '.py') in files_in_plugins_dir 374 ): 375 plugin_installation_dir_path = path 376 break 377 378 success_msg = ( 379 f"Successfully installed plugin '{self}'" 380 + ("\n (skipped dependencies)" if skip_deps else "") 381 + "." 382 ) 383 success, abort = None, None 384 385 if is_same_version and not force: 386 success, msg = True, ( 387 f"Plugin '{self}' is up-to-date (version {old_version}).\n" + 388 " Install again with `-f` or `--force` to reinstall." 
389 ) 390 abort = True 391 elif is_new_version or force: 392 for src_dir, dirs, files in os.walk(temp_dir): 393 if success is not None: 394 break 395 dst_dir = str(src_dir).replace(str(temp_dir), str(plugin_installation_dir_path)) 396 if not os.path.exists(dst_dir): 397 os.mkdir(dst_dir) 398 for f in files: 399 src_file = os.path.join(src_dir, f) 400 dst_file = os.path.join(dst_dir, f) 401 if os.path.exists(dst_file): 402 os.remove(dst_file) 403 404 if debug: 405 dprint(f"Moving '{src_file}' to '{dst_dir}'...") 406 try: 407 shutil.move(src_file, dst_dir) 408 except Exception: 409 success, msg = False, ( 410 f"Failed to install plugin '{self}': " + 411 f"Could not move file '{src_file}' to '{dst_dir}'" 412 ) 413 print(msg) 414 break 415 if success is None: 416 success, msg = True, success_msg 417 else: 418 success, msg = False, ( 419 f"Your installed version of plugin '{self}' ({old_version}) is higher than " 420 + f"attempted version {new_version}." 421 ) 422 423 shutil.rmtree(temp_dir) 424 os.chdir(old_cwd) 425 426 ### Reload the plugin's module. 427 sync_plugins_symlinks(debug=debug) 428 if '_module' in self.__dict__: 429 del self.__dict__['_module'] 430 init_venv(venv=self.name, force=True, debug=debug) 431 reload_meerschaum(debug=debug) 432 433 ### if we've already failed, return here 434 if not success or abort: 435 _ongoing_installations.remove(self.full_name) 436 return success, msg 437 438 ### attempt to install dependencies 439 dependencies_installed = skip_deps or self.install_dependencies(force=force, debug=debug) 440 if not dependencies_installed: 441 _ongoing_installations.remove(self.full_name) 442 return False, f"Failed to install dependencies for plugin '{self}'." 443 444 ### handling success tuple, bool, or other (typically None) 445 setup_tuple = self.setup(debug=debug) 446 if isinstance(setup_tuple, tuple): 447 if not setup_tuple[0]: 448 success, msg = setup_tuple 449 elif isinstance(setup_tuple, bool): 450 if not setup_tuple: 451 success, msg = False, ( 452 f"Failed to run post-install setup for plugin '{self}'." + '\n' + 453 f"Check `setup()` in '{self.__file__}' for more information " + 454 "(no error message provided)." 455 ) 456 else: 457 success, msg = True, success_msg 458 elif setup_tuple is None: 459 success = True 460 msg = ( 461 f"Post-install for plugin '{self}' returned None. " + 462 "Assuming plugin successfully installed." 463 ) 464 warn(msg) 465 else: 466 success = False 467 msg = ( 468 f"Post-install for plugin '{self}' returned unexpected value " + 469 f"of type '{type(setup_tuple)}': {setup_tuple}" 470 ) 471 472 _ongoing_installations.remove(self.full_name) 473 _ = self.module 474 return success, msg 475 476 477 def remove_archive( 478 self, 479 debug: bool = False 480 ) -> SuccessTuple: 481 """Remove a plugin's archive file.""" 482 if not self.archive_path.exists(): 483 return True, f"Archive file for plugin '{self}' does not exist." 484 try: 485 self.archive_path.unlink() 486 except Exception as e: 487 return False, f"Failed to remove archive for plugin '{self}':\n{e}" 488 return True, "Success" 489 490 491 def remove_venv( 492 self, 493 debug: bool = False 494 ) -> SuccessTuple: 495 """Remove a plugin's virtual environment.""" 496 if not self.venv_path.exists(): 497 return True, f"Virtual environment for plugin '{self}' does not exist." 
498 try: 499 shutil.rmtree(self.venv_path) 500 except Exception as e: 501 return False, f"Failed to remove virtual environment for plugin '{self}':\n{e}" 502 return True, "Success" 503 504 505 def uninstall(self, debug: bool = False) -> SuccessTuple: 506 """ 507 Remove a plugin, its virtual environment, and archive file. 508 """ 509 from meerschaum.utils.packages import reload_meerschaum 510 from meerschaum.plugins import sync_plugins_symlinks 511 from meerschaum.utils.warnings import warn, info 512 warnings_thrown_count: int = 0 513 max_warnings: int = 3 514 515 if not self.is_installed(): 516 info( 517 f"Plugin '{self.name}' doesn't seem to be installed.\n " 518 + "Checking for artifacts...", 519 stack = False, 520 ) 521 else: 522 real_path = pathlib.Path(os.path.realpath(self.__file__)) 523 try: 524 if real_path.name == '__init__.py': 525 shutil.rmtree(real_path.parent) 526 else: 527 real_path.unlink() 528 except Exception as e: 529 warn(f"Could not remove source files for plugin '{self.name}':\n{e}", stack=False) 530 warnings_thrown_count += 1 531 else: 532 info(f"Removed source files for plugin '{self.name}'.") 533 534 if self.venv_path.exists(): 535 success, msg = self.remove_venv(debug=debug) 536 if not success: 537 warn(msg, stack=False) 538 warnings_thrown_count += 1 539 else: 540 info(f"Removed virtual environment from plugin '{self.name}'.") 541 542 success = warnings_thrown_count < max_warnings 543 sync_plugins_symlinks(debug=debug) 544 self.deactivate_venv(force=True, debug=debug) 545 reload_meerschaum(debug=debug) 546 return success, ( 547 f"Successfully uninstalled plugin '{self}'." if success 548 else f"Failed to uninstall plugin '{self}'." 549 ) 550 551 552 def setup(self, *args: str, debug: bool = False, **kw: Any) -> Union[SuccessTuple, bool]: 553 """ 554 If exists, run the plugin's `setup()` function. 555 556 Parameters 557 ---------- 558 *args: str 559 The positional arguments passed to the `setup()` function. 560 561 debug: bool, default False 562 Verbosity toggle. 563 564 **kw: Any 565 The keyword arguments passed to the `setup()` function. 566 567 Returns 568 ------- 569 A `SuccessTuple` or `bool` indicating success. 570 571 """ 572 from meerschaum.utils.debug import dprint 573 import inspect 574 _setup = None 575 for name, fp in inspect.getmembers(self.module): 576 if name == 'setup' and inspect.isfunction(fp): 577 _setup = fp 578 break 579 580 ### assume success if no setup() is found (not necessary) 581 if _setup is None: 582 return True 583 584 sig = inspect.signature(_setup) 585 has_debug, has_kw = ('debug' in sig.parameters), False 586 for k, v in sig.parameters.items(): 587 if '**' in str(v): 588 has_kw = True 589 break 590 591 _kw = {} 592 if has_kw: 593 _kw.update(kw) 594 if has_debug: 595 _kw['debug'] = debug 596 597 if debug: 598 dprint(f"Running setup for plugin '{self}'...") 599 try: 600 self.activate_venv(debug=debug) 601 return_tuple = _setup(*args, **_kw) 602 self.deactivate_venv(debug=debug) 603 except Exception as e: 604 return False, str(e) 605 606 if isinstance(return_tuple, tuple): 607 return return_tuple 608 if isinstance(return_tuple, bool): 609 return return_tuple, f"Setup for Plugin '{self.name}' did not return a message." 610 if return_tuple is None: 611 return False, f"Setup for Plugin '{self.name}' returned None." 
612 return False, f"Unknown return value from setup for Plugin '{self.name}': {return_tuple}" 613 614 615 def get_dependencies( 616 self, 617 debug: bool = False, 618 ) -> List[str]: 619 """ 620 If the Plugin has specified dependencies in a list called `required`, return the list. 621 622 **NOTE:** Dependecies which start with `'plugin:'` are Meerschaum plugins, not pip packages. 623 Meerschaum plugins may also specify connector keys for a repo after `'@'`. 624 625 Parameters 626 ---------- 627 debug: bool, default False 628 Verbosity toggle. 629 630 Returns 631 ------- 632 A list of required packages and plugins (str). 633 634 """ 635 if '_required' in self.__dict__: 636 return self._required 637 638 ### If the plugin has not yet been imported, 639 ### infer the dependencies from the source text. 640 ### This is not super robust, and it doesn't feel right 641 ### having multiple versions of the logic. 642 ### This is necessary when determining the activation order 643 ### without having import the module. 644 ### For consistency's sake, the module-less method does not cache the requirements. 645 if self.__dict__.get('_module', None) is None: 646 file_path = self.__file__ 647 if file_path is None: 648 return [] 649 with open(file_path, 'r', encoding='utf-8') as f: 650 text = f.read() 651 652 if 'required' not in text: 653 return [] 654 655 ### This has some limitations: 656 ### It relies on `required` being manually declared. 657 ### We lose the ability to dynamically alter the `required` list, 658 ### which is why we've kept the module-reliant method below. 659 import ast, re 660 ### NOTE: This technically would break 661 ### if `required` was the very first line of the file. 662 req_start_match = re.search(r'\nrequired(:\s*)?.*=', text) 663 if not req_start_match: 664 return [] 665 req_start = req_start_match.start() 666 equals_sign = req_start + text[req_start:].find('=') 667 668 ### Dependencies may have brackets within the strings, so push back the index. 669 first_opening_brace = equals_sign + 1 + text[equals_sign:].find('[') 670 if first_opening_brace == -1: 671 return [] 672 673 next_closing_brace = equals_sign + 1 + text[equals_sign:].find(']') 674 if next_closing_brace == -1: 675 return [] 676 677 start_ix = first_opening_brace + 1 678 end_ix = next_closing_brace 679 680 num_braces = 0 681 while True: 682 if '[' not in text[start_ix:end_ix]: 683 break 684 num_braces += 1 685 start_ix = end_ix 686 end_ix += text[end_ix + 1:].find(']') + 1 687 688 req_end = end_ix + 1 689 req_text = ( 690 text[(first_opening_brace-1):req_end] 691 .lstrip() 692 .replace('=', '', 1) 693 .lstrip() 694 .rstrip() 695 ) 696 try: 697 required = ast.literal_eval(req_text) 698 except Exception as e: 699 warn( 700 f"Unable to determine requirements for plugin '{self.name}' " 701 + "without importing the module.\n" 702 + " This may be due to dynamically setting the global `required` list.\n" 703 + f" {e}" 704 ) 705 return [] 706 return required 707 708 import inspect 709 self.activate_venv(dependencies=False, debug=debug) 710 required = [] 711 for name, val in inspect.getmembers(self.module): 712 if name == 'required': 713 required = val 714 break 715 self._required = required 716 self.deactivate_venv(dependencies=False, debug=debug) 717 return required 718 719 720 def get_required_plugins(self, debug: bool=False) -> List[mrsm.plugins.Plugin]: 721 """ 722 Return a list of required Plugin objects. 
723 """ 724 from meerschaum.utils.warnings import warn 725 from meerschaum.config import get_config 726 from meerschaum.config.static import STATIC_CONFIG 727 from meerschaum.connectors.parse import is_valid_connector_keys 728 plugins = [] 729 _deps = self.get_dependencies(debug=debug) 730 sep = STATIC_CONFIG['plugins']['repo_separator'] 731 plugin_names = [ 732 _d[len('plugin:'):] for _d in _deps 733 if _d.startswith('plugin:') and len(_d) > len('plugin:') 734 ] 735 default_repo_keys = get_config('meerschaum', 'default_repository') 736 skipped_repo_keys = set() 737 738 for _plugin_name in plugin_names: 739 if sep in _plugin_name: 740 try: 741 _plugin_name, _repo_keys = _plugin_name.split(sep) 742 except Exception: 743 _repo_keys = default_repo_keys 744 warn( 745 f"Invalid repo keys for required plugin '{_plugin_name}'.\n " 746 + f"Will try to use '{_repo_keys}' instead.", 747 stack = False, 748 ) 749 else: 750 _repo_keys = default_repo_keys 751 752 if _repo_keys in skipped_repo_keys: 753 continue 754 755 if not is_valid_connector_keys(_repo_keys): 756 warn( 757 f"Invalid connector '{_repo_keys}'.\n" 758 f" Skipping required plugins from repository '{_repo_keys}'", 759 stack=False, 760 ) 761 continue 762 763 plugins.append(Plugin(_plugin_name, repo=_repo_keys)) 764 765 return plugins 766 767 768 def get_required_packages(self, debug: bool=False) -> List[str]: 769 """ 770 Return the required package names (excluding plugins). 771 """ 772 _deps = self.get_dependencies(debug=debug) 773 return [_d for _d in _deps if not _d.startswith('plugin:')] 774 775 776 def activate_venv(self, dependencies: bool=True, debug: bool=False, **kw) -> bool: 777 """ 778 Activate the virtual environments for the plugin and its dependencies. 779 780 Parameters 781 ---------- 782 dependencies: bool, default True 783 If `True`, activate the virtual environments for required plugins. 784 785 Returns 786 ------- 787 A bool indicating success. 788 """ 789 from meerschaum.utils.venv import venv_target_path 790 from meerschaum.utils.packages import activate_venv 791 from meerschaum.utils.misc import make_symlink, is_symlink 792 from meerschaum.config._paths import PACKAGE_ROOT_PATH 793 794 if dependencies: 795 for plugin in self.get_required_plugins(debug=debug): 796 plugin.activate_venv(debug=debug, **kw) 797 798 vtp = venv_target_path(self.name, debug=debug, allow_nonexistent=True) 799 venv_meerschaum_path = vtp / 'meerschaum' 800 801 try: 802 success, msg = True, "Success" 803 if is_symlink(venv_meerschaum_path): 804 if pathlib.Path(os.path.realpath(venv_meerschaum_path)) != PACKAGE_ROOT_PATH: 805 venv_meerschaum_path.unlink() 806 success, msg = make_symlink(venv_meerschaum_path, PACKAGE_ROOT_PATH) 807 except Exception as e: 808 success, msg = False, str(e) 809 if not success: 810 warn(f"Unable to create symlink {venv_meerschaum_path} to {PACKAGE_ROOT_PATH}:\n{msg}") 811 812 return activate_venv(self.name, debug=debug, **kw) 813 814 815 def deactivate_venv(self, dependencies: bool=True, debug: bool = False, **kw) -> bool: 816 """ 817 Deactivate the virtual environments for the plugin and its dependencies. 818 819 Parameters 820 ---------- 821 dependencies: bool, default True 822 If `True`, deactivate the virtual environments for required plugins. 823 824 Returns 825 ------- 826 A bool indicating success. 
827 """ 828 from meerschaum.utils.packages import deactivate_venv 829 success = deactivate_venv(self.name, debug=debug, **kw) 830 if dependencies: 831 for plugin in self.get_required_plugins(debug=debug): 832 plugin.deactivate_venv(debug=debug, **kw) 833 return success 834 835 836 def install_dependencies( 837 self, 838 force: bool = False, 839 debug: bool = False, 840 ) -> bool: 841 """ 842 If specified, install dependencies. 843 844 **NOTE:** Dependencies that start with `'plugin:'` will be installed as 845 Meerschaum plugins from the same repository as this Plugin. 846 To install from a different repository, add the repo keys after `'@'` 847 (e.g. `'plugin:foo@api:bar'`). 848 849 Parameters 850 ---------- 851 force: bool, default False 852 If `True`, continue with the installation, even if some 853 required packages fail to install. 854 855 debug: bool, default False 856 Verbosity toggle. 857 858 Returns 859 ------- 860 A bool indicating success. 861 """ 862 from meerschaum.utils.packages import pip_install, venv_contains_package 863 from meerschaum.utils.warnings import warn, info 864 _deps = self.get_dependencies(debug=debug) 865 if not _deps and self.requirements_file_path is None: 866 return True 867 868 plugins = self.get_required_plugins(debug=debug) 869 for _plugin in plugins: 870 if _plugin.name == self.name: 871 warn(f"Plugin '{self.name}' cannot depend on itself! Skipping...", stack=False) 872 continue 873 _success, _msg = _plugin.repo_connector.install_plugin( 874 _plugin.name, debug=debug, force=force 875 ) 876 if not _success: 877 warn( 878 f"Failed to install required plugin '{_plugin}' from '{_plugin.repo_connector}'" 879 + f" for plugin '{self.name}':\n" + _msg, 880 stack = False, 881 ) 882 if not force: 883 warn( 884 "Try installing with the `--force` flag to continue anyway.", 885 stack = False, 886 ) 887 return False 888 info( 889 "Continuing with installation despite the failure " 890 + "(careful, things might be broken!)...", 891 icon = False 892 ) 893 894 895 ### First step: parse `requirements.txt` if it exists. 896 if self.requirements_file_path is not None: 897 if not pip_install( 898 requirements_file_path=self.requirements_file_path, 899 venv=self.name, debug=debug 900 ): 901 warn( 902 f"Failed to resolve 'requirements.txt' for plugin '{self.name}'.", 903 stack = False, 904 ) 905 if not force: 906 warn( 907 "Try installing with `--force` to continue anyway.", 908 stack = False, 909 ) 910 return False 911 info( 912 "Continuing with installation despite the failure " 913 + "(careful, things might be broken!)...", 914 icon = False 915 ) 916 917 918 ### Don't reinstall packages that are already included in required plugins. 919 packages = [] 920 _packages = self.get_required_packages(debug=debug) 921 accounted_for_packages = set() 922 for package_name in _packages: 923 for plugin in plugins: 924 if venv_contains_package(package_name, plugin.name): 925 accounted_for_packages.add(package_name) 926 break 927 packages = [pkg for pkg in _packages if pkg not in accounted_for_packages] 928 929 ### Attempt pip packages installation. 
930 if packages: 931 for package in packages: 932 if not pip_install(package, venv=self.name, debug=debug): 933 warn( 934 f"Failed to install required package '{package}'" 935 + f" for plugin '{self.name}'.", 936 stack = False, 937 ) 938 if not force: 939 warn( 940 "Try installing with `--force` to continue anyway.", 941 stack = False, 942 ) 943 return False 944 info( 945 "Continuing with installation despite the failure " 946 + "(careful, things might be broken!)...", 947 icon = False 948 ) 949 return True 950 951 952 @property 953 def full_name(self) -> str: 954 """ 955 Include the repo keys with the plugin's name. 956 """ 957 from meerschaum.config.static import STATIC_CONFIG 958 sep = STATIC_CONFIG['plugins']['repo_separator'] 959 return self.name + sep + str(self.repo_connector) 960 961 962 def __str__(self): 963 return self.name 964 965 966 def __repr__(self): 967 return f"Plugin('{self.name}', repo='{self.repo_connector}')" 968 969 970 def __del__(self): 971 pass
Handle packaging of Meerschaum plugins.
39 def __init__( 40 self, 41 name: str, 42 version: Optional[str] = None, 43 user_id: Optional[int] = None, 44 required: Optional[List[str]] = None, 45 attributes: Optional[Dict[str, Any]] = None, 46 archive_path: Optional[pathlib.Path] = None, 47 venv_path: Optional[pathlib.Path] = None, 48 repo_connector: Optional['mrsm.connectors.api.APIConnector'] = None, 49 repo: Union['mrsm.connectors.api.APIConnector', str, None] = None, 50 ): 51 from meerschaum.config.static import STATIC_CONFIG 52 sep = STATIC_CONFIG['plugins']['repo_separator'] 53 _repo = None 54 if sep in name: 55 try: 56 name, _repo = name.split(sep) 57 except Exception as e: 58 error(f"Invalid plugin name: '{name}'") 59 self._repo_in_name = _repo 60 61 if attributes is None: 62 attributes = {} 63 self.name = name 64 self.attributes = attributes 65 self.user_id = user_id 66 self._version = version 67 if required: 68 self._required = required 69 self.archive_path = ( 70 archive_path if archive_path is not None 71 else PLUGINS_ARCHIVES_RESOURCES_PATH / f"{self.name}.tar.gz" 72 ) 73 self.venv_path = ( 74 venv_path if venv_path is not None 75 else VIRTENV_RESOURCES_PATH / self.name 76 ) 77 self._repo_connector = repo_connector 78 self._repo_keys = repo
81 @property 82 def repo_connector(self): 83 """ 84 Return the repository connector for this plugin. 85 NOTE: This imports the `connectors` module, which imports certain plugin modules. 86 """ 87 if self._repo_connector is None: 88 from meerschaum.connectors.parse import parse_repo_keys 89 90 repo_keys = self._repo_keys or self._repo_in_name 91 if self._repo_in_name and self._repo_keys and self._repo_keys != self._repo_in_name: 92 error( 93 f"Received inconsistent repos: '{self._repo_in_name}' and '{self._repo_keys}'." 94 ) 95 repo_connector = parse_repo_keys(repo_keys) 96 self._repo_connector = repo_connector 97 return self._repo_connector
Return the repository connector for this plugin.
NOTE: This imports the `connectors` module, which imports certain plugin modules.
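For example, the repository may be embedded in the plugin's name after the separator `'@'` (a minimal sketch; the printed keys assume the default public repository `api:mrsm`):
import meerschaum as mrsm
plugin = mrsm.Plugin('noaa@api:mrsm')
print(plugin.repo_connector)
# e.g. api:mrsm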
100 @property 101 def version(self): 102 """ 103 Return the plugin's module version is defined (`__version__`) if it's defined. 104 """ 105 if self._version is None: 106 try: 107 self._version = self.module.__version__ 108 except Exception as e: 109 self._version = None 110 return self._version
Return the plugin's module `__version__`, if it's defined.
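For example (a sketch, assuming the 'noaa' plugin is installed and defines `__version__`):
import meerschaum as mrsm
plugin = mrsm.Plugin('noaa')
print(plugin.version)
# e.g. '1.2.0', or None if `__version__` is not defined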
113 @property 114 def module(self): 115 """ 116 Return the Python module of the underlying plugin. 117 """ 118 if '_module' not in self.__dict__ or self.__dict__.get('_module', None) is None: 119 if self.__file__ is None: 120 return None 121 from meerschaum.plugins import import_plugins 122 self._module = import_plugins(str(self), warn=False) 123 return self._module
Return the Python module of the underlying plugin.
149 @property 150 def requirements_file_path(self) -> Union[pathlib.Path, None]: 151 """ 152 If a file named `requirements.txt` exists, return its path. 153 """ 154 if self.__file__ is None: 155 return None 156 path = pathlib.Path(self.__file__).parent / 'requirements.txt' 157 if not path.exists(): 158 return None 159 return path
If a file named `requirements.txt` exists, return its path.
162 def is_installed(self, **kw) -> bool: 163 """ 164 Check whether a plugin is correctly installed. 165 166 Returns 167 ------- 168 A `bool` indicating whether a plugin exists and is successfully imported. 169 """ 170 return self.__file__ is not None
Check whether a plugin is correctly installed.

Returns
- A `bool` indicating whether a plugin exists and is successfully imported.
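For example, guard against a missing plugin before using it (a minimal sketch):
import meerschaum as mrsm
plugin = mrsm.Plugin('noaa')
if not plugin.is_installed():
    print(f"Plugin '{plugin}' is not installed.")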
173 def make_tar(self, debug: bool = False) -> pathlib.Path: 174 """ 175 Compress the plugin's source files into a `.tar.gz` archive and return the archive's path. 176 177 Parameters 178 ---------- 179 debug: bool, default False 180 Verbosity toggle. 181 182 Returns 183 ------- 184 A `pathlib.Path` to the archive file's path. 185 186 """ 187 import tarfile, pathlib, subprocess, fnmatch 188 from meerschaum.utils.debug import dprint 189 from meerschaum.utils.packages import attempt_import 190 pathspec = attempt_import('pathspec', debug=debug) 191 192 if not self.__file__: 193 from meerschaum.utils.warnings import error 194 error(f"Could not find file for plugin '{self}'.") 195 if '__init__.py' in self.__file__ or os.path.isdir(self.__file__): 196 path = self.__file__.replace('__init__.py', '') 197 is_dir = True 198 else: 199 path = self.__file__ 200 is_dir = False 201 202 old_cwd = os.getcwd() 203 real_parent_path = pathlib.Path(os.path.realpath(path)).parent 204 os.chdir(real_parent_path) 205 206 default_patterns_to_ignore = [ 207 '.pyc', 208 '__pycache__/', 209 'eggs/', 210 '__pypackages__/', 211 '.git', 212 ] 213 214 def parse_gitignore() -> 'Set[str]': 215 gitignore_path = pathlib.Path(path) / '.gitignore' 216 if not gitignore_path.exists(): 217 return set(default_patterns_to_ignore) 218 with open(gitignore_path, 'r', encoding='utf-8') as f: 219 gitignore_text = f.read() 220 return set(pathspec.PathSpec.from_lines( 221 pathspec.patterns.GitWildMatchPattern, 222 default_patterns_to_ignore + gitignore_text.splitlines() 223 ).match_tree(path)) 224 225 patterns_to_ignore = parse_gitignore() if is_dir else set() 226 227 if debug: 228 dprint(f"Patterns to ignore:\n{patterns_to_ignore}") 229 230 with tarfile.open(self.archive_path, 'w:gz') as tarf: 231 if not is_dir: 232 tarf.add(f"{self.name}.py") 233 else: 234 for root, dirs, files in os.walk(self.name): 235 for f in files: 236 good_file = True 237 fp = os.path.join(root, f) 238 for pattern in patterns_to_ignore: 239 if pattern in str(fp) or f.startswith('.'): 240 good_file = False 241 break 242 if good_file: 243 if debug: 244 dprint(f"Adding '{fp}'...") 245 tarf.add(fp) 246 247 ### clean up and change back to old directory 248 os.chdir(old_cwd) 249 250 ### change to 775 to avoid permissions issues with the API in a Docker container 251 self.archive_path.chmod(0o775) 252 253 if debug: 254 dprint(f"Created archive '{self.archive_path}'.") 255 return self.archive_path
Compress the plugin's source files into a `.tar.gz` archive and return the archive's path.

Parameters
- debug (bool, default False): Verbosity toggle.

Returns
- A `pathlib.Path` to the archive file.
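For example, package a local plugin into an archive (a sketch; 'example' is a hypothetical plugin name):
import meerschaum as mrsm
plugin = mrsm.Plugin('example')
archive_path = plugin.make_tar(debug=True)
print(archive_path)
# path to `example.tar.gz` under the plugins archives directory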
258 def install( 259 self, 260 skip_deps: bool = False, 261 force: bool = False, 262 debug: bool = False, 263 ) -> SuccessTuple: 264 """ 265 Extract a plugin's tar archive to the plugins directory. 266 267 This function checks if the plugin is already installed and if the version is equal or 268 greater than the existing installation. 269 270 Parameters 271 ---------- 272 skip_deps: bool, default False 273 If `True`, do not install dependencies. 274 275 force: bool, default False 276 If `True`, continue with installation, even if required packages fail to install. 277 278 debug: bool, default False 279 Verbosity toggle. 280 281 Returns 282 ------- 283 A `SuccessTuple` of success (bool) and a message (str). 284 285 """ 286 if self.full_name in _ongoing_installations: 287 return True, f"Already installing plugin '{self}'." 288 _ongoing_installations.add(self.full_name) 289 from meerschaum.utils.warnings import warn, error 290 if debug: 291 from meerschaum.utils.debug import dprint 292 import tarfile 293 import re 294 import ast 295 from meerschaum.plugins import sync_plugins_symlinks 296 from meerschaum.utils.packages import attempt_import, determine_version, reload_meerschaum 297 from meerschaum.utils.venv import init_venv 298 from meerschaum.utils.misc import safely_extract_tar 299 old_cwd = os.getcwd() 300 old_version = '' 301 new_version = '' 302 temp_dir = PLUGINS_TEMP_RESOURCES_PATH / self.name 303 temp_dir.mkdir(exist_ok=True) 304 305 if not self.archive_path.exists(): 306 return False, f"Missing archive file for plugin '{self}'." 307 if self.version is not None: 308 old_version = self.version 309 if debug: 310 dprint(f"Found existing version '{old_version}' for plugin '{self}'.") 311 312 if debug: 313 dprint(f"Extracting '{self.archive_path}' to '{temp_dir}'...") 314 315 try: 316 with tarfile.open(self.archive_path, 'r:gz') as tarf: 317 safely_extract_tar(tarf, temp_dir) 318 except Exception as e: 319 warn(e) 320 return False, f"Failed to extract plugin '{self.name}'." 321 322 ### search for version information 323 files = os.listdir(temp_dir) 324 325 if str(files[0]) == self.name: 326 is_dir = True 327 elif str(files[0]) == self.name + '.py': 328 is_dir = False 329 else: 330 error(f"Unknown format encountered for plugin '{self}'.") 331 332 fpath = temp_dir / files[0] 333 if is_dir: 334 fpath = fpath / '__init__.py' 335 336 init_venv(self.name, debug=debug) 337 with open(fpath, 'r', encoding='utf-8') as f: 338 init_lines = f.readlines() 339 new_version = None 340 for line in init_lines: 341 if '__version__' not in line: 342 continue 343 version_match = re.search(r'__version__(\s?)=', line.lstrip().rstrip()) 344 if not version_match: 345 continue 346 new_version = ast.literal_eval(line.split('=')[1].lstrip().rstrip()) 347 break 348 if not new_version: 349 warn( 350 f"No `__version__` defined for plugin '{self}'. " 351 + "Assuming new version...", 352 stack = False, 353 ) 354 355 packaging_version = attempt_import('packaging.version') 356 try: 357 is_new_version = (not new_version and not old_version) or ( 358 packaging_version.parse(old_version) < packaging_version.parse(new_version) 359 ) 360 is_same_version = new_version and old_version and ( 361 packaging_version.parse(old_version) == packaging_version.parse(new_version) 362 ) 363 except Exception: 364 is_new_version, is_same_version = True, False 365 366 ### Determine where to permanently store the new plugin. 
367 plugin_installation_dir_path = PLUGINS_DIR_PATHS[0] 368 for path in PLUGINS_DIR_PATHS: 369 files_in_plugins_dir = os.listdir(path) 370 if ( 371 self.name in files_in_plugins_dir 372 or 373 (self.name + '.py') in files_in_plugins_dir 374 ): 375 plugin_installation_dir_path = path 376 break 377 378 success_msg = ( 379 f"Successfully installed plugin '{self}'" 380 + ("\n (skipped dependencies)" if skip_deps else "") 381 + "." 382 ) 383 success, abort = None, None 384 385 if is_same_version and not force: 386 success, msg = True, ( 387 f"Plugin '{self}' is up-to-date (version {old_version}).\n" + 388 " Install again with `-f` or `--force` to reinstall." 389 ) 390 abort = True 391 elif is_new_version or force: 392 for src_dir, dirs, files in os.walk(temp_dir): 393 if success is not None: 394 break 395 dst_dir = str(src_dir).replace(str(temp_dir), str(plugin_installation_dir_path)) 396 if not os.path.exists(dst_dir): 397 os.mkdir(dst_dir) 398 for f in files: 399 src_file = os.path.join(src_dir, f) 400 dst_file = os.path.join(dst_dir, f) 401 if os.path.exists(dst_file): 402 os.remove(dst_file) 403 404 if debug: 405 dprint(f"Moving '{src_file}' to '{dst_dir}'...") 406 try: 407 shutil.move(src_file, dst_dir) 408 except Exception: 409 success, msg = False, ( 410 f"Failed to install plugin '{self}': " + 411 f"Could not move file '{src_file}' to '{dst_dir}'" 412 ) 413 print(msg) 414 break 415 if success is None: 416 success, msg = True, success_msg 417 else: 418 success, msg = False, ( 419 f"Your installed version of plugin '{self}' ({old_version}) is higher than " 420 + f"attempted version {new_version}." 421 ) 422 423 shutil.rmtree(temp_dir) 424 os.chdir(old_cwd) 425 426 ### Reload the plugin's module. 427 sync_plugins_symlinks(debug=debug) 428 if '_module' in self.__dict__: 429 del self.__dict__['_module'] 430 init_venv(venv=self.name, force=True, debug=debug) 431 reload_meerschaum(debug=debug) 432 433 ### if we've already failed, return here 434 if not success or abort: 435 _ongoing_installations.remove(self.full_name) 436 return success, msg 437 438 ### attempt to install dependencies 439 dependencies_installed = skip_deps or self.install_dependencies(force=force, debug=debug) 440 if not dependencies_installed: 441 _ongoing_installations.remove(self.full_name) 442 return False, f"Failed to install dependencies for plugin '{self}'." 443 444 ### handling success tuple, bool, or other (typically None) 445 setup_tuple = self.setup(debug=debug) 446 if isinstance(setup_tuple, tuple): 447 if not setup_tuple[0]: 448 success, msg = setup_tuple 449 elif isinstance(setup_tuple, bool): 450 if not setup_tuple: 451 success, msg = False, ( 452 f"Failed to run post-install setup for plugin '{self}'." + '\n' + 453 f"Check `setup()` in '{self.__file__}' for more information " + 454 "(no error message provided)." 455 ) 456 else: 457 success, msg = True, success_msg 458 elif setup_tuple is None: 459 success = True 460 msg = ( 461 f"Post-install for plugin '{self}' returned None. " + 462 "Assuming plugin successfully installed." 463 ) 464 warn(msg) 465 else: 466 success = False 467 msg = ( 468 f"Post-install for plugin '{self}' returned unexpected value " + 469 f"of type '{type(setup_tuple)}': {setup_tuple}" 470 ) 471 472 _ongoing_installations.remove(self.full_name) 473 _ = self.module 474 return success, msg
Extract a plugin's tar archive to the plugins directory.
This function checks if the plugin is already installed and if the version is equal or greater than the existing installation.

Parameters
- skip_deps (bool, default False): If `True`, do not install dependencies.
- force (bool, default False): If `True`, continue with installation, even if required packages fail to install.
- debug (bool, default False): Verbosity toggle.

Returns
- A `SuccessTuple` of success (bool) and a message (str).
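A typical flow is to build the archive with `make_tar()` and then extract it with `install()` (a sketch; 'example' is hypothetical, and `force=True` reinstalls the same version):
import meerschaum as mrsm
plugin = mrsm.Plugin('example')
plugin.make_tar()
success, msg = plugin.install(force=True)
print(success, msg)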
477 def remove_archive( 478 self, 479 debug: bool = False 480 ) -> SuccessTuple: 481 """Remove a plugin's archive file.""" 482 if not self.archive_path.exists(): 483 return True, f"Archive file for plugin '{self}' does not exist." 484 try: 485 self.archive_path.unlink() 486 except Exception as e: 487 return False, f"Failed to remove archive for plugin '{self}':\n{e}" 488 return True, "Success"
Remove a plugin's archive file.
491 def remove_venv( 492 self, 493 debug: bool = False 494 ) -> SuccessTuple: 495 """Remove a plugin's virtual environment.""" 496 if not self.venv_path.exists(): 497 return True, f"Virtual environment for plugin '{self}' does not exist." 498 try: 499 shutil.rmtree(self.venv_path) 500 except Exception as e: 501 return False, f"Failed to remove virtual environment for plugin '{self}':\n{e}" 502 return True, "Success"
Remove a plugin's virtual environment.
505 def uninstall(self, debug: bool = False) -> SuccessTuple: 506 """ 507 Remove a plugin, its virtual environment, and archive file. 508 """ 509 from meerschaum.utils.packages import reload_meerschaum 510 from meerschaum.plugins import sync_plugins_symlinks 511 from meerschaum.utils.warnings import warn, info 512 warnings_thrown_count: int = 0 513 max_warnings: int = 3 514 515 if not self.is_installed(): 516 info( 517 f"Plugin '{self.name}' doesn't seem to be installed.\n " 518 + "Checking for artifacts...", 519 stack = False, 520 ) 521 else: 522 real_path = pathlib.Path(os.path.realpath(self.__file__)) 523 try: 524 if real_path.name == '__init__.py': 525 shutil.rmtree(real_path.parent) 526 else: 527 real_path.unlink() 528 except Exception as e: 529 warn(f"Could not remove source files for plugin '{self.name}':\n{e}", stack=False) 530 warnings_thrown_count += 1 531 else: 532 info(f"Removed source files for plugin '{self.name}'.") 533 534 if self.venv_path.exists(): 535 success, msg = self.remove_venv(debug=debug) 536 if not success: 537 warn(msg, stack=False) 538 warnings_thrown_count += 1 539 else: 540 info(f"Removed virtual environment from plugin '{self.name}'.") 541 542 success = warnings_thrown_count < max_warnings 543 sync_plugins_symlinks(debug=debug) 544 self.deactivate_venv(force=True, debug=debug) 545 reload_meerschaum(debug=debug) 546 return success, ( 547 f"Successfully uninstalled plugin '{self}'." if success 548 else f"Failed to uninstall plugin '{self}'." 549 )
Remove a plugin, its virtual environment, and archive file.
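Remove all of a plugin's artifacts in one call (a minimal sketch; 'example' is hypothetical):
import meerschaum as mrsm
success, msg = mrsm.Plugin('example').uninstall(debug=True)
if not success:
    print(msg)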
552 def setup(self, *args: str, debug: bool = False, **kw: Any) -> Union[SuccessTuple, bool]: 553 """ 554 If exists, run the plugin's `setup()` function. 555 556 Parameters 557 ---------- 558 *args: str 559 The positional arguments passed to the `setup()` function. 560 561 debug: bool, default False 562 Verbosity toggle. 563 564 **kw: Any 565 The keyword arguments passed to the `setup()` function. 566 567 Returns 568 ------- 569 A `SuccessTuple` or `bool` indicating success. 570 571 """ 572 from meerschaum.utils.debug import dprint 573 import inspect 574 _setup = None 575 for name, fp in inspect.getmembers(self.module): 576 if name == 'setup' and inspect.isfunction(fp): 577 _setup = fp 578 break 579 580 ### assume success if no setup() is found (not necessary) 581 if _setup is None: 582 return True 583 584 sig = inspect.signature(_setup) 585 has_debug, has_kw = ('debug' in sig.parameters), False 586 for k, v in sig.parameters.items(): 587 if '**' in str(v): 588 has_kw = True 589 break 590 591 _kw = {} 592 if has_kw: 593 _kw.update(kw) 594 if has_debug: 595 _kw['debug'] = debug 596 597 if debug: 598 dprint(f"Running setup for plugin '{self}'...") 599 try: 600 self.activate_venv(debug=debug) 601 return_tuple = _setup(*args, **_kw) 602 self.deactivate_venv(debug=debug) 603 except Exception as e: 604 return False, str(e) 605 606 if isinstance(return_tuple, tuple): 607 return return_tuple 608 if isinstance(return_tuple, bool): 609 return return_tuple, f"Setup for Plugin '{self.name}' did not return a message." 610 if return_tuple is None: 611 return False, f"Setup for Plugin '{self.name}' returned None." 612 return False, f"Unknown return value from setup for Plugin '{self.name}': {return_tuple}"
If it exists, run the plugin's `setup()` function.

Parameters
- *args (str): The positional arguments passed to the `setup()` function.
- debug (bool, default False): Verbosity toggle.
- **kw (Any): The keyword arguments passed to the `setup()` function.

Returns
- A `SuccessTuple` or `bool` indicating success.
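On the plugin-author side, a `setup()` function might look like this (a minimal sketch; returning a `SuccessTuple` is optional, and a bare `True` also counts as success):
# Inside the plugin's source file (e.g. a hypothetical `example.py`):
def setup(**kw) -> 'tuple[bool, str]':
    """Run post-install logic, e.g. seeding configuration files."""
    return True, "Success"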
def get_dependencies(self, debug: bool = False) -> List[str]:
If the Plugin has specified dependencies in a list called `required`, return the list.

NOTE: Dependencies which start with `'plugin:'` are Meerschaum plugins, not pip packages. Meerschaum plugins may also specify connector keys for a repo after `'@'`.

Parameters
- debug (bool, default False): Verbosity toggle.

Returns
- A list of required packages and plugins (str).
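For example, a plugin might declare its dependencies as a top-level `required` list in its source file (a sketch; 'example' and the listed dependencies are illustrative):
# Inside the plugin's source file:
required = ['pandas', 'plugin:noaa', 'plugin:compose@api:mrsm']

import meerschaum as mrsm
print(mrsm.Plugin('example').get_dependencies())
# ['pandas', 'plugin:noaa', 'plugin:compose@api:mrsm']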
def get_required_plugins(self, debug: bool = False) -> List[mrsm.plugins.Plugin]:
Return a list of required Plugin objects.
def get_required_packages(self, debug: bool = False) -> List[str]:
Return the required package names (excluding plugins).
def activate_venv(self, dependencies: bool = True, debug: bool = False, **kw) -> bool:
Activate the virtual environments for the plugin and its dependencies.

Parameters
- dependencies (bool, default True): If `True`, activate the virtual environments for required plugins.

Returns
- A bool indicating success.
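For example, manually activate a plugin's virtual environment before importing its packages (a sketch; prefer the `Venv` context manager documented below for automatic cleanup):
import meerschaum as mrsm
plugin = mrsm.Plugin('noaa')
plugin.activate_venv()
import requests  # resolved from the plugin's venv, assuming it's installed there
plugin.deactivate_venv()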
def deactivate_venv(self, dependencies: bool = True, debug: bool = False, **kw) -> bool:
Deactivate the virtual environments for the plugin and its dependencies.

Parameters
- dependencies (bool, default True): If `True`, deactivate the virtual environments for required plugins.

Returns
- A bool indicating success.
def install_dependencies(self, force: bool = False, debug: bool = False) -> bool:
If specified, install dependencies.

NOTE: Dependencies that start with `'plugin:'` will be installed as Meerschaum plugins from the same repository as this Plugin. To install from a different repository, add the repo keys after `'@'` (e.g. `'plugin:foo@api:bar'`).

Parameters
- force (bool, default False): If `True`, continue with the installation, even if some required packages fail to install.
- debug (bool, default False): Verbosity toggle.

Returns
- A bool indicating success.
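For example (a sketch; 'example' is a hypothetical plugin name):
import meerschaum as mrsm
plugin = mrsm.Plugin('example')
if not plugin.install_dependencies(force=False, debug=True):
    print(f"Failed to install dependencies for plugin '{plugin}'.")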
@property
def full_name(self) -> str:
Include the repo keys with the plugin's name.
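For example (the printed value assumes the default repository `api:mrsm` and the `'@'` separator):
import meerschaum as mrsm
print(mrsm.Plugin('noaa').full_name)
# noaa@api:mrsm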
18class Venv: 19 """ 20 Manage a virtual enviroment's activation status. 21 22 Examples 23 -------- 24 >>> from meerschaum.plugins import Plugin 25 >>> with Venv('mrsm') as venv: 26 ... import pandas 27 >>> with Venv(Plugin('noaa')) as venv: 28 ... import requests 29 >>> venv = Venv('mrsm') 30 >>> venv.activate() 31 True 32 >>> venv.deactivate() 33 True 34 >>> 35 """ 36 37 def __init__( 38 self, 39 venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm', 40 debug: bool = False, 41 ) -> None: 42 from meerschaum.utils.venv import activate_venv, deactivate_venv, active_venvs 43 ### For some weird threading issue, 44 ### we can't use `isinstance` here. 45 if 'meerschaum.plugins._Plugin' in str(type(venv)): 46 self._venv = venv.name 47 self._activate = venv.activate_venv 48 self._deactivate = venv.deactivate_venv 49 self._kwargs = {} 50 else: 51 self._venv = venv 52 self._activate = activate_venv 53 self._deactivate = deactivate_venv 54 self._kwargs = {'venv': venv} 55 self._debug = debug 56 ### In case someone calls `deactivate()` before `activate()`. 57 self._kwargs['previously_active_venvs'] = copy.deepcopy(active_venvs) 58 59 60 def activate(self, debug: bool = False) -> bool: 61 """ 62 Activate this virtual environment. 63 If a `meerschaum.plugins.Plugin` was provided, its dependent virtual environments 64 will also be activated. 65 """ 66 from meerschaum.utils.venv import active_venvs, init_venv 67 self._kwargs['previously_active_venvs'] = copy.deepcopy(active_venvs) 68 try: 69 return self._activate(debug=(debug or self._debug), **self._kwargs) 70 except OSError as e: 71 if not init_venv(self._venv, force=True): 72 raise e 73 return self._activate(debug=(debug or self._debug), **self._kwargs) 74 75 76 def deactivate(self, debug: bool = False) -> bool: 77 """ 78 Deactivate this virtual environment. 79 If a `meerschaum.plugins.Plugin` was provided, its dependent virtual environments 80 will also be deactivated. 81 """ 82 return self._deactivate(debug=(debug or self._debug), **self._kwargs) 83 84 85 @property 86 def target_path(self) -> pathlib.Path: 87 """ 88 Return the target site-packages path for this virtual environment. 89 A `meerschaum.utils.venv.Venv` may have one virtual environment per minor Python version 90 (e.g. Python 3.10 and Python 3.7). 91 """ 92 from meerschaum.utils.venv import venv_target_path 93 return venv_target_path(venv=self._venv, allow_nonexistent=True, debug=self._debug) 94 95 96 @property 97 def root_path(self) -> pathlib.Path: 98 """ 99 Return the top-level path for this virtual environment. 100 """ 101 from meerschaum.config._paths import VIRTENV_RESOURCES_PATH 102 if self._venv is None: 103 return self.target_path.parent 104 return VIRTENV_RESOURCES_PATH / self._venv 105 106 107 def __enter__(self) -> None: 108 self.activate(debug=self._debug) 109 110 111 def __exit__(self, exc_type, exc_value, exc_traceback) -> None: 112 self.deactivate(debug=self._debug) 113 114 115 def __str__(self) -> str: 116 quote = "'" if self._venv is not None else "" 117 return "Venv(" + quote + str(self._venv) + quote + ")" 118 119 120 def __repr__(self) -> str: 121 return self.__str__()
Manage a virtual environment's activation status.
Examples
>>> from meerschaum.plugins import Plugin
>>> with Venv('mrsm') as venv:
... import pandas
>>> with Venv(Plugin('noaa')) as venv:
... import requests
>>> venv = Venv('mrsm')
>>> venv.activate()
True
>>> venv.deactivate()
True
>>>
def __init__(self, venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm', debug: bool = False) -> None:
def activate(self, debug: bool = False) -> bool:
Activate this virtual environment.
If a `meerschaum.plugins.Plugin` was provided, its dependent virtual environments will also be activated.
def deactivate(self, debug: bool = False) -> bool:
Deactivate this virtual environment.
If a `meerschaum.plugins.Plugin` was provided, its dependent virtual environments will also be deactivated.
@property
def target_path(self) -> pathlib.Path:
Return the target site-packages path for this virtual environment.
A `meerschaum.utils.venv.Venv` may have one virtual environment per minor Python version (e.g. Python 3.10 and Python 3.7).
@property
def root_path(self) -> pathlib.Path:
Return the top-level path for this virtual environment.
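For example, inspect a venv's paths (a sketch; the printed paths are illustrative and depend on your Meerschaum root directory and Python version):
import meerschaum as mrsm
venv = mrsm.Venv('noaa')
print(venv.root_path)
# /home/bmeares/.config/meerschaum/venvs/noaa
print(venv.target_path)
# /home/bmeares/.config/meerschaum/venvs/noaa/lib/python3.12/site-packages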
50class Job: 51 """ 52 Manage a `meerschaum.utils.daemon.Daemon`, locally or remotely via the API. 53 """ 54 55 def __init__( 56 self, 57 name: str, 58 sysargs: Union[List[str], str, None] = None, 59 env: Optional[Dict[str, str]] = None, 60 executor_keys: Optional[str] = None, 61 delete_after_completion: bool = False, 62 _properties: Optional[Dict[str, Any]] = None, 63 _rotating_log=None, 64 _stdin_file=None, 65 _status_hook: Optional[Callable[[], str]] = None, 66 _result_hook: Optional[Callable[[], SuccessTuple]] = None, 67 _externally_managed: bool = False, 68 ): 69 """ 70 Create a new job to manage a `meerschaum.utils.daemon.Daemon`. 71 72 Parameters 73 ---------- 74 name: str 75 The name of the job to be created. 76 This will also be used as the Daemon ID. 77 78 sysargs: Union[List[str], str, None], default None 79 The sysargs of the command to be executed, e.g. 'start api'. 80 81 env: Optional[Dict[str, str]], default None 82 If provided, set these environment variables in the job's process. 83 84 executor_keys: Optional[str], default None 85 If provided, execute the job remotely on an API instance, e.g. 'api:main'. 86 87 delete_after_completion: bool, default False 88 If `True`, delete this job when it has finished executing. 89 90 _properties: Optional[Dict[str, Any]], default None 91 If provided, use this to patch the daemon's properties. 92 """ 93 from meerschaum.utils.daemon import Daemon 94 for char in BANNED_CHARS: 95 if char in name: 96 raise ValueError(f"Invalid name: ({char}) is not allowed.") 97 98 if isinstance(sysargs, str): 99 sysargs = shlex.split(sysargs) 100 101 and_key = STATIC_CONFIG['system']['arguments']['and_key'] 102 escaped_and_key = STATIC_CONFIG['system']['arguments']['escaped_and_key'] 103 if sysargs: 104 sysargs = [ 105 (arg if arg != escaped_and_key else and_key) 106 for arg in sysargs 107 ] 108 109 ### NOTE: 'local' and 'systemd' executors are being coalesced. 110 if executor_keys is None: 111 from meerschaum.jobs import get_executor_keys_from_context 112 executor_keys = get_executor_keys_from_context() 113 114 self.executor_keys = executor_keys 115 self.name = name 116 try: 117 self._daemon = ( 118 Daemon(daemon_id=name) 119 if executor_keys == 'local' 120 else None 121 ) 122 except Exception: 123 self._daemon = None 124 125 ### Handle any injected dependencies. 
126 if _rotating_log is not None: 127 self._rotating_log = _rotating_log 128 if self._daemon is not None: 129 self._daemon._rotating_log = _rotating_log 130 131 if _stdin_file is not None: 132 self._stdin_file = _stdin_file 133 if self._daemon is not None: 134 self._daemon._stdin_file = _stdin_file 135 self._daemon._blocking_stdin_file_path = _stdin_file.blocking_file_path 136 137 if _status_hook is not None: 138 self._status_hook = _status_hook 139 140 if _result_hook is not None: 141 self._result_hook = _result_hook 142 143 self._externally_managed = _externally_managed 144 self._properties_patch = _properties or {} 145 if _externally_managed: 146 self._properties_patch.update({'externally_managed': _externally_managed}) 147 148 if env: 149 self._properties_patch.update({'env': env}) 150 151 if delete_after_completion: 152 self._properties_patch.update({'delete_after_completion': delete_after_completion}) 153 154 daemon_sysargs = ( 155 self._daemon.properties.get('target', {}).get('args', [None])[0] 156 if self._daemon is not None 157 else None 158 ) 159 160 if daemon_sysargs and sysargs and daemon_sysargs != sysargs: 161 warn("Given sysargs differ from existing sysargs.") 162 163 self._sysargs = [ 164 arg 165 for arg in (daemon_sysargs or sysargs or []) 166 if arg not in ('-d', '--daemon') 167 ] 168 for restart_flag in RESTART_FLAGS: 169 if restart_flag in self._sysargs: 170 self._properties_patch.update({'restart': True}) 171 break 172 173 @staticmethod 174 def from_pid(pid: int, executor_keys: Optional[str] = None) -> Job: 175 """ 176 Build a `Job` from the PID of a running Meerschaum process. 177 178 Parameters 179 ---------- 180 pid: int 181 The PID of the process. 182 183 executor_keys: Optional[str], default None 184 The executor keys to assign to the job. 185 """ 186 from meerschaum.config.paths import DAEMON_RESOURCES_PATH 187 188 psutil = mrsm.attempt_import('psutil') 189 try: 190 process = psutil.Process(pid) 191 except psutil.NoSuchProcess as e: 192 warn(f"Process with PID {pid} does not exist.", stack=False) 193 raise e 194 195 command_args = process.cmdline() 196 is_daemon = command_args[1] == '-c' 197 198 if is_daemon: 199 daemon_id = command_args[-1].split('daemon_id=')[-1].split(')')[0].replace("'", '') 200 root_dir = process.environ().get(STATIC_CONFIG['environment']['root'], None) 201 if root_dir is None: 202 from meerschaum.config.paths import ROOT_DIR_PATH 203 root_dir = ROOT_DIR_PATH 204 else: 205 root_dir = pathlib.Path(root_dir) 206 jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name 207 daemon_dir = jobs_dir / daemon_id 208 pid_file = daemon_dir / 'process.pid' 209 210 if pid_file.exists(): 211 with open(pid_file, 'r', encoding='utf-8') as f: 212 daemon_pid = int(f.read()) 213 214 if pid != daemon_pid: 215 raise EnvironmentError(f"Differing PIDs: {pid=}, {daemon_pid=}") 216 else: 217 raise EnvironmentError(f"Is job '{daemon_id}' running?") 218 219 return Job(daemon_id, executor_keys=executor_keys) 220 221 from meerschaum._internal.arguments._parse_arguments import parse_arguments 222 from meerschaum.utils.daemon import get_new_daemon_name 223 224 mrsm_ix = 0 225 for i, arg in enumerate(command_args): 226 if 'mrsm' in arg or 'meerschaum' in arg.lower(): 227 mrsm_ix = i 228 break 229 230 sysargs = command_args[mrsm_ix+1:] 231 kwargs = parse_arguments(sysargs) 232 name = kwargs.get('name', get_new_daemon_name()) 233 return Job(name, sysargs, executor_keys=executor_keys) 234 235 def start(self, debug: bool = False) -> SuccessTuple: 236 """ 237 Start the job's daemon. 
238 """ 239 if self.executor is not None: 240 if not self.exists(debug=debug): 241 return self.executor.create_job( 242 self.name, 243 self.sysargs, 244 properties=self.daemon.properties, 245 debug=debug, 246 ) 247 return self.executor.start_job(self.name, debug=debug) 248 249 if self.is_running(): 250 return True, f"{self} is already running." 251 252 success, msg = self.daemon.run( 253 keep_daemon_output=(not self.delete_after_completion), 254 allow_dirty_run=True, 255 ) 256 if not success: 257 return success, msg 258 259 return success, f"Started {self}." 260 261 def stop(self, timeout_seconds: Optional[int] = None, debug: bool = False) -> SuccessTuple: 262 """ 263 Stop the job's daemon. 264 """ 265 if self.executor is not None: 266 return self.executor.stop_job(self.name, debug=debug) 267 268 if self.daemon.status == 'stopped': 269 if not self.restart: 270 return True, f"{self} is not running." 271 elif self.stop_time is not None: 272 return True, f"{self} will not restart until manually started." 273 274 quit_success, quit_msg = self.daemon.quit(timeout=timeout_seconds) 275 if quit_success: 276 return quit_success, f"Stopped {self}." 277 278 warn( 279 f"Failed to gracefully quit {self}.", 280 stack=False, 281 ) 282 kill_success, kill_msg = self.daemon.kill(timeout=timeout_seconds) 283 if not kill_success: 284 return kill_success, kill_msg 285 286 return kill_success, f"Killed {self}." 287 288 def pause(self, timeout_seconds: Optional[int] = None, debug: bool = False) -> SuccessTuple: 289 """ 290 Pause the job's daemon. 291 """ 292 if self.executor is not None: 293 return self.executor.pause_job(self.name, debug=debug) 294 295 pause_success, pause_msg = self.daemon.pause(timeout=timeout_seconds) 296 if not pause_success: 297 return pause_success, pause_msg 298 299 return pause_success, f"Paused {self}." 300 301 def delete(self, debug: bool = False) -> SuccessTuple: 302 """ 303 Delete the job and its daemon. 304 """ 305 if self.executor is not None: 306 return self.executor.delete_job(self.name, debug=debug) 307 308 if self.is_running(): 309 stop_success, stop_msg = self.stop() 310 if not stop_success: 311 return stop_success, stop_msg 312 313 cleanup_success, cleanup_msg = self.daemon.cleanup() 314 if not cleanup_success: 315 return cleanup_success, cleanup_msg 316 317 _ = self.daemon._properties.pop('result', None) 318 return cleanup_success, f"Deleted {self}." 319 320 def is_running(self) -> bool: 321 """ 322 Determine whether the job's daemon is running. 323 """ 324 return self.status == 'running' 325 326 def exists(self, debug: bool = False) -> bool: 327 """ 328 Determine whether the job exists. 329 """ 330 if self.executor is not None: 331 return self.executor.get_job_exists(self.name, debug=debug) 332 333 return self.daemon.path.exists() 334 335 def get_logs(self) -> Union[str, None]: 336 """ 337 Return the output text of the job's daemon. 338 """ 339 if self.executor is not None: 340 return self.executor.get_logs(self.name) 341 342 return self.daemon.log_text 343 344 def monitor_logs( 345 self, 346 callback_function: Callable[[str], None] = partial(print, end=''), 347 input_callback_function: Optional[Callable[[], str]] = None, 348 stop_callback_function: Optional[Callable[[SuccessTuple], None]] = None, 349 stop_event: Optional[asyncio.Event] = None, 350 stop_on_exit: bool = False, 351 strip_timestamps: bool = False, 352 accept_input: bool = True, 353 debug: bool = False, 354 ): 355 """ 356 Monitor the job's log files and execute a callback on new lines. 
357 358 Parameters 359 ---------- 360 callback_function: Callable[[str], None], default partial(print, end='') 361 The callback to execute as new data comes in. 362 Defaults to printing the output directly to `stdout`. 363 364 input_callback_function: Optional[Callable[[], str]], default None 365 If provided, execute this callback when the daemon is blocking on stdin. 366 Defaults to `sys.stdin.readline()`. 367 368 stop_callback_function: Optional[Callable[[SuccessTuple]], str], default None 369 If provided, execute this callback when the daemon stops. 370 The job's SuccessTuple will be passed to the callback. 371 372 stop_event: Optional[asyncio.Event], default None 373 If provided, stop monitoring when this event is set. 374 You may instead raise `meerschaum.jobs.StopMonitoringLogs` 375 from within `callback_function` to stop monitoring. 376 377 stop_on_exit: bool, default False 378 If `True`, stop monitoring when the job stops. 379 380 strip_timestamps: bool, default False 381 If `True`, remove leading timestamps from lines. 382 383 accept_input: bool, default True 384 If `True`, accept input when the daemon blocks on stdin. 385 """ 386 def default_input_callback_function(): 387 return sys.stdin.readline() 388 389 if input_callback_function is None: 390 input_callback_function = default_input_callback_function 391 392 if self.executor is not None: 393 self.executor.monitor_logs( 394 self.name, 395 callback_function, 396 input_callback_function=input_callback_function, 397 stop_callback_function=stop_callback_function, 398 stop_on_exit=stop_on_exit, 399 accept_input=accept_input, 400 strip_timestamps=strip_timestamps, 401 debug=debug, 402 ) 403 return 404 405 monitor_logs_coroutine = self.monitor_logs_async( 406 callback_function=callback_function, 407 input_callback_function=input_callback_function, 408 stop_callback_function=stop_callback_function, 409 stop_event=stop_event, 410 stop_on_exit=stop_on_exit, 411 strip_timestamps=strip_timestamps, 412 accept_input=accept_input, 413 ) 414 return asyncio.run(monitor_logs_coroutine) 415 416 async def monitor_logs_async( 417 self, 418 callback_function: Callable[[str], None] = partial(print, end='', flush=True), 419 input_callback_function: Optional[Callable[[], str]] = None, 420 stop_callback_function: Optional[Callable[[SuccessTuple], None]] = None, 421 stop_event: Optional[asyncio.Event] = None, 422 stop_on_exit: bool = False, 423 strip_timestamps: bool = False, 424 accept_input: bool = True, 425 _logs_path: Optional[pathlib.Path] = None, 426 _log=None, 427 _stdin_file=None, 428 debug: bool = False, 429 ): 430 """ 431 Monitor the job's log files and await a callback on new lines. 432 433 Parameters 434 ---------- 435 callback_function: Callable[[str], None], default partial(print, end='') 436 The callback to execute as new data comes in. 437 Defaults to printing the output directly to `stdout`. 438 439 input_callback_function: Optional[Callable[[], str]], default None 440 If provided, execute this callback when the daemon is blocking on stdin. 441 Defaults to `sys.stdin.readline()`. 442 443 stop_callback_function: Optional[Callable[[SuccessTuple]], str], default None 444 If provided, execute this callback when the daemon stops. 445 The job's SuccessTuple will be passed to the callback. 446 447 stop_event: Optional[asyncio.Event], default None 448 If provided, stop monitoring when this event is set. 449 You may instead raise `meerschaum.jobs.StopMonitoringLogs` 450 from within `callback_function` to stop monitoring. 
451 452 stop_on_exit: bool, default False 453 If `True`, stop monitoring when the job stops. 454 455 strip_timestamps: bool, default False 456 If `True`, remove leading timestamps from lines. 457 458 accept_input: bool, default True 459 If `True`, accept input when the daemon blocks on stdin. 460 """ 461 def default_input_callback_function(): 462 return sys.stdin.readline() 463 464 if input_callback_function is None: 465 input_callback_function = default_input_callback_function 466 467 if self.executor is not None: 468 await self.executor.monitor_logs_async( 469 self.name, 470 callback_function, 471 input_callback_function=input_callback_function, 472 stop_callback_function=stop_callback_function, 473 stop_on_exit=stop_on_exit, 474 strip_timestamps=strip_timestamps, 475 accept_input=accept_input, 476 debug=debug, 477 ) 478 return 479 480 from meerschaum.utils.formatting._jobs import strip_timestamp_from_line 481 482 events = { 483 'user': stop_event, 484 'stopped': asyncio.Event(), 485 } 486 combined_event = asyncio.Event() 487 emitted_text = False 488 stdin_file = _stdin_file if _stdin_file is not None else self.daemon.stdin_file 489 490 async def check_job_status(): 491 nonlocal emitted_text 492 stopped_event = events.get('stopped', None) 493 if stopped_event is None: 494 return 495 496 sleep_time = 0.1 497 while sleep_time < 60: 498 if self.status == 'stopped': 499 if not emitted_text: 500 await asyncio.sleep(sleep_time) 501 sleep_time = round(sleep_time * 1.1, 2) 502 continue 503 504 if stop_callback_function is not None: 505 try: 506 if asyncio.iscoroutinefunction(stop_callback_function): 507 await stop_callback_function(self.result) 508 else: 509 stop_callback_function(self.result) 510 except asyncio.exceptions.CancelledError: 511 break 512 except Exception: 513 warn(traceback.format_exc()) 514 515 if stop_on_exit: 516 events['stopped'].set() 517 518 break 519 await asyncio.sleep(0.1) 520 521 async def check_blocking_on_input(): 522 while True: 523 if not emitted_text or not self.is_blocking_on_stdin(): 524 try: 525 await asyncio.sleep(0.1) 526 except asyncio.exceptions.CancelledError: 527 break 528 continue 529 530 if not self.is_running(): 531 break 532 533 await emit_latest_lines() 534 535 try: 536 print('', end='', flush=True) 537 if asyncio.iscoroutinefunction(input_callback_function): 538 data = await input_callback_function() 539 else: 540 data = input_callback_function() 541 except KeyboardInterrupt: 542 break 543 if not data.endswith('\n'): 544 data += '\n' 545 546 stdin_file.write(data) 547 await asyncio.sleep(0.1) 548 549 async def combine_events(): 550 event_tasks = [ 551 asyncio.create_task(event.wait()) 552 for event in events.values() 553 if event is not None 554 ] 555 if not event_tasks: 556 return 557 558 try: 559 done, pending = await asyncio.wait( 560 event_tasks, 561 return_when=asyncio.FIRST_COMPLETED, 562 ) 563 for task in pending: 564 task.cancel() 565 except asyncio.exceptions.CancelledError: 566 pass 567 finally: 568 combined_event.set() 569 570 check_job_status_task = asyncio.create_task(check_job_status()) 571 check_blocking_on_input_task = asyncio.create_task(check_blocking_on_input()) 572 combine_events_task = asyncio.create_task(combine_events()) 573 574 log = _log if _log is not None else self.daemon.rotating_log 575 lines_to_show = get_config('jobs', 'logs', 'lines_to_show') 576 577 async def emit_latest_lines(): 578 nonlocal emitted_text 579 lines = log.readlines() 580 for line in lines[(-1 * lines_to_show):]: 581 if stop_event is not None and 
stop_event.is_set(): 582 return 583 584 if strip_timestamps: 585 line = strip_timestamp_from_line(line) 586 587 try: 588 if asyncio.iscoroutinefunction(callback_function): 589 await callback_function(line) 590 else: 591 callback_function(line) 592 emitted_text = True 593 except StopMonitoringLogs: 594 return 595 except Exception: 596 warn(f"Error in logs callback:\n{traceback.format_exc()}") 597 598 await emit_latest_lines() 599 600 tasks = ( 601 [check_job_status_task] 602 + ([check_blocking_on_input_task] if accept_input else []) 603 + [combine_events_task] 604 ) 605 try: 606 _ = asyncio.gather(*tasks, return_exceptions=True) 607 except asyncio.exceptions.CancelledError: 608 raise 609 except Exception: 610 warn(f"Failed to run async checks:\n{traceback.format_exc()}") 611 612 watchfiles = mrsm.attempt_import('watchfiles') 613 async for changes in watchfiles.awatch( 614 _logs_path or LOGS_RESOURCES_PATH, 615 stop_event=combined_event, 616 ): 617 for change in changes: 618 file_path_str = change[1] 619 file_path = pathlib.Path(file_path_str) 620 latest_subfile_path = log.get_latest_subfile_path() 621 if latest_subfile_path != file_path: 622 continue 623 624 await emit_latest_lines() 625 626 await emit_latest_lines() 627 628 def is_blocking_on_stdin(self, debug: bool = False) -> bool: 629 """ 630 Return whether a job's daemon is blocking on stdin. 631 """ 632 if self.executor is not None: 633 return self.executor.get_job_is_blocking_on_stdin(self.name, debug=debug) 634 635 return self.is_running() and self.daemon.blocking_stdin_file_path.exists() 636 637 def write_stdin(self, data): 638 """ 639 Write to a job's daemon's `stdin`. 640 """ 641 self.daemon.stdin_file.write(data) 642 643 @property 644 def executor(self) -> Union[Executor, None]: 645 """ 646 If the job is remote, return the connector to the remote API instance. 647 """ 648 return ( 649 mrsm.get_connector(self.executor_keys) 650 if self.executor_keys != 'local' 651 else None 652 ) 653 654 @property 655 def status(self) -> str: 656 """ 657 Return the running status of the job's daemon. 658 """ 659 if '_status_hook' in self.__dict__: 660 return self._status_hook() 661 662 if self.executor is not None: 663 return self.executor.get_job_status(self.name) 664 665 return self.daemon.status 666 667 @property 668 def pid(self) -> Union[int, None]: 669 """ 670 Return the PID of the job's dameon. 671 """ 672 if self.executor is not None: 673 return self.executor.get_job_metadata(self.name).get('daemon', {}).get('pid', None) 674 675 return self.daemon.pid 676 677 @property 678 def restart(self) -> bool: 679 """ 680 Return whether to restart a stopped job. 681 """ 682 if self.executor is not None: 683 return self.executor.get_job_metadata(self.name).get('restart', False) 684 685 return self.daemon.properties.get('restart', False) 686 687 @property 688 def result(self) -> SuccessTuple: 689 """ 690 Return the `SuccessTuple` when the job has terminated. 691 """ 692 if self.is_running(): 693 return True, f"{self} is running." 694 695 if '_result_hook' in self.__dict__: 696 return self._result_hook() 697 698 if self.executor is not None: 699 return ( 700 self.executor.get_job_metadata(self.name) 701 .get('result', (False, "No result available.")) 702 ) 703 704 _result = self.daemon.properties.get('result', None) 705 if _result is None: 706 return False, "No result available." 707 708 return tuple(_result) 709 710 @property 711 def sysargs(self) -> List[str]: 712 """ 713 Return the sysargs to use for the Daemon. 
714 """ 715 if self._sysargs: 716 return self._sysargs 717 718 if self.executor is not None: 719 return self.executor.get_job_metadata(self.name).get('sysargs', []) 720 721 target_args = self.daemon.target_args 722 if target_args is None: 723 return [] 724 self._sysargs = target_args[0] if len(target_args) > 0 else [] 725 return self._sysargs 726 727 @property 728 def daemon(self) -> 'Daemon': 729 """ 730 Return the daemon which this job manages. 731 """ 732 from meerschaum.utils.daemon import Daemon 733 if self._daemon is not None and self.executor is None and self._sysargs: 734 return self._daemon 735 736 remote_properties = ( 737 {} 738 if self.executor is None 739 else self.executor.get_job_properties(self.name) 740 ) 741 properties = {**remote_properties, **self._properties_patch} 742 743 self._daemon = Daemon( 744 target=entry, 745 target_args=[self._sysargs], 746 target_kw={}, 747 daemon_id=self.name, 748 label=shlex.join(self._sysargs), 749 properties=properties, 750 ) 751 if '_rotating_log' in self.__dict__: 752 self._daemon._rotating_log = self._rotating_log 753 754 if '_stdin_file' in self.__dict__: 755 self._daemon._stdin_file = self._stdin_file 756 self._daemon._blocking_stdin_file_path = self._stdin_file.blocking_file_path 757 758 return self._daemon 759 760 @property 761 def began(self) -> Union[datetime, None]: 762 """ 763 The datetime when the job began running. 764 """ 765 if self.executor is not None: 766 began_str = self.executor.get_job_began(self.name) 767 if began_str is None: 768 return None 769 return ( 770 datetime.fromisoformat(began_str) 771 .astimezone(timezone.utc) 772 .replace(tzinfo=None) 773 ) 774 775 began_str = self.daemon.properties.get('process', {}).get('began', None) 776 if began_str is None: 777 return None 778 779 return datetime.fromisoformat(began_str) 780 781 @property 782 def ended(self) -> Union[datetime, None]: 783 """ 784 The datetime when the job stopped running. 785 """ 786 if self.executor is not None: 787 ended_str = self.executor.get_job_ended(self.name) 788 if ended_str is None: 789 return None 790 return ( 791 datetime.fromisoformat(ended_str) 792 .astimezone(timezone.utc) 793 .replace(tzinfo=None) 794 ) 795 796 ended_str = self.daemon.properties.get('process', {}).get('ended', None) 797 if ended_str is None: 798 return None 799 800 return datetime.fromisoformat(ended_str) 801 802 @property 803 def paused(self) -> Union[datetime, None]: 804 """ 805 The datetime when the job was suspended while running. 806 """ 807 if self.executor is not None: 808 paused_str = self.executor.get_job_paused(self.name) 809 if paused_str is None: 810 return None 811 return ( 812 datetime.fromisoformat(paused_str) 813 .astimezone(timezone.utc) 814 .replace(tzinfo=None) 815 ) 816 817 paused_str = self.daemon.properties.get('process', {}).get('paused', None) 818 if paused_str is None: 819 return None 820 821 return datetime.fromisoformat(paused_str) 822 823 @property 824 def stop_time(self) -> Union[datetime, None]: 825 """ 826 Return the timestamp when the job was manually stopped. 
827 """ 828 if self.executor is not None: 829 return self.executor.get_job_stop_time(self.name) 830 831 if not self.daemon.stop_path.exists(): 832 return None 833 834 stop_data = self.daemon._read_stop_file() 835 if not stop_data: 836 return None 837 838 stop_time_str = stop_data.get('stop_time', None) 839 if not stop_time_str: 840 warn(f"Could not read stop time for {self}.") 841 return None 842 843 return datetime.fromisoformat(stop_time_str) 844 845 @property 846 def hidden(self) -> bool: 847 """ 848 Return a bool indicating whether this job should be displayed. 849 """ 850 return ( 851 self.name.startswith('_') 852 or self.name.startswith('.') 853 or self._is_externally_managed 854 ) 855 856 def check_restart(self) -> SuccessTuple: 857 """ 858 If `restart` is `True` and the daemon is not running, 859 restart the job. 860 Do not restart if the job was manually stopped. 861 """ 862 if self.is_running(): 863 return True, f"{self} is running." 864 865 if not self.restart: 866 return True, f"{self} does not need to be restarted." 867 868 if self.stop_time is not None: 869 return True, f"{self} was manually stopped." 870 871 return self.start() 872 873 @property 874 def label(self) -> str: 875 """ 876 Return the job's Daemon label (joined sysargs). 877 """ 878 from meerschaum._internal.arguments import compress_pipeline_sysargs 879 sysargs = compress_pipeline_sysargs(self.sysargs) 880 return shlex.join(sysargs).replace(' + ', '\n+ ').replace(' : ', '\n: ').lstrip().rstrip() 881 882 @property 883 def _externally_managed_file(self) -> pathlib.Path: 884 """ 885 Return the path to the externally managed file. 886 """ 887 return self.daemon.path / '.externally-managed' 888 889 def _set_externally_managed(self): 890 """ 891 Set this job as externally managed. 892 """ 893 self._externally_managed = True 894 try: 895 self._externally_managed_file.parent.mkdir(exist_ok=True, parents=True) 896 self._externally_managed_file.touch() 897 except Exception as e: 898 warn(e) 899 900 @property 901 def _is_externally_managed(self) -> bool: 902 """ 903 Return whether this job is externally managed. 904 """ 905 return self.executor_keys in (None, 'local') and ( 906 self._externally_managed or self._externally_managed_file.exists() 907 ) 908 909 @property 910 def env(self) -> Dict[str, str]: 911 """ 912 Return the environment variables to set for the job's process. 913 """ 914 if '_env' in self.__dict__: 915 return self.__dict__['_env'] 916 917 _env = self.daemon.properties.get('env', {}) 918 default_env = { 919 'PYTHONUNBUFFERED': '1', 920 'LINES': str(get_config('jobs', 'terminal', 'lines')), 921 'COLUMNS': str(get_config('jobs', 'terminal', 'columns')), 922 STATIC_CONFIG['environment']['noninteractive']: 'true', 923 } 924 self._env = {**default_env, **_env} 925 return self._env 926 927 @property 928 def delete_after_completion(self) -> bool: 929 """ 930 Return whether this job is configured to delete itself after completion. 
931 """ 932 if '_delete_after_completion' in self.__dict__: 933 return self.__dict__.get('_delete_after_completion', False) 934 935 self._delete_after_completion = self.daemon.properties.get('delete_after_completion', False) 936 return self._delete_after_completion 937 938 def __str__(self) -> str: 939 sysargs = self.sysargs 940 sysargs_str = shlex.join(sysargs) if sysargs else '' 941 job_str = f'Job("{self.name}"' 942 if sysargs_str: 943 job_str += f', "{sysargs_str}"' 944 945 job_str += ')' 946 return job_str 947 948 def __repr__(self) -> str: 949 return str(self) 950 951 def __hash__(self) -> int: 952 return hash(self.name)
Manage a meerschaum.utils.daemon.Daemon, locally or remotely via the API.
55 def __init__( 56 self, 57 name: str, 58 sysargs: Union[List[str], str, None] = None, 59 env: Optional[Dict[str, str]] = None, 60 executor_keys: Optional[str] = None, 61 delete_after_completion: bool = False, 62 _properties: Optional[Dict[str, Any]] = None, 63 _rotating_log=None, 64 _stdin_file=None, 65 _status_hook: Optional[Callable[[], str]] = None, 66 _result_hook: Optional[Callable[[], SuccessTuple]] = None, 67 _externally_managed: bool = False, 68 ): 69 """ 70 Create a new job to manage a `meerschaum.utils.daemon.Daemon`. 71 72 Parameters 73 ---------- 74 name: str 75 The name of the job to be created. 76 This will also be used as the Daemon ID. 77 78 sysargs: Union[List[str], str, None], default None 79 The sysargs of the command to be executed, e.g. 'start api'. 80 81 env: Optional[Dict[str, str]], default None 82 If provided, set these environment variables in the job's process. 83 84 executor_keys: Optional[str], default None 85 If provided, execute the job remotely on an API instance, e.g. 'api:main'. 86 87 delete_after_completion: bool, default False 88 If `True`, delete this job when it has finished executing. 89 90 _properties: Optional[Dict[str, Any]], default None 91 If provided, use this to patch the daemon's properties. 92 """ 93 from meerschaum.utils.daemon import Daemon 94 for char in BANNED_CHARS: 95 if char in name: 96 raise ValueError(f"Invalid name: ({char}) is not allowed.") 97 98 if isinstance(sysargs, str): 99 sysargs = shlex.split(sysargs) 100 101 and_key = STATIC_CONFIG['system']['arguments']['and_key'] 102 escaped_and_key = STATIC_CONFIG['system']['arguments']['escaped_and_key'] 103 if sysargs: 104 sysargs = [ 105 (arg if arg != escaped_and_key else and_key) 106 for arg in sysargs 107 ] 108 109 ### NOTE: 'local' and 'systemd' executors are being coalesced. 110 if executor_keys is None: 111 from meerschaum.jobs import get_executor_keys_from_context 112 executor_keys = get_executor_keys_from_context() 113 114 self.executor_keys = executor_keys 115 self.name = name 116 try: 117 self._daemon = ( 118 Daemon(daemon_id=name) 119 if executor_keys == 'local' 120 else None 121 ) 122 except Exception: 123 self._daemon = None 124 125 ### Handle any injected dependencies. 
126 if _rotating_log is not None: 127 self._rotating_log = _rotating_log 128 if self._daemon is not None: 129 self._daemon._rotating_log = _rotating_log 130 131 if _stdin_file is not None: 132 self._stdin_file = _stdin_file 133 if self._daemon is not None: 134 self._daemon._stdin_file = _stdin_file 135 self._daemon._blocking_stdin_file_path = _stdin_file.blocking_file_path 136 137 if _status_hook is not None: 138 self._status_hook = _status_hook 139 140 if _result_hook is not None: 141 self._result_hook = _result_hook 142 143 self._externally_managed = _externally_managed 144 self._properties_patch = _properties or {} 145 if _externally_managed: 146 self._properties_patch.update({'externally_managed': _externally_managed}) 147 148 if env: 149 self._properties_patch.update({'env': env}) 150 151 if delete_after_completion: 152 self._properties_patch.update({'delete_after_completion': delete_after_completion}) 153 154 daemon_sysargs = ( 155 self._daemon.properties.get('target', {}).get('args', [None])[0] 156 if self._daemon is not None 157 else None 158 ) 159 160 if daemon_sysargs and sysargs and daemon_sysargs != sysargs: 161 warn("Given sysargs differ from existing sysargs.") 162 163 self._sysargs = [ 164 arg 165 for arg in (daemon_sysargs or sysargs or []) 166 if arg not in ('-d', '--daemon') 167 ] 168 for restart_flag in RESTART_FLAGS: 169 if restart_flag in self._sysargs: 170 self._properties_patch.update({'restart': True}) 171 break
Create a new job to manage a meerschaum.utils.daemon.Daemon.
Parameters
- name (str): The name of the job to be created. This will also be used as the Daemon ID.
- sysargs (Union[List[str], str, None], default None): The sysargs of the command to be executed, e.g. 'start api'.
- env (Optional[Dict[str, str]], default None): If provided, set these environment variables in the job's process.
- executor_keys (Optional[str], default None): If provided, execute the job remotely on an API instance, e.g. 'api:main'.
- delete_after_completion (bool, default False): If True, delete this job when it has finished executing.
- _properties (Optional[Dict[str, Any]], default None): If provided, use this to patch the daemon's properties.
173 @staticmethod 174 def from_pid(pid: int, executor_keys: Optional[str] = None) -> Job: 175 """ 176 Build a `Job` from the PID of a running Meerschaum process. 177 178 Parameters 179 ---------- 180 pid: int 181 The PID of the process. 182 183 executor_keys: Optional[str], default None 184 The executor keys to assign to the job. 185 """ 186 from meerschaum.config.paths import DAEMON_RESOURCES_PATH 187 188 psutil = mrsm.attempt_import('psutil') 189 try: 190 process = psutil.Process(pid) 191 except psutil.NoSuchProcess as e: 192 warn(f"Process with PID {pid} does not exist.", stack=False) 193 raise e 194 195 command_args = process.cmdline() 196 is_daemon = command_args[1] == '-c' 197 198 if is_daemon: 199 daemon_id = command_args[-1].split('daemon_id=')[-1].split(')')[0].replace("'", '') 200 root_dir = process.environ().get(STATIC_CONFIG['environment']['root'], None) 201 if root_dir is None: 202 from meerschaum.config.paths import ROOT_DIR_PATH 203 root_dir = ROOT_DIR_PATH 204 else: 205 root_dir = pathlib.Path(root_dir) 206 jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name 207 daemon_dir = jobs_dir / daemon_id 208 pid_file = daemon_dir / 'process.pid' 209 210 if pid_file.exists(): 211 with open(pid_file, 'r', encoding='utf-8') as f: 212 daemon_pid = int(f.read()) 213 214 if pid != daemon_pid: 215 raise EnvironmentError(f"Differing PIDs: {pid=}, {daemon_pid=}") 216 else: 217 raise EnvironmentError(f"Is job '{daemon_id}' running?") 218 219 return Job(daemon_id, executor_keys=executor_keys) 220 221 from meerschaum._internal.arguments._parse_arguments import parse_arguments 222 from meerschaum.utils.daemon import get_new_daemon_name 223 224 mrsm_ix = 0 225 for i, arg in enumerate(command_args): 226 if 'mrsm' in arg or 'meerschaum' in arg.lower(): 227 mrsm_ix = i 228 break 229 230 sysargs = command_args[mrsm_ix+1:] 231 kwargs = parse_arguments(sysargs) 232 name = kwargs.get('name', get_new_daemon_name()) 233 return Job(name, sysargs, executor_keys=executor_keys)
Build a Job from the PID of a running Meerschaum process.
Parameters
- pid (int): The PID of the process.
- executor_keys (Optional[str], default None): The executor keys to assign to the job.
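For example, you can rebuild a Job from a running daemon's PID (the PID below is a placeholder):

import meerschaum as mrsm

job = mrsm.Job.from_pid(12345)  # placeholder PID of a running Meerschaum process
print(job.name)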
235 def start(self, debug: bool = False) -> SuccessTuple: 236 """ 237 Start the job's daemon. 238 """ 239 if self.executor is not None: 240 if not self.exists(debug=debug): 241 return self.executor.create_job( 242 self.name, 243 self.sysargs, 244 properties=self.daemon.properties, 245 debug=debug, 246 ) 247 return self.executor.start_job(self.name, debug=debug) 248 249 if self.is_running(): 250 return True, f"{self} is already running." 251 252 success, msg = self.daemon.run( 253 keep_daemon_output=(not self.delete_after_completion), 254 allow_dirty_run=True, 255 ) 256 if not success: 257 return success, msg 258 259 return success, f"Started {self}."
Start the job's daemon.
261 def stop(self, timeout_seconds: Optional[int] = None, debug: bool = False) -> SuccessTuple: 262 """ 263 Stop the job's daemon. 264 """ 265 if self.executor is not None: 266 return self.executor.stop_job(self.name, debug=debug) 267 268 if self.daemon.status == 'stopped': 269 if not self.restart: 270 return True, f"{self} is not running." 271 elif self.stop_time is not None: 272 return True, f"{self} will not restart until manually started." 273 274 quit_success, quit_msg = self.daemon.quit(timeout=timeout_seconds) 275 if quit_success: 276 return quit_success, f"Stopped {self}." 277 278 warn( 279 f"Failed to gracefully quit {self}.", 280 stack=False, 281 ) 282 kill_success, kill_msg = self.daemon.kill(timeout=timeout_seconds) 283 if not kill_success: 284 return kill_success, kill_msg 285 286 return kill_success, f"Killed {self}."
Stop the job's daemon.
288 def pause(self, timeout_seconds: Optional[int] = None, debug: bool = False) -> SuccessTuple: 289 """ 290 Pause the job's daemon. 291 """ 292 if self.executor is not None: 293 return self.executor.pause_job(self.name, debug=debug) 294 295 pause_success, pause_msg = self.daemon.pause(timeout=timeout_seconds) 296 if not pause_success: 297 return pause_success, pause_msg 298 299 return pause_success, f"Paused {self}."
Pause the job's daemon.
301 def delete(self, debug: bool = False) -> SuccessTuple: 302 """ 303 Delete the job and its daemon. 304 """ 305 if self.executor is not None: 306 return self.executor.delete_job(self.name, debug=debug) 307 308 if self.is_running(): 309 stop_success, stop_msg = self.stop() 310 if not stop_success: 311 return stop_success, stop_msg 312 313 cleanup_success, cleanup_msg = self.daemon.cleanup() 314 if not cleanup_success: 315 return cleanup_success, cleanup_msg 316 317 _ = self.daemon._properties.pop('result', None) 318 return cleanup_success, f"Deleted {self}."
Delete the job and its daemon.
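The lifecycle methods each return a SuccessTuple. A minimal sketch of the local lifecycle (the job name and command are illustrative):

import meerschaum as mrsm

job = mrsm.Job('demo-job', 'sync pipes --loop')
success, msg = job.start()
job.pause()   # suspend the daemon's process
job.stop()    # gracefully quit, killing only if necessary
job.delete()  # stop if running, then remove the daemon's files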
320 def is_running(self) -> bool: 321 """ 322 Determine whether the job's daemon is running. 323 """ 324 return self.status == 'running'
Determine whether the job's daemon is running.
326 def exists(self, debug: bool = False) -> bool: 327 """ 328 Determine whether the job exists. 329 """ 330 if self.executor is not None: 331 return self.executor.get_job_exists(self.name, debug=debug) 332 333 return self.daemon.path.exists()
Determine whether the job exists.
335 def get_logs(self) -> Union[str, None]: 336 """ 337 Return the output text of the job's daemon. 338 """ 339 if self.executor is not None: 340 return self.executor.get_logs(self.name) 341 342 return self.daemon.log_text
Return the output text of the job's daemon.
344 def monitor_logs( 345 self, 346 callback_function: Callable[[str], None] = partial(print, end=''), 347 input_callback_function: Optional[Callable[[], str]] = None, 348 stop_callback_function: Optional[Callable[[SuccessTuple], None]] = None, 349 stop_event: Optional[asyncio.Event] = None, 350 stop_on_exit: bool = False, 351 strip_timestamps: bool = False, 352 accept_input: bool = True, 353 debug: bool = False, 354 ): 355 """ 356 Monitor the job's log files and execute a callback on new lines. 357 358 Parameters 359 ---------- 360 callback_function: Callable[[str], None], default partial(print, end='') 361 The callback to execute as new data comes in. 362 Defaults to printing the output directly to `stdout`. 363 364 input_callback_function: Optional[Callable[[], str]], default None 365 If provided, execute this callback when the daemon is blocking on stdin. 366 Defaults to `sys.stdin.readline()`. 367 368 stop_callback_function: Optional[Callable[[SuccessTuple]], str], default None 369 If provided, execute this callback when the daemon stops. 370 The job's SuccessTuple will be passed to the callback. 371 372 stop_event: Optional[asyncio.Event], default None 373 If provided, stop monitoring when this event is set. 374 You may instead raise `meerschaum.jobs.StopMonitoringLogs` 375 from within `callback_function` to stop monitoring. 376 377 stop_on_exit: bool, default False 378 If `True`, stop monitoring when the job stops. 379 380 strip_timestamps: bool, default False 381 If `True`, remove leading timestamps from lines. 382 383 accept_input: bool, default True 384 If `True`, accept input when the daemon blocks on stdin. 385 """ 386 def default_input_callback_function(): 387 return sys.stdin.readline() 388 389 if input_callback_function is None: 390 input_callback_function = default_input_callback_function 391 392 if self.executor is not None: 393 self.executor.monitor_logs( 394 self.name, 395 callback_function, 396 input_callback_function=input_callback_function, 397 stop_callback_function=stop_callback_function, 398 stop_on_exit=stop_on_exit, 399 accept_input=accept_input, 400 strip_timestamps=strip_timestamps, 401 debug=debug, 402 ) 403 return 404 405 monitor_logs_coroutine = self.monitor_logs_async( 406 callback_function=callback_function, 407 input_callback_function=input_callback_function, 408 stop_callback_function=stop_callback_function, 409 stop_event=stop_event, 410 stop_on_exit=stop_on_exit, 411 strip_timestamps=strip_timestamps, 412 accept_input=accept_input, 413 ) 414 return asyncio.run(monitor_logs_coroutine)
Monitor the job's log files and execute a callback on new lines.
Parameters
- callback_function (Callable[[str], None], default partial(print, end='')): The callback to execute as new data comes in. Defaults to printing the output directly to stdout.
- input_callback_function (Optional[Callable[[], str]], default None): If provided, execute this callback when the daemon is blocking on stdin. Defaults to sys.stdin.readline().
- stop_callback_function (Optional[Callable[[SuccessTuple], None]], default None): If provided, execute this callback when the daemon stops. The job's SuccessTuple will be passed to the callback.
- stop_event (Optional[asyncio.Event], default None): If provided, stop monitoring when this event is set. You may instead raise meerschaum.jobs.StopMonitoringLogs from within callback_function to stop monitoring.
- stop_on_exit (bool, default False): If True, stop monitoring when the job stops.
- strip_timestamps (bool, default False): If True, remove leading timestamps from lines.
- accept_input (bool, default True): If True, accept input when the daemon blocks on stdin.
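For example, a minimal sketch which tails an existing job's logs until it exits (the job name is illustrative):

import meerschaum as mrsm

job = mrsm.Job('demo-job')

def handle_line(line: str):
    print(line, end='')

# Blocks until the job stops (because stop_on_exit=True) or Ctrl-C.
job.monitor_logs(handle_line, stop_on_exit=True, strip_timestamps=True)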
416 async def monitor_logs_async( 417 self, 418 callback_function: Callable[[str], None] = partial(print, end='', flush=True), 419 input_callback_function: Optional[Callable[[], str]] = None, 420 stop_callback_function: Optional[Callable[[SuccessTuple], None]] = None, 421 stop_event: Optional[asyncio.Event] = None, 422 stop_on_exit: bool = False, 423 strip_timestamps: bool = False, 424 accept_input: bool = True, 425 _logs_path: Optional[pathlib.Path] = None, 426 _log=None, 427 _stdin_file=None, 428 debug: bool = False, 429 ): 430 """ 431 Monitor the job's log files and await a callback on new lines. 432 433 Parameters 434 ---------- 435 callback_function: Callable[[str], None], default partial(print, end='') 436 The callback to execute as new data comes in. 437 Defaults to printing the output directly to `stdout`. 438 439 input_callback_function: Optional[Callable[[], str]], default None 440 If provided, execute this callback when the daemon is blocking on stdin. 441 Defaults to `sys.stdin.readline()`. 442 443 stop_callback_function: Optional[Callable[[SuccessTuple]], str], default None 444 If provided, execute this callback when the daemon stops. 445 The job's SuccessTuple will be passed to the callback. 446 447 stop_event: Optional[asyncio.Event], default None 448 If provided, stop monitoring when this event is set. 449 You may instead raise `meerschaum.jobs.StopMonitoringLogs` 450 from within `callback_function` to stop monitoring. 451 452 stop_on_exit: bool, default False 453 If `True`, stop monitoring when the job stops. 454 455 strip_timestamps: bool, default False 456 If `True`, remove leading timestamps from lines. 457 458 accept_input: bool, default True 459 If `True`, accept input when the daemon blocks on stdin. 460 """ 461 def default_input_callback_function(): 462 return sys.stdin.readline() 463 464 if input_callback_function is None: 465 input_callback_function = default_input_callback_function 466 467 if self.executor is not None: 468 await self.executor.monitor_logs_async( 469 self.name, 470 callback_function, 471 input_callback_function=input_callback_function, 472 stop_callback_function=stop_callback_function, 473 stop_on_exit=stop_on_exit, 474 strip_timestamps=strip_timestamps, 475 accept_input=accept_input, 476 debug=debug, 477 ) 478 return 479 480 from meerschaum.utils.formatting._jobs import strip_timestamp_from_line 481 482 events = { 483 'user': stop_event, 484 'stopped': asyncio.Event(), 485 } 486 combined_event = asyncio.Event() 487 emitted_text = False 488 stdin_file = _stdin_file if _stdin_file is not None else self.daemon.stdin_file 489 490 async def check_job_status(): 491 nonlocal emitted_text 492 stopped_event = events.get('stopped', None) 493 if stopped_event is None: 494 return 495 496 sleep_time = 0.1 497 while sleep_time < 60: 498 if self.status == 'stopped': 499 if not emitted_text: 500 await asyncio.sleep(sleep_time) 501 sleep_time = round(sleep_time * 1.1, 2) 502 continue 503 504 if stop_callback_function is not None: 505 try: 506 if asyncio.iscoroutinefunction(stop_callback_function): 507 await stop_callback_function(self.result) 508 else: 509 stop_callback_function(self.result) 510 except asyncio.exceptions.CancelledError: 511 break 512 except Exception: 513 warn(traceback.format_exc()) 514 515 if stop_on_exit: 516 events['stopped'].set() 517 518 break 519 await asyncio.sleep(0.1) 520 521 async def check_blocking_on_input(): 522 while True: 523 if not emitted_text or not self.is_blocking_on_stdin(): 524 try: 525 await asyncio.sleep(0.1) 526 except 
asyncio.exceptions.CancelledError: 527 break 528 continue 529 530 if not self.is_running(): 531 break 532 533 await emit_latest_lines() 534 535 try: 536 print('', end='', flush=True) 537 if asyncio.iscoroutinefunction(input_callback_function): 538 data = await input_callback_function() 539 else: 540 data = input_callback_function() 541 except KeyboardInterrupt: 542 break 543 if not data.endswith('\n'): 544 data += '\n' 545 546 stdin_file.write(data) 547 await asyncio.sleep(0.1) 548 549 async def combine_events(): 550 event_tasks = [ 551 asyncio.create_task(event.wait()) 552 for event in events.values() 553 if event is not None 554 ] 555 if not event_tasks: 556 return 557 558 try: 559 done, pending = await asyncio.wait( 560 event_tasks, 561 return_when=asyncio.FIRST_COMPLETED, 562 ) 563 for task in pending: 564 task.cancel() 565 except asyncio.exceptions.CancelledError: 566 pass 567 finally: 568 combined_event.set() 569 570 check_job_status_task = asyncio.create_task(check_job_status()) 571 check_blocking_on_input_task = asyncio.create_task(check_blocking_on_input()) 572 combine_events_task = asyncio.create_task(combine_events()) 573 574 log = _log if _log is not None else self.daemon.rotating_log 575 lines_to_show = get_config('jobs', 'logs', 'lines_to_show') 576 577 async def emit_latest_lines(): 578 nonlocal emitted_text 579 lines = log.readlines() 580 for line in lines[(-1 * lines_to_show):]: 581 if stop_event is not None and stop_event.is_set(): 582 return 583 584 if strip_timestamps: 585 line = strip_timestamp_from_line(line) 586 587 try: 588 if asyncio.iscoroutinefunction(callback_function): 589 await callback_function(line) 590 else: 591 callback_function(line) 592 emitted_text = True 593 except StopMonitoringLogs: 594 return 595 except Exception: 596 warn(f"Error in logs callback:\n{traceback.format_exc()}") 597 598 await emit_latest_lines() 599 600 tasks = ( 601 [check_job_status_task] 602 + ([check_blocking_on_input_task] if accept_input else []) 603 + [combine_events_task] 604 ) 605 try: 606 _ = asyncio.gather(*tasks, return_exceptions=True) 607 except asyncio.exceptions.CancelledError: 608 raise 609 except Exception: 610 warn(f"Failed to run async checks:\n{traceback.format_exc()}") 611 612 watchfiles = mrsm.attempt_import('watchfiles') 613 async for changes in watchfiles.awatch( 614 _logs_path or LOGS_RESOURCES_PATH, 615 stop_event=combined_event, 616 ): 617 for change in changes: 618 file_path_str = change[1] 619 file_path = pathlib.Path(file_path_str) 620 latest_subfile_path = log.get_latest_subfile_path() 621 if latest_subfile_path != file_path: 622 continue 623 624 await emit_latest_lines() 625 626 await emit_latest_lines()
Monitor the job's log files and await a callback on new lines.
Parameters
- callback_function (Callable[[str], None], default partial(print, end='')): The callback to execute as new data comes in. Defaults to printing the output directly to stdout.
- input_callback_function (Optional[Callable[[], str]], default None): If provided, execute this callback when the daemon is blocking on stdin. Defaults to sys.stdin.readline().
- stop_callback_function (Optional[Callable[[SuccessTuple], None]], default None): If provided, execute this callback when the daemon stops. The job's SuccessTuple will be passed to the callback.
- stop_event (Optional[asyncio.Event], default None): If provided, stop monitoring when this event is set. You may instead raise meerschaum.jobs.StopMonitoringLogs from within callback_function to stop monitoring.
- stop_on_exit (bool, default False): If True, stop monitoring when the job stops.
- strip_timestamps (bool, default False): If True, remove leading timestamps from lines.
- accept_input (bool, default True): If True, accept input when the daemon blocks on stdin.
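Because this variant is a coroutine, it may be awaited alongside other tasks. A sketch which stops monitoring via a stop_event (the sentinel substring is hypothetical):

import asyncio
import meerschaum as mrsm

async def main():
    job = mrsm.Job('demo-job')
    stop_event = asyncio.Event()

    async def handle_line(line: str):
        print(line, end='')
        if 'finished' in line:  # hypothetical sentinel in the log output
            stop_event.set()

    await job.monitor_logs_async(handle_line, stop_event=stop_event)

asyncio.run(main())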
628 def is_blocking_on_stdin(self, debug: bool = False) -> bool: 629 """ 630 Return whether a job's daemon is blocking on stdin. 631 """ 632 if self.executor is not None: 633 return self.executor.get_job_is_blocking_on_stdin(self.name, debug=debug) 634 635 return self.is_running() and self.daemon.blocking_stdin_file_path.exists()
Return whether a job's daemon is blocking on stdin.
637 def write_stdin(self, data): 638 """ 639 Write to a job's daemon's `stdin`. 640 """ 641 self.daemon.stdin_file.write(data)
Write to a job's daemon's stdin.
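For example, to answer a prompt from a job which is blocking on input (the job name is illustrative):

import meerschaum as mrsm

job = mrsm.Job('demo-job')
if job.is_blocking_on_stdin():
    job.write_stdin('yes\n')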
643 @property 644 def executor(self) -> Union[Executor, None]: 645 """ 646 If the job is remote, return the connector to the remote API instance. 647 """ 648 return ( 649 mrsm.get_connector(self.executor_keys) 650 if self.executor_keys != 'local' 651 else None 652 )
If the job is remote, return the connector to the remote API instance.
654 @property 655 def status(self) -> str: 656 """ 657 Return the running status of the job's daemon. 658 """ 659 if '_status_hook' in self.__dict__: 660 return self._status_hook() 661 662 if self.executor is not None: 663 return self.executor.get_job_status(self.name) 664 665 return self.daemon.status
Return the running status of the job's daemon.
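A short sketch (the expected status strings are 'running', 'paused', and 'stopped'):

import meerschaum as mrsm

job = mrsm.Job('demo-job')
print(job.status)        # e.g. 'running', 'paused', or 'stopped'
print(job.is_running())  # True only when the status is 'running'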
667 @property 668 def pid(self) -> Union[int, None]: 669 """ 670 Return the PID of the job's daemon. 671 """ 672 if self.executor is not None: 673 return self.executor.get_job_metadata(self.name).get('daemon', {}).get('pid', None) 674 675 return self.daemon.pid
Return the PID of the job's daemon.
677 @property 678 def restart(self) -> bool: 679 """ 680 Return whether to restart a stopped job. 681 """ 682 if self.executor is not None: 683 return self.executor.get_job_metadata(self.name).get('restart', False) 684 685 return self.daemon.properties.get('restart', False)
Return whether to restart a stopped job.
687 @property 688 def result(self) -> SuccessTuple: 689 """ 690 Return the `SuccessTuple` when the job has terminated. 691 """ 692 if self.is_running(): 693 return True, f"{self} is running." 694 695 if '_result_hook' in self.__dict__: 696 return self._result_hook() 697 698 if self.executor is not None: 699 return ( 700 self.executor.get_job_metadata(self.name) 701 .get('result', (False, "No result available.")) 702 ) 703 704 _result = self.daemon.properties.get('result', None) 705 if _result is None: 706 return False, "No result available." 707 708 return tuple(_result)
Return the SuccessTuple when the job has terminated.
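A brief sketch; note that a running job reports success with a running message rather than its final result:

import meerschaum as mrsm

job = mrsm.Job('demo-job')
success, msg = job.result
print(success, msg)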
710 @property 711 def sysargs(self) -> List[str]: 712 """ 713 Return the sysargs to use for the Daemon. 714 """ 715 if self._sysargs: 716 return self._sysargs 717 718 if self.executor is not None: 719 return self.executor.get_job_metadata(self.name).get('sysargs', []) 720 721 target_args = self.daemon.target_args 722 if target_args is None: 723 return [] 724 self._sysargs = target_args[0] if len(target_args) > 0 else [] 725 return self._sysargs
Return the sysargs to use for the Daemon.
727 @property 728 def daemon(self) -> 'Daemon': 729 """ 730 Return the daemon which this job manages. 731 """ 732 from meerschaum.utils.daemon import Daemon 733 if self._daemon is not None and self.executor is None and self._sysargs: 734 return self._daemon 735 736 remote_properties = ( 737 {} 738 if self.executor is None 739 else self.executor.get_job_properties(self.name) 740 ) 741 properties = {**remote_properties, **self._properties_patch} 742 743 self._daemon = Daemon( 744 target=entry, 745 target_args=[self._sysargs], 746 target_kw={}, 747 daemon_id=self.name, 748 label=shlex.join(self._sysargs), 749 properties=properties, 750 ) 751 if '_rotating_log' in self.__dict__: 752 self._daemon._rotating_log = self._rotating_log 753 754 if '_stdin_file' in self.__dict__: 755 self._daemon._stdin_file = self._stdin_file 756 self._daemon._blocking_stdin_file_path = self._stdin_file.blocking_file_path 757 758 return self._daemon
Return the daemon which this job manages.
760 @property 761 def began(self) -> Union[datetime, None]: 762 """ 763 The datetime when the job began running. 764 """ 765 if self.executor is not None: 766 began_str = self.executor.get_job_began(self.name) 767 if began_str is None: 768 return None 769 return ( 770 datetime.fromisoformat(began_str) 771 .astimezone(timezone.utc) 772 .replace(tzinfo=None) 773 ) 774 775 began_str = self.daemon.properties.get('process', {}).get('began', None) 776 if began_str is None: 777 return None 778 779 return datetime.fromisoformat(began_str)
The datetime when the job began running.
781 @property 782 def ended(self) -> Union[datetime, None]: 783 """ 784 The datetime when the job stopped running. 785 """ 786 if self.executor is not None: 787 ended_str = self.executor.get_job_ended(self.name) 788 if ended_str is None: 789 return None 790 return ( 791 datetime.fromisoformat(ended_str) 792 .astimezone(timezone.utc) 793 .replace(tzinfo=None) 794 ) 795 796 ended_str = self.daemon.properties.get('process', {}).get('ended', None) 797 if ended_str is None: 798 return None 799 800 return datetime.fromisoformat(ended_str)
The datetime when the job stopped running.
802 @property 803 def paused(self) -> Union[datetime, None]: 804 """ 805 The datetime when the job was suspended while running. 806 """ 807 if self.executor is not None: 808 paused_str = self.executor.get_job_paused(self.name) 809 if paused_str is None: 810 return None 811 return ( 812 datetime.fromisoformat(paused_str) 813 .astimezone(timezone.utc) 814 .replace(tzinfo=None) 815 ) 816 817 paused_str = self.daemon.properties.get('process', {}).get('paused', None) 818 if paused_str is None: 819 return None 820 821 return datetime.fromisoformat(paused_str)
The datetime when the job was suspended while running.
823 @property 824 def stop_time(self) -> Union[datetime, None]: 825 """ 826 Return the timestamp when the job was manually stopped. 827 """ 828 if self.executor is not None: 829 return self.executor.get_job_stop_time(self.name) 830 831 if not self.daemon.stop_path.exists(): 832 return None 833 834 stop_data = self.daemon._read_stop_file() 835 if not stop_data: 836 return None 837 838 stop_time_str = stop_data.get('stop_time', None) 839 if not stop_time_str: 840 warn(f"Could not read stop time for {self}.") 841 return None 842 843 return datetime.fromisoformat(stop_time_str)
Return the timestamp when the job was manually stopped.
856 def check_restart(self) -> SuccessTuple: 857 """ 858 If `restart` is `True` and the daemon is not running, 859 restart the job. 860 Do not restart if the job was manually stopped. 861 """ 862 if self.is_running(): 863 return True, f"{self} is running." 864 865 if not self.restart: 866 return True, f"{self} does not need to be restarted." 867 868 if self.stop_time is not None: 869 return True, f"{self} was manually stopped." 870 871 return self.start()
If restart is True and the daemon is not running, restart the job. Do not restart if the job was manually stopped.
873 @property 874 def label(self) -> str: 875 """ 876 Return the job's Daemon label (joined sysargs). 877 """ 878 from meerschaum._internal.arguments import compress_pipeline_sysargs 879 sysargs = compress_pipeline_sysargs(self.sysargs) 880 return shlex.join(sysargs).replace(' + ', '\n+ ').replace(' : ', '\n: ').lstrip().rstrip()
Return the job's Daemon label (joined sysargs).
909 @property 910 def env(self) -> Dict[str, str]: 911 """ 912 Return the environment variables to set for the job's process. 913 """ 914 if '_env' in self.__dict__: 915 return self.__dict__['_env'] 916 917 _env = self.daemon.properties.get('env', {}) 918 default_env = { 919 'PYTHONUNBUFFERED': '1', 920 'LINES': str(get_config('jobs', 'terminal', 'lines')), 921 'COLUMNS': str(get_config('jobs', 'terminal', 'columns')), 922 STATIC_CONFIG['environment']['noninteractive']: 'true', 923 } 924 self._env = {**default_env, **_env} 925 return self._env
Return the environment variables to set for the job's process.
927 @property 928 def delete_after_completion(self) -> bool: 929 """ 930 Return whether this job is configured to delete itself after completion. 931 """ 932 if '_delete_after_completion' in self.__dict__: 933 return self.__dict__.get('_delete_after_completion', False) 934 935 self._delete_after_completion = self.daemon.properties.get('delete_after_completion', False) 936 return self._delete_after_completion
Return whether this job is configured to delete itself after completion.
10def pprint( 11 *args, 12 detect_password: bool = True, 13 nopretty: bool = False, 14 **kw 15) -> None: 16 """Pretty print an object according to the configured ANSI and UNICODE settings. 17 If detect_password is True (default), search and replace passwords with '*' characters. 18 Does not mutate objects. 19 """ 20 import copy 21 import json 22 from meerschaum.utils.packages import attempt_import, import_rich 23 from meerschaum.utils.formatting import ANSI, get_console, print_tuple 24 from meerschaum.utils.warnings import error 25 from meerschaum.utils.misc import replace_password, dict_from_od, filter_keywords 26 from collections import OrderedDict 27 28 if ( 29 len(args) == 1 30 and 31 isinstance(args[0], tuple) 32 and 33 len(args[0]) == 2 34 and 35 isinstance(args[0][0], bool) 36 and 37 isinstance(args[0][1], str) 38 ): 39 return print_tuple(args[0], **filter_keywords(print_tuple, **kw)) 40 41 modify = True 42 rich_pprint = None 43 if ANSI and not nopretty: 44 rich = import_rich() 45 if rich is not None: 46 rich_pretty = attempt_import('rich.pretty') 47 if rich_pretty is not None: 48 def _rich_pprint(*args, **kw): 49 _console = get_console() 50 _kw = filter_keywords(_console.print, **kw) 51 _console.print(*args, **_kw) 52 rich_pprint = _rich_pprint 53 elif not nopretty: 54 pprintpp = attempt_import('pprintpp', warn=False) 55 try: 56 _pprint = pprintpp.pprint 57 except Exception : 58 import pprint as _pprint_module 59 _pprint = _pprint_module.pprint 60 61 func = ( 62 _pprint if rich_pprint is None else rich_pprint 63 ) if not nopretty else print 64 65 try: 66 args_copy = copy.deepcopy(args) 67 except Exception: 68 args_copy = args 69 modify = False 70 _args = [] 71 for a in args: 72 c = a 73 ### convert OrderedDict into dict 74 if isinstance(a, OrderedDict) or issubclass(type(a), OrderedDict): 75 c = dict_from_od(copy.deepcopy(c)) 76 _args.append(c) 77 args = _args 78 79 _args = list(args) 80 if detect_password and modify: 81 _args = [] 82 for a in args: 83 c = a 84 if isinstance(c, dict): 85 c = replace_password(copy.deepcopy(c)) 86 if nopretty: 87 try: 88 c = json.dumps(c) 89 is_json = True 90 except Exception: 91 is_json = False 92 if not is_json: 93 try: 94 c = str(c) 95 except Exception: 96 pass 97 _args.append(c) 98 99 ### filter out unsupported keywords 100 func_kw = filter_keywords(func, **kw) if not nopretty else {} 101 error_msg = None 102 try: 103 func(*_args, **func_kw) 104 except Exception as e: 105 error_msg = e 106 if error_msg is not None: 107 error(error_msg)
Pretty print an object according to the configured ANSI and UNICODE settings. If detect_password is True (default), search and replace passwords with '*' characters. Does not mutate objects.
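For example, assuming pprint is exported from the root meerschaum namespace like the other root-module functions:

import meerschaum as mrsm

mrsm.pprint({'username': 'foo', 'password': 'hunter2'})
# The password value is masked with '*' characters.

mrsm.pprint((True, "Success!"))
# A (bool, str) tuple is treated as a SuccessTuple and printed accordingly.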
1229def attempt_import( 1230 *names: str, 1231 lazy: bool = True, 1232 warn: bool = True, 1233 install: bool = True, 1234 venv: Optional[str] = 'mrsm', 1235 precheck: bool = True, 1236 split: bool = True, 1237 check_update: bool = False, 1238 check_pypi: bool = False, 1239 check_is_installed: bool = True, 1240 allow_outside_venv: bool = True, 1241 color: bool = True, 1242 debug: bool = False 1243) -> Any: 1244 """ 1245 Raise a warning if packages are not installed; otherwise import and return modules. 1246 If `lazy` is `True`, return lazy-imported modules. 1247 1248 Returns tuple of modules if multiple names are provided, else returns one module. 1249 1250 Parameters 1251 ---------- 1252 names: List[str] 1253 The packages to be imported. 1254 1255 lazy: bool, default True 1256 If `True`, lazily load packages. 1257 1258 warn: bool, default True 1259 If `True`, raise a warning if a package cannot be imported. 1260 1261 install: bool, default True 1262 If `True`, attempt to install a missing package into the designated virtual environment. 1263 If `check_update` is True, install updates if available. 1264 1265 venv: Optional[str], default 'mrsm' 1266 The virtual environment in which to search for packages and to install packages into. 1267 1268 precheck: bool, default True 1269 If `True`, attempt to find module before importing (necessary for checking if modules exist 1270 and retaining lazy imports), otherwise assume lazy is `False`. 1271 1272 split: bool, default True 1273 If `True`, split packages' names on `'.'`. 1274 1275 check_update: bool, default False 1276 If `True` and `install` is `True`, install updates if the required minimum version 1277 does not match. 1278 1279 check_pypi: bool, default False 1280 If `True` and `check_update` is `True`, check PyPI when determining whether 1281 an update is required. 1282 1283 check_is_installed: bool, default True 1284 If `True`, check if the package is contained in the virtual environment. 1285 1286 allow_outside_venv: bool, default True 1287 If `True`, search outside of the specified virtual environment 1288 if the package cannot be found. 1289 Setting to `False` will reinstall the package into a virtual environment, even if it 1290 is installed outside. 1291 1292 color: bool, default True 1293 If `False`, do not print ANSI colors. 1294 1295 Returns 1296 ------- 1297 The specified modules. If they're not available and `install` is `True`, it will first 1298 download them into a virtual environment and return the modules. 
1299 1300 Examples 1301 -------- 1302 >>> pandas, sqlalchemy = attempt_import('pandas', 'sqlalchemy') 1303 >>> pandas = attempt_import('pandas') 1304 1305 """ 1306 1307 import importlib.util 1308 1309 ### to prevent recursion, check if parent Meerschaum package is being imported 1310 if names == ('meerschaum',): 1311 return _import_module('meerschaum') 1312 1313 if venv == 'mrsm' and _import_hook_venv is not None: 1314 if debug: 1315 print(f"Import hook for virtual environment '{_import_hook_venv}' is active.") 1316 venv = _import_hook_venv 1317 1318 _warnings = _import_module('meerschaum.utils.warnings') 1319 warn_function = _warnings.warn 1320 1321 def do_import(_name: str, **kw) -> Union['ModuleType', None]: 1322 with Venv(venv=venv, debug=debug): 1323 ### determine the import method (lazy vs normal) 1324 from meerschaum.utils.misc import filter_keywords 1325 import_method = ( 1326 _import_module if not lazy 1327 else lazy_import 1328 ) 1329 try: 1330 mod = import_method(_name, **(filter_keywords(import_method, **kw))) 1331 except Exception as e: 1332 if warn: 1333 import traceback 1334 traceback.print_exception(type(e), e, e.__traceback__) 1335 warn_function( 1336 f"Failed to import module '{_name}'.\nException:\n{e}", 1337 ImportWarning, 1338 stacklevel = (5 if lazy else 4), 1339 color = False, 1340 ) 1341 mod = None 1342 return mod 1343 1344 modules = [] 1345 for name in names: 1346 ### Check if package is a declared dependency. 1347 root_name = name.split('.')[0] if split else name 1348 install_name = _import_to_install_name(root_name) 1349 1350 if install_name is None: 1351 install_name = root_name 1352 if warn and root_name != 'plugins': 1353 warn_function( 1354 f"Package '{root_name}' is not declared in meerschaum.utils.packages.", 1355 ImportWarning, 1356 stacklevel = 3, 1357 color = False 1358 ) 1359 1360 ### Determine if the package exists. 1361 if precheck is False: 1362 found_module = ( 1363 do_import( 1364 name, debug=debug, warn=False, venv=venv, color=color, 1365 check_update=False, check_pypi=False, split=split, 1366 ) is not None 1367 ) 1368 else: 1369 if check_is_installed: 1370 with _locks['_is_installed_first_check']: 1371 if not _is_installed_first_check.get(name, False): 1372 package_is_installed = is_installed( 1373 name, 1374 venv = venv, 1375 split = split, 1376 allow_outside_venv = allow_outside_venv, 1377 debug = debug, 1378 ) 1379 _is_installed_first_check[name] = package_is_installed 1380 else: 1381 package_is_installed = _is_installed_first_check[name] 1382 else: 1383 package_is_installed = _is_installed_first_check.get( 1384 name, 1385 venv_contains_package(name, venv=venv, split=split, debug=debug) 1386 ) 1387 found_module = package_is_installed 1388 1389 if not found_module: 1390 if install: 1391 if not pip_install( 1392 install_name, 1393 venv = venv, 1394 split = False, 1395 check_update = check_update, 1396 color = color, 1397 debug = debug 1398 ) and warn: 1399 warn_function( 1400 f"Failed to install '{install_name}'.", 1401 ImportWarning, 1402 stacklevel = 3, 1403 color = False, 1404 ) 1405 elif warn: 1406 ### Raise a warning if we can't find the package and install = False. 1407 warn_function( 1408 (f"\n\nMissing package '{name}' from virtual environment '{venv}'; " 1409 + "some features will not work correctly." 1410 + f"\n\nSet install=True when calling attempt_import.\n"), 1411 ImportWarning, 1412 stacklevel = 3, 1413 color = False, 1414 ) 1415 1416 ### Do the import. Will be lazy if lazy=True. 
1417 m = do_import( 1418 name, debug=debug, warn=warn, venv=venv, color=color, 1419 check_update=check_update, check_pypi=check_pypi, install=install, split=split, 1420 ) 1421 modules.append(m) 1422 1423 modules = tuple(modules) 1424 if len(modules) == 1: 1425 return modules[0] 1426 return modules
Raise a warning if packages are not installed; otherwise import and return modules.
If lazy is True, return lazy-imported modules.
Returns a tuple of modules if multiple names are provided, else returns one module.
Parameters
- names (List[str]): The packages to be imported.
- lazy (bool, default True): If True, lazily load packages.
- warn (bool, default True): If True, raise a warning if a package cannot be imported.
- install (bool, default True): If True, attempt to install a missing package into the designated virtual environment. If check_update is True, install updates if available.
- venv (Optional[str], default 'mrsm'): The virtual environment in which to search for packages and to install packages into.
- precheck (bool, default True): If True, attempt to find the module before importing (necessary for checking if modules exist and retaining lazy imports); otherwise assume lazy is False.
- split (bool, default True): If True, split packages' names on '.'.
- check_update (bool, default False): If True and install is True, install updates if the required minimum version does not match.
- check_pypi (bool, default False): If True and check_update is True, check PyPI when determining whether an update is required.
- check_is_installed (bool, default True): If True, check if the package is contained in the virtual environment.
- allow_outside_venv (bool, default True): If True, search outside of the specified virtual environment if the package cannot be found. Setting to False will reinstall the package into a virtual environment, even if it is installed outside.
- color (bool, default True): If False, do not print ANSI colors.
Returns
- The specified modules. If they're not available and install is True, it will first download them into a virtual environment and return the modules.
Examples
>>> pandas, sqlalchemy = attempt_import('pandas', 'sqlalchemy')
>>> pandas = attempt_import('pandas')
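For example, to import a dependency eagerly, or to target a specific virtual environment (the 'example-plugin' venv name is illustrative):

from meerschaum.utils.packages import attempt_import

# Install (if missing) and import into the default 'mrsm' virtual environment:
requests = attempt_import('requests', lazy=False)

# Search within (and install into) a plugin's virtual environment instead:
duckdb = attempt_import('duckdb', venv='example-plugin', lazy=False)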
20class Connector(metaclass=abc.ABCMeta): 21 """ 22 The base connector class to hold connection attributes. 23 """ 24 def __init__( 25 self, 26 type: Optional[str] = None, 27 label: Optional[str] = None, 28 **kw: Any 29 ): 30 """ 31 Set the given keyword arguments as attributes. 32 33 Parameters 34 ---------- 35 type: str 36 The `type` of the connector (e.g. `sql`, `api`, `plugin`). 37 38 label: str 39 The `label` for the connector. 40 41 42 Examples 43 -------- 44 Run `mrsm edit config` and to edit connectors in the YAML file: 45 46 ```yaml 47 meerschaum: 48 connections: 49 {type}: 50 {label}: 51 ### attributes go here 52 ``` 53 54 """ 55 self._original_dict = copy.deepcopy(self.__dict__) 56 self._set_attributes(type=type, label=label, **kw) 57 58 ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set. 59 self.verify_attributes( 60 ['uri'] 61 if 'uri' in self.__dict__ 62 else getattr(self, 'REQUIRED_ATTRIBUTES', None) 63 ) 64 65 def _reset_attributes(self): 66 self.__dict__ = self._original_dict 67 68 def _set_attributes( 69 self, 70 *args, 71 inherit_default: bool = True, 72 **kw: Any 73 ): 74 from meerschaum.config.static import STATIC_CONFIG 75 from meerschaum.utils.warnings import error 76 77 self._attributes = {} 78 79 default_label = STATIC_CONFIG['connectors']['default_label'] 80 81 ### NOTE: Support the legacy method of explicitly passing the type. 82 label = kw.get('label', None) 83 if label is None: 84 if len(args) == 2: 85 label = args[1] 86 elif len(args) == 0: 87 label = None 88 else: 89 label = args[0] 90 91 if label == 'default': 92 error( 93 f"Label cannot be 'default'. Did you mean '{default_label}'?", 94 InvalidAttributesError, 95 ) 96 self.__dict__['label'] = label 97 98 from meerschaum.config import get_config 99 conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors')) 100 connector_config = copy.deepcopy(get_config('system', 'connectors')) 101 102 ### inherit attributes from 'default' if exists 103 if inherit_default: 104 inherit_from = 'default' 105 if self.type in conn_configs and inherit_from in conn_configs[self.type]: 106 _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from]) 107 self._attributes.update(_inherit_dict) 108 109 ### load user config into self._attributes 110 if self.type in conn_configs and self.label in conn_configs[self.type]: 111 self._attributes.update(conn_configs[self.type][self.label] or {}) 112 113 ### load system config into self._sys_config 114 ### (deep copy so future Connectors don't inherit changes) 115 if self.type in connector_config: 116 self._sys_config = copy.deepcopy(connector_config[self.type]) 117 118 ### add additional arguments or override configuration 119 self._attributes.update(kw) 120 121 ### finally, update __dict__ with _attributes. 122 self.__dict__.update(self._attributes) 123 124 def verify_attributes( 125 self, 126 required_attributes: Optional[List[str]] = None, 127 debug: bool = False, 128 ) -> None: 129 """ 130 Ensure that the required attributes have been met. 131 132 The Connector base class checks the minimum requirements. 133 Child classes may enforce additional requirements. 134 135 Parameters 136 ---------- 137 required_attributes: Optional[List[str]], default None 138 Attributes to be verified. If `None`, default to `['label']`. 139 140 debug: bool, default False 141 Verbosity toggle. 142 143 Returns 144 ------- 145 Don't return anything. 146 147 Raises 148 ------ 149 An error if any of the required attributes are missing. 
150 """ 151 from meerschaum.utils.warnings import error, warn 152 from meerschaum.utils.debug import dprint 153 from meerschaum.utils.misc import items_str 154 if required_attributes is None: 155 required_attributes = ['label'] 156 157 missing_attributes = set() 158 for a in required_attributes: 159 if a not in self.__dict__: 160 missing_attributes.add(a) 161 if len(missing_attributes) > 0: 162 error( 163 ( 164 f"Missing {items_str(list(missing_attributes))} " 165 + f"for connector '{self.type}:{self.label}'." 166 ), 167 InvalidAttributesError, 168 silent=True, 169 stack=False 170 ) 171 172 173 def __str__(self): 174 """ 175 When cast to a string, return type:label. 176 """ 177 return f"{self.type}:{self.label}" 178 179 def __repr__(self): 180 """ 181 Represent the connector as type:label. 182 """ 183 return str(self) 184 185 @property 186 def meta(self) -> Dict[str, Any]: 187 """ 188 Return the keys needed to reconstruct this Connector. 189 """ 190 _meta = { 191 key: value 192 for key, value in self.__dict__.items() 193 if not str(key).startswith('_') 194 } 195 _meta.update({ 196 'type': self.type, 197 'label': self.label, 198 }) 199 return _meta 200 201 202 @property 203 def type(self) -> str: 204 """ 205 Return the type for this connector. 206 """ 207 _type = self.__dict__.get('type', None) 208 if _type is None: 209 import re 210 is_executor = self.__class__.__name__.lower().endswith('executor') 211 suffix_regex = ( 212 r'connector$' 213 if not is_executor 214 else r'executor$' 215 ) 216 _type = re.sub(suffix_regex, '', self.__class__.__name__.lower()) 217 self.__dict__['type'] = _type 218 return _type 219 220 221 @property 222 def label(self) -> str: 223 """ 224 Return the label for this connector. 225 """ 226 _label = self.__dict__.get('label', None) 227 if _label is None: 228 from meerschaum.config.static import STATIC_CONFIG 229 _label = STATIC_CONFIG['connectors']['default_label'] 230 self.__dict__['label'] = _label 231 return _label
The base connector class to hold connection attributes.
24 def __init__( 25 self, 26 type: Optional[str] = None, 27 label: Optional[str] = None, 28 **kw: Any 29 ): 30 """ 31 Set the given keyword arguments as attributes. 32 33 Parameters 34 ---------- 35 type: str 36 The `type` of the connector (e.g. `sql`, `api`, `plugin`). 37 38 label: str 39 The `label` for the connector. 40 41 42 Examples 43 -------- 44 Run `mrsm edit config` and to edit connectors in the YAML file: 45 46 ```yaml 47 meerschaum: 48 connections: 49 {type}: 50 {label}: 51 ### attributes go here 52 ``` 53 54 """ 55 self._original_dict = copy.deepcopy(self.__dict__) 56 self._set_attributes(type=type, label=label, **kw) 57 58 ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set. 59 self.verify_attributes( 60 ['uri'] 61 if 'uri' in self.__dict__ 62 else getattr(self, 'REQUIRED_ATTRIBUTES', None) 63 )
    def verify_attributes(
        self,
        required_attributes: Optional[List[str]] = None,
        debug: bool = False,
    ) -> None:
        """
        Ensure that the required attributes are set.

        The Connector base class checks the minimum requirements.
        Child classes may enforce additional requirements.

        Parameters
        ----------
        required_attributes: Optional[List[str]], default None
            Attributes to be verified. If `None`, defaults to `['label']`.

        debug: bool, default False
            Verbosity toggle.

        Returns
        -------
        `None`.

        Raises
        ------
        An `InvalidAttributesError` if any of the required attributes are missing.
        """
        from meerschaum.utils.warnings import error, warn
        from meerschaum.utils.debug import dprint
        from meerschaum.utils.misc import items_str
        if required_attributes is None:
            required_attributes = ['label']

        missing_attributes = set()
        for a in required_attributes:
            if a not in self.__dict__:
                missing_attributes.add(a)
        if len(missing_attributes) > 0:
            error(
                (
                    f"Missing {items_str(list(missing_attributes))} "
                    + f"for connector '{self.type}:{self.label}'."
                ),
                InvalidAttributesError,
                silent=True,
                stack=False
            )
Ensure that the required attributes are set.

The Connector base class checks the minimum requirements; child classes may enforce additional requirements.

Parameters
- required_attributes (Optional[List[str]], default None): Attributes to be verified. If `None`, defaults to `['label']`.
- debug (bool, default False): Verbosity toggle.

Returns
- `None`.

Raises
- `InvalidAttributesError` if any of the required attributes are missing.
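For example (a hedged sketch; the `req` type and `host` attribute are illustrative), verification runs automatically from `__init__`, so omitting a required attribute should raise `InvalidAttributesError`:

import meerschaum as mrsm

@mrsm.make_connector
class ReqConnector(mrsm.Connector):
    REQUIRED_ATTRIBUTES = ['host']

conn = mrsm.get_connector('req:ok', host='localhost')  # passes verification
conn = mrsm.get_connector('req:missing')               # error: missing 'host'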
    @property
    def meta(self) -> Dict[str, Any]:
        """
        Return the keys needed to reconstruct this Connector.
        """
        _meta = {
            key: value
            for key, value in self.__dict__.items()
            if not str(key).startswith('_')
        }
        _meta.update({
            'type': self.type,
            'label': self.label,
        })
        return _meta
Return the keys needed to reconstruct this Connector.
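Because `meta` folds `type` and `label` back into the dictionary, a connector can round-trip through its metadata (a sketch, assuming `get_connector()` accepts `type` and `label` as keyword arguments):

import meerschaum as mrsm

conn = mrsm.get_connector('sql:temp', flavor='sqlite', database='/tmp/tmp.db')
meta = conn.meta  # e.g. {'flavor': 'sqlite', 'database': '/tmp/tmp.db', 'type': 'sql', 'label': 'temp'}

### Rebuild an equivalent connector from its metadata.
conn2 = mrsm.get_connector(**meta)
print(conn2)  # sql:temp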
    @property
    def type(self) -> str:
        """
        Return the type for this connector.
        """
        _type = self.__dict__.get('type', None)
        if _type is None:
            import re
            is_executor = self.__class__.__name__.lower().endswith('executor')
            suffix_regex = (
                r'connector$'
                if not is_executor
                else r'executor$'
            )
            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
            self.__dict__['type'] = _type
        return _type
Return the type for this connector.
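The derivation is purely lexical: strip the `connector` (or `executor`) suffix from the lowercased class name. A standalone sketch of the same logic (class names are illustrative):

import re

for class_name in ('FooConnector', 'SQLConnector', 'SystemdExecutor'):
    is_executor = class_name.lower().endswith('executor')
    suffix_regex = r'executor$' if is_executor else r'connector$'
    print(re.sub(suffix_regex, '', class_name.lower()))
# foo
# sql
# systemd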
    @property
    def label(self) -> str:
        """
        Return the label for this connector.
        """
        _label = self.__dict__.get('label', None)
        if _label is None:
            from meerschaum.config.static import STATIC_CONFIG
            _label = STATIC_CONFIG['connectors']['default_label']
            self.__dict__['label'] = _label
        return _label
Return the label for this connector.
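If no label was given, the default from the static config is filled in lazily on first access. A minimal sketch (the `ExampleConnector` class is hypothetical):

import meerschaum as mrsm
from meerschaum.config.static import STATIC_CONFIG

class ExampleConnector(mrsm.Connector):
    pass

conn = ExampleConnector()  # neither type nor label given
print(conn.label == STATIC_CONFIG['connectors']['default_label'])  # True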
def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lowercase of the class name, without the suffix `connector`.

    Parameters
    ----------
    _is_executor: bool, default False
        If `True`, strip the suffix `executor` (rather than `connector`) from the class name.
        Set the class attribute `IS_INSTANCE` to `True` to make the connector type an
        instance connector. This requires implementing the various pipes functions
        and lots of testing.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    """
    import re
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls
Register a class as a `Connector`. The `type` will be the lowercase of the class name, without the suffix `connector`.

Parameters
- _is_executor (bool, default False): If `True`, strip the suffix `executor` (rather than `connector`) from the class name. Set the class attribute `IS_INSTANCE` to `True` to make the connector type an instance connector; this requires implementing the various pipes functions and lots of testing.

Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>>
>>> @make_connector
... class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
...
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
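A hedged sketch of the `IS_INSTANCE` hook referenced above (the `store` type and its attributes are illustrative; registering the type is shown, but actually serving as a pipes instance would require implementing the instance-connector interface):

import meerschaum as mrsm

@mrsm.make_connector
class StoreConnector(mrsm.Connector):
    IS_INSTANCE = True             # adds 'store' to instance_types
    REQUIRED_ATTRIBUTES = ['path']

conn = mrsm.get_connector('store:local', path='/tmp/store')
print(conn)  # store:local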
def entry(
    sysargs: Optional[List[str]] = None,
    _patch_args: Optional[Dict[str, Any]] = None,
) -> SuccessTuple:
    """
    Parse arguments and launch a Meerschaum action.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    import shlex
    import json
    from meerschaum.utils.formatting import make_header
    from meerschaum._internal.arguments import (
        parse_arguments,
        split_chained_sysargs,
        split_pipeline_sysargs,
        sysargs_has_api_executor_keys,
        get_pipeline_sysargs,
    )
    from meerschaum.config.static import STATIC_CONFIG
    if sysargs is None:
        sysargs = []
    if not isinstance(sysargs, list):
        sysargs = shlex.split(sysargs)

    pipeline_key = STATIC_CONFIG['system']['arguments']['pipeline_key']
    escaped_pipeline_key = STATIC_CONFIG['system']['arguments']['escaped_pipeline_key']
    sysargs, pipeline_args = split_pipeline_sysargs(sysargs)

    has_daemon = '-d' in sysargs or '--daemon' in sysargs
    has_start_job = sysargs[:2] == ['start', 'job']
    pipeline_has_api_executor_keys = sysargs_has_api_executor_keys(pipeline_args)

    chained_sysargs = (
        [sysargs]
        if has_daemon or has_start_job or pipeline_has_api_executor_keys
        else split_chained_sysargs(sysargs)
    )
    if pipeline_args:
        chained_sysargs = [get_pipeline_sysargs(sysargs, pipeline_args, _patch_args=_patch_args)]

    results: List[SuccessTuple] = []

    for _sysargs in chained_sysargs:
        if escaped_pipeline_key in _sysargs:
            _sysargs = [
                pipeline_key
                if _arg == escaped_pipeline_key
                else _arg
                for _arg in _sysargs
            ]

        args = parse_arguments(_sysargs)
        if _patch_args:
            args.update(_patch_args)
        argparse_exception = args.get(
            STATIC_CONFIG['system']['arguments']['failure_key'],
            None,
        )
        if argparse_exception is not None:
            args_text = args.get('text', '')
            if not args_text.startswith('show arguments'):
                return (
                    False,
                    (
                        "Invalid arguments:"
                        + (f"\n{args_text}" if args_text else '')
                        + f"\n    {argparse_exception}"
                    )
                )

        entry_success, entry_msg = entry_with_args(_patch_args=_patch_args, **args)
        results.append((entry_success, entry_msg))

        if not entry_success:
            break

    success = all(_success for _success, _ in results)
    any_success = any(_success for _success, _ in results)
    success_messages = [_msg for _success, _msg in results if _success]

    successes_msg = (
        success_messages[0]
        if len(success_messages) and len(results) == 1
        else (
            (
                'Successfully c'
                if success
                else (
                    'Failed pipeline after '
                    + f"{len(success_messages)} step"
                    + ('s' if len(success_messages) != 1 else '')
                    + '.\n\nC'
                )
            ) + 'ompleted step'
            + ('s' if len(success_messages) != 1 else '')
            + ':\n\n'
            + '\n'.join(
                [
                    (
                        make_header(shlex.join(_sysargs))
                        + '\n    ' + _msg + '\n'
                    )
                    for i, (_msg, _sysargs) in enumerate(zip(success_messages, chained_sysargs))
                ]
            )
        )
    )
    has_fail = results[-1][0] is False
    fail_ix = len(results) - 1
    fail_sysargs = chained_sysargs[fail_ix] if has_fail else None
    fail_msg = results[-1][1] if has_fail else ''
    fails_msg = (
        'Failed to complete step:\n\n'
        + make_header(shlex.join(fail_sysargs))
        + '\n    '
        + fail_msg
    ) if not results[-1][0] else ''

    msg = (
        successes_msg
        + ('\n\n' if any_success else '')
        + fails_msg
    ).rstrip() if len(chained_sysargs) > 1 else results[0][1]

    if _systemd_result_path:
        import json
        from meerschaum.utils.warnings import warn
        import meerschaum as mrsm

        job = mrsm.Job(_job_name, executor_keys='systemd')
        if job.delete_after_completion:
            delete_success, delete_msg = job.delete()
            mrsm.pprint((delete_success, delete_msg))
        else:
            try:
                if _systemd_result_path.parent.exists():
                    with open(_systemd_result_path, 'w+', encoding='utf-8') as f:
                        json.dump((success, msg), f)
            except Exception as e:
                warn(f"Failed to write job result:\n{e}")

    return success, msg
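A usage sketch (hedged: the import path assumes `entry()` lives in `meerschaum._internal.entry`, and the `+` separator for chained steps follows the CLI convention implied by `split_chained_sysargs`):

from meerschaum._internal.entry import entry  # assumed module path

### A single action as a list of sysargs:
success, msg = entry(['show', 'version'])

### Strings are split with shlex; '+' chains steps as on the CLI:
success, msg = entry('show version + show connectors')
print(success, msg)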