meerschaum.connectors
Create connectors with meerschaum.connectors.get_connector().
For ease of use, you can also import from the root meerschaum
module:
>>> from meerschaum import get_connector
>>> conn = get_connector()
1#! /usr/bin/env python 2# -*- coding: utf-8 -*- 3# vim:fenc=utf-8 4 5""" 6Create connectors with `meerschaum.connectors.get_connector()`. 7For ease of use, you can also import from the root `meerschaum` module: 8``` 9>>> from meerschaum import get_connector 10>>> conn = get_connector() 11``` 12""" 13 14from __future__ import annotations 15 16import meerschaum as mrsm 17from meerschaum.utils.typing import Any, Union, List, Dict 18from meerschaum.utils.threading import RLock 19from meerschaum.utils.warnings import warn 20 21from meerschaum.connectors._Connector import Connector, InvalidAttributesError 22from meerschaum.connectors.sql._SQLConnector import SQLConnector 23from meerschaum.connectors.api._APIConnector import APIConnector 24from meerschaum.connectors.sql._create_engine import flavor_configs as sql_flavor_configs 25 26__all__ = ( 27 "make_connector", 28 "Connector", 29 "SQLConnector", 30 "APIConnector", 31 "get_connector", 32 "is_connected", 33 "poll", 34 "api", 35 "sql", 36 "valkey", 37) 38 39### store connectors partitioned by 40### type, label for reuse 41connectors: Dict[str, Dict[str, Connector]] = { 42 'api' : {}, 43 'sql' : {}, 44 'plugin' : {}, 45 'valkey' : {}, 46} 47instance_types: List[str] = ['sql', 'api'] 48_locks: Dict[str, RLock] = { 49 'connectors' : RLock(), 50 'types' : RLock(), 51 'custom_types' : RLock(), 52 '_loaded_plugin_connectors': RLock(), 53 'instance_types' : RLock(), 54} 55attributes: Dict[str, Dict[str, Any]] = { 56 'api': { 57 'required': [ 58 'host', 59 'username', 60 'password', 61 ], 62 'optional': [ 63 'port', 64 ], 65 'default': { 66 'protocol': 'http', 67 }, 68 }, 69 'sql': { 70 'flavors': sql_flavor_configs, 71 }, 72} 73### Fill this with objects only when connectors are first referenced. 
types: Dict[str, Any] = {}          ### type name -> Connector class (lazily filled)
custom_types: set = set()           ### type names registered via `make_connector`
_loaded_plugin_connectors: bool = False


def get_connector(
    type: str = None,
    label: str = None,
    refresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.


    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    debug: bool, default False
        Verbosity toggle.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum.config.static import STATIC_CONFIG
    global _loaded_plugin_connectors

    ### Accept combined keys (e.g. 'sql:main') in the `type` argument.
    ### `maxsplit=1` keeps labels which themselves contain ':' intact.
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    ### Load plugin-defined and built-in custom connectors exactly once.
    with _locks['_loaded_plugin_connectors']:
        if not _loaded_plugin_connectors:
            load_plugin_connectors()
            _load_builtin_custom_connectors()
            _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    ### Lazily populate the type -> class registry
    ### (deferred so importing this module stays cheap).
    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            import inspect
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    ### Unknown attribute: only warn if the constructor
                    ### would not accept it either.
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector " +
                            f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n" +
                        f"  - Keyword value: '{value}'\n" +
                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else: ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack = False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack = False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]


def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    try:
        ### `maxsplit=1` keeps labels containing ':' intact,
        ### consistent with how `get_connector()` parses combined keys.
        typ, label = keys.split(':', maxsplit=1)
    except Exception:
        return False
    if typ not in instance_types:
        return False
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        ### Suppress connection warnings; only the boolean result matters here.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False


def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`.

    Parameters
    ----------
    cls:
        The class to register as a custom connector type.

    _is_executor: bool, default False
        If `True`, strip the suffix `executor` (instead of `connector`)
        when deriving the type name. Used by `@make_executor`.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    >>> class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>>
    """
    import re
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}
    ### Classes flagged `IS_INSTANCE` may back a Meerschaum instance.
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls


def load_plugin_connectors():
    """
    If a plugin makes use of the `make_connector` decorator,
    load its module.
    """
    from meerschaum.plugins import get_plugins, import_plugins
    to_import = []
    for plugin in get_plugins():
        if plugin is None:
            continue
        ### Cheap textual check avoids importing plugins which
        ### definitely do not define connectors.
        with open(plugin.__file__, encoding='utf-8') as f:
            text = f.read()
        if 'make_connector' in text or 'Connector' in text:
            to_import.append(plugin.name)
    if not to_import:
        return
    import_plugins(*to_import)


def get_connector_plugin(
    connector: Connector,
) -> Union[str, None, mrsm.Plugin]:
    """
    Determine the plugin for a connector.
    This is useful for handling virtual environments for custom instance connectors.

    Parameters
    ----------
    connector: Connector
        The connector which may require a virtual environment.

    Returns
    -------
    A Plugin, 'mrsm', or None.
    """
    if not hasattr(connector, 'type'):
        return None
    ### Custom connector types live in a plugin module (`plugins.<name>...`);
    ### 'plugin' connectors are named after their plugin; everything else is core.
    plugin_name = (
        connector.__module__.replace('plugins.', '').split('.')[0]
        if connector.type in custom_types else (
            connector.label
            if connector.type == 'plugin'
            else 'mrsm'
        )
    )
    plugin = mrsm.Plugin(plugin_name)
    return plugin if plugin.is_installed() else None


def _load_builtin_custom_connectors():
    """
    Import custom connectors decorated with `@make_connector` or `@make_executor`.
    """
    ### Importing these modules triggers their registration decorators.
    import meerschaum.jobs.systemd
    import meerschaum.connectors.valkey
def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.

    The connector `type` is derived from the class name: lower-cased,
    with the trailing `connector` (or `executor` when `_is_executor`) removed.

    Parameters
    ----------
    cls:
        The class to register as a custom connector type.

    _is_executor: bool, default False
        If `True`, strip the suffix `executor` instead of `connector`.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    >>> class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>>
    """
    import re
    suffix_pattern = r'executor$' if _is_executor else r'connector$'
    typ = re.sub(suffix_pattern, '', cls.__name__.lower())

    ### Register the class and remember that it is user-defined.
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)

    ### Ensure the connector registry has a bucket for this type.
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}

    ### Classes flagged `IS_INSTANCE` may back a Meerschaum instance.
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls
Register a class as a Connector.
The type will be the lower case of the class name, without the suffix connector.
Parameters
- instance (bool, default False):
If True, make this connector type an instance connector. This requires implementing the various pipes functions and lots of testing.
Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>>
>>> @make_connector
>>> class FooConnector(Connector):
... REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
...
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
>>>
class Connector(metaclass=abc.ABCMeta):
    """
    The base connector class to hold connection attributes.
    """
    def __init__(
        self,
        type: Optional[str] = None,
        label: Optional[str] = None,
        **kw: Any
    ):
        """
        Set the given keyword arguments as attributes.

        Parameters
        ----------
        type: str
            The `type` of the connector (e.g. `sql`, `api`, `plugin`).

        label: str
            The `label` for the connector.


        Examples
        --------
        Run `mrsm edit config` and to edit connectors in the YAML file:

        ```yaml
        meerschaum:
            connections:
                {type}:
                    {label}:
                        ### attributes go here
        ```

        """
        ### Snapshot the pristine __dict__ so `_reset_attributes()` can roll back.
        self._original_dict = copy.deepcopy(self.__dict__)
        self._set_attributes(type=type, label=label, **kw)

        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
        self.verify_attributes(
            ['uri']
            if 'uri' in self.__dict__
            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
        )

    def _reset_attributes(self):
        ### Restore the __dict__ captured at the start of __init__.
        ### NOTE(review): this aliases `_original_dict` rather than copying it,
        ### so later attribute writes also mutate the snapshot — confirm intended.
        self.__dict__ = self._original_dict

    def _set_attributes(
        self,
        *args,
        inherit_default: bool = True,
        **kw: Any
    ):
        ### Merge configuration and keyword arguments into `self._attributes`
        ### (and ultimately `self.__dict__`), in increasing precedence:
        ### type-level 'default' config -> per-label config -> explicit kwargs.
        from meerschaum.config.static import STATIC_CONFIG
        from meerschaum.utils.warnings import error

        self._attributes = {}

        default_label = STATIC_CONFIG['connectors']['default_label']

        ### NOTE: Support the legacy method of explicitly passing the type.
        label = kw.get('label', None)
        if label is None:
            if len(args) == 2:
                label = args[1]
            elif len(args) == 0:
                label = None
            else:
                label = args[0]

        ### 'default' is reserved for the inheritable per-type defaults below.
        if label == 'default':
            error(
                f"Label cannot be 'default'. Did you mean '{default_label}'?",
                InvalidAttributesError,
            )
        self.__dict__['label'] = label

        from meerschaum.config import get_config
        conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors'))
        connector_config = copy.deepcopy(get_config('system', 'connectors'))

        ### inherit attributes from 'default' if exists
        if inherit_default:
            inherit_from = 'default'
            if self.type in conn_configs and inherit_from in conn_configs[self.type]:
                _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from])
                self._attributes.update(_inherit_dict)

        ### load user config into self._attributes
        if self.type in conn_configs and self.label in conn_configs[self.type]:
            self._attributes.update(conn_configs[self.type][self.label] or {})

        ### load system config into self._sys_config
        ### (deep copy so future Connectors don't inherit changes)
        if self.type in connector_config:
            self._sys_config = copy.deepcopy(connector_config[self.type])

        ### add additional arguments or override configuration
        self._attributes.update(kw)

        ### finally, update __dict__ with _attributes.
        self.__dict__.update(self._attributes)

    def verify_attributes(
        self,
        required_attributes: Optional[List[str]] = None,
        debug: bool = False,
    ) -> None:
        """
        Ensure that the required attributes have been met.

        The Connector base class checks the minimum requirements.
        Child classes may enforce additional requirements.

        Parameters
        ----------
        required_attributes: Optional[List[str]], default None
            Attributes to be verified. If `None`, default to `['label']`.

        debug: bool, default False
            Verbosity toggle.

        Returns
        -------
        Don't return anything.

        Raises
        ------
        An error if any of the required attributes are missing.
        """
        ### NOTE(review): `debug`, `warn`, and `dprint` are currently unused here.
        from meerschaum.utils.warnings import error, warn
        from meerschaum.utils.debug import dprint
        from meerschaum.utils.misc import items_str
        if required_attributes is None:
            required_attributes = ['label']

        missing_attributes = set()
        for a in required_attributes:
            if a not in self.__dict__:
                missing_attributes.add(a)
        if len(missing_attributes) > 0:
            error(
                (
                    f"Missing {items_str(list(missing_attributes))} "
                    + f"for connector '{self.type}:{self.label}'."
                ),
                InvalidAttributesError,
                silent=True,
                stack=False
            )


    def __str__(self):
        """
        When cast to a string, return type:label.
        """
        return f"{self.type}:{self.label}"

    def __repr__(self):
        """
        Represent the connector as type:label.
        """
        return str(self)

    @property
    def meta(self) -> Dict[str, Any]:
        """
        Return the keys needed to reconstruct this Connector.
        """
        ### Underscore-prefixed keys are internal state, not connection attributes.
        _meta = {
            key: value
            for key, value in self.__dict__.items()
            if not str(key).startswith('_')
        }
        _meta.update({
            'type': self.type,
            'label': self.label,
        })
        return _meta


    @property
    def type(self) -> str:
        """
        Return the type for this connector.
        """
        ### Fall back to deriving the type from the class name
        ### (e.g. `FooConnector` -> 'foo'), then cache it.
        _type = self.__dict__.get('type', None)
        if _type is None:
            import re
            is_executor = self.__class__.__name__.lower().endswith('executor')
            suffix_regex = (
                r'connector$'
                if not is_executor
                else r'executor$'
            )
            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
            self.__dict__['type'] = _type
        return _type


    @property
    def label(self) -> str:
        """
        Return the label for this connector.
        """
        ### Fall back to the configured default label (e.g. 'main'), then cache it.
        _label = self.__dict__.get('label', None)
        if _label is None:
            from meerschaum.config.static import STATIC_CONFIG
            _label = STATIC_CONFIG['connectors']['default_label']
            self.__dict__['label'] = _label
        return _label
The base connector class to hold connection attributes.
24 def __init__( 25 self, 26 type: Optional[str] = None, 27 label: Optional[str] = None, 28 **kw: Any 29 ): 30 """ 31 Set the given keyword arguments as attributes. 32 33 Parameters 34 ---------- 35 type: str 36 The `type` of the connector (e.g. `sql`, `api`, `plugin`). 37 38 label: str 39 The `label` for the connector. 40 41 42 Examples 43 -------- 44 Run `mrsm edit config` and to edit connectors in the YAML file: 45 46 ```yaml 47 meerschaum: 48 connections: 49 {type}: 50 {label}: 51 ### attributes go here 52 ``` 53 54 """ 55 self._original_dict = copy.deepcopy(self.__dict__) 56 self._set_attributes(type=type, label=label, **kw) 57 58 ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set. 59 self.verify_attributes( 60 ['uri'] 61 if 'uri' in self.__dict__ 62 else getattr(self, 'REQUIRED_ATTRIBUTES', None) 63 )
124 def verify_attributes( 125 self, 126 required_attributes: Optional[List[str]] = None, 127 debug: bool = False, 128 ) -> None: 129 """ 130 Ensure that the required attributes have been met. 131 132 The Connector base class checks the minimum requirements. 133 Child classes may enforce additional requirements. 134 135 Parameters 136 ---------- 137 required_attributes: Optional[List[str]], default None 138 Attributes to be verified. If `None`, default to `['label']`. 139 140 debug: bool, default False 141 Verbosity toggle. 142 143 Returns 144 ------- 145 Don't return anything. 146 147 Raises 148 ------ 149 An error if any of the required attributes are missing. 150 """ 151 from meerschaum.utils.warnings import error, warn 152 from meerschaum.utils.debug import dprint 153 from meerschaum.utils.misc import items_str 154 if required_attributes is None: 155 required_attributes = ['label'] 156 157 missing_attributes = set() 158 for a in required_attributes: 159 if a not in self.__dict__: 160 missing_attributes.add(a) 161 if len(missing_attributes) > 0: 162 error( 163 ( 164 f"Missing {items_str(list(missing_attributes))} " 165 + f"for connector '{self.type}:{self.label}'." 166 ), 167 InvalidAttributesError, 168 silent=True, 169 stack=False 170 )
Ensure that the required attributes have been met.
The Connector base class checks the minimum requirements. Child classes may enforce additional requirements.
Parameters
- required_attributes (Optional[List[str]], default None):
Attributes to be verified. If None, default to ['label'].
- debug (bool, default False): Verbosity toggle.
Returns
- Don't return anything.
Raises
- An error if any of the required attributes are missing.
185 @property 186 def meta(self) -> Dict[str, Any]: 187 """ 188 Return the keys needed to reconstruct this Connector. 189 """ 190 _meta = { 191 key: value 192 for key, value in self.__dict__.items() 193 if not str(key).startswith('_') 194 } 195 _meta.update({ 196 'type': self.type, 197 'label': self.label, 198 }) 199 return _meta
Return the keys needed to reconstruct this Connector.
202 @property 203 def type(self) -> str: 204 """ 205 Return the type for this connector. 206 """ 207 _type = self.__dict__.get('type', None) 208 if _type is None: 209 import re 210 is_executor = self.__class__.__name__.lower().endswith('executor') 211 suffix_regex = ( 212 r'connector$' 213 if not is_executor 214 else r'executor$' 215 ) 216 _type = re.sub(suffix_regex, '', self.__class__.__name__.lower()) 217 self.__dict__['type'] = _type 218 return _type
Return the type for this connector.
221 @property 222 def label(self) -> str: 223 """ 224 Return the label for this connector. 225 """ 226 _label = self.__dict__.get('label', None) 227 if _label is None: 228 from meerschaum.config.static import STATIC_CONFIG 229 _label = STATIC_CONFIG['connectors']['default_label'] 230 self.__dict__['label'] = _label 231 return _label
Return the label for this connector.
class SQLConnector(Connector):
    """
    Connect to SQL databases via `sqlalchemy`.

    SQLConnectors may be used as Meerschaum instance connectors.
    Read more about connectors and instances at
    https://meerschaum.io/reference/connectors/

    """

    ### SQLConnectors may serve as Meerschaum instance connectors.
    IS_INSTANCE: bool = True

    ### Most functionality is implemented in sibling modules and
    ### attached here as methods via class-level imports.
    from ._create_engine import flavor_configs, create_engine
    from ._sql import (
        read,
        value,
        exec,
        execute,
        to_sql,
        exec_queries,
        get_connection,
        _cleanup_connections,
    )
    from meerschaum.utils.sql import test_connection
    from ._fetch import fetch, get_pipe_metadef
    from ._cli import cli, _cli_exit
    from ._pipes import (
        fetch_pipes_keys,
        create_indices,
        drop_indices,
        get_create_index_queries,
        get_drop_index_queries,
        get_add_columns_queries,
        get_alter_columns_queries,
        delete_pipe,
        get_pipe_data,
        get_pipe_data_query,
        register_pipe,
        edit_pipe,
        get_pipe_id,
        get_pipe_attributes,
        sync_pipe,
        sync_pipe_inplace,
        get_sync_time,
        pipe_exists,
        get_pipe_rowcount,
        drop_pipe,
        clear_pipe,
        deduplicate_pipe,
        get_pipe_table,
        get_pipe_columns_types,
        get_to_sql_dtype,
        get_pipe_schema,
    )
    from ._plugins import (
        register_plugin,
        delete_plugin,
        get_plugin_id,
        get_plugin_version,
        get_plugins,
        get_plugin_user_id,
        get_plugin_username,
        get_plugin_attributes,
    )
    from ._users import (
        register_user,
        get_user_id,
        get_users,
        edit_user,
        delete_user,
        get_user_password_hash,
        get_user_type,
        get_user_attributes,
    )
    from ._uri import from_uri, parse_uri
    from ._instance import (
        _log_temporary_tables_creation,
        _drop_temporary_table,
        _drop_temporary_tables,
        _drop_old_temporary_tables,
    )

    def __init__(
        self,
        label: Optional[str] = None,
        flavor: Optional[str] = None,
        wait: bool = False,
        connect: bool = False,
        debug: bool = False,
        **kw: Any
    ):
        """
        Parameters
        ----------
        label: str, default 'main'
            The identifying label for the connector.
            E.g. for `sql:main`, 'main' is the label.
            Defaults to 'main'.

        flavor: Optional[str], default None
            The database flavor, e.g.
            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
            To see supported flavors, run the `bootstrap connectors` command.

        wait: bool, default False
            If `True`, block until a database connection has been made.
            Defaults to `False`.

        connect: bool, default False
            If `True`, immediately attempt to connect the database and raise
            a warning if the connection fails.
            Defaults to `False`.

        debug: bool, default False
            Verbosity toggle.
            Defaults to `False`.

        kw: Any
            All other arguments will be passed to the connector's attributes.
            Therefore, a connector may be made without being registered,
            as long enough parameters are supplied to the constructor.
        """
        ### Normalize legacy / shorthand URI schemes before parsing.
        if 'uri' in kw:
            uri = kw['uri']
            if uri.startswith('postgres') and not uri.startswith('postgresql'):
                uri = uri.replace('postgres', 'postgresql', 1)
            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
                uri = uri.replace('postgresql://', 'postgresql+psycopg', 1)
            if uri.startswith('timescaledb://'):
                uri = uri.replace('timescaledb://', 'postgresql://', 1)
                flavor = 'timescaledb'
            kw['uri'] = uri
            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
            label = label or from_uri_params.get('label', None)
            _ = from_uri_params.pop('label', None)

            ### Sometimes the flavor may be provided with a URI.
            kw.update(from_uri_params)
            if flavor:
                kw['flavor'] = flavor


        ### set __dict__ in base class
        super().__init__(
            'sql',
            label = label or self.__dict__.get('label', None),
            **kw
        )

        ### SQLite connectors skip the inheritable 'default' config
        ### and are rebuilt without it.
        if self.__dict__.get('flavor', None) == 'sqlite':
            self._reset_attributes()
            self._set_attributes(
                'sql',
                label = label,
                inherit_default = False,
                **kw
            )
            ### For backwards compatability reasons, set the path for sql:local if its missing.
            if self.label == 'local' and not self.__dict__.get('database', None):
                from meerschaum.config._paths import SQLITE_DB_PATH
                self.database = str(SQLITE_DB_PATH)

        ### ensure flavor and label are set accordingly
        if 'flavor' not in self.__dict__:
            if flavor is None and 'uri' not in self.__dict__:
                raise Exception(
                    f" Missing flavor. Provide flavor as a key for '{self}'."
                )
            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)

        if self.flavor == 'postgres':
            self.flavor = 'postgresql'

        self._debug = debug
        ### Store the PID and thread at initialization
        ### so we can dispose of the Pool in child processes or threads.
        import os, threading
        self._pid = os.getpid()
        self._thread_ident = threading.current_thread().ident
        self._sessions = {}
        self._locks = {'_sessions': threading.RLock(), }

        ### verify the flavor's requirements are met
        if self.flavor not in self.flavor_configs:
            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
        if not self.__dict__.get('uri'):
            self.verify_attributes(
                self.flavor_configs[self.flavor].get('requirements', set()),
                debug=debug,
            )

        if wait:
            from meerschaum.connectors.poll import retry_connect
            retry_connect(connector=self, debug=debug)

        if connect:
            if not self.test_connection(debug=debug):
                from meerschaum.utils.warnings import warn
                warn(f"Failed to connect with connector '{self}'!", stack=False)

    @property
    def Session(self):
        ### Lazily build and cache a thread-local scoped session factory.
        if '_Session' not in self.__dict__:
            if self.engine is None:
                return None

            from meerschaum.utils.packages import attempt_import
            sqlalchemy_orm = attempt_import('sqlalchemy.orm')
            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
            self._Session = sqlalchemy_orm.scoped_session(session_factory)

        return self._Session

    @property
    def engine(self):
        import os, threading
        ### build the sqlalchemy engine
        if '_engine' not in self.__dict__:
            self._engine, self._engine_str = self.create_engine(include_uri=True)

        same_process = os.getpid() == self._pid
        same_thread = threading.current_thread().ident == self._thread_ident

        ### handle child processes:
        ### pooled connections must not be shared across a fork,
        ### so dispose of them when the PID changes.
        if not same_process:
            self._pid = os.getpid()
            self._thread = threading.current_thread()
            from meerschaum.utils.warnings import warn
            warn(f"Different PID detected. Disposing of connections...")
            self._engine.dispose()

        ### handle different threads
        if not same_thread:
            pass

        return self._engine

    @property
    def DATABASE_URL(self) -> str:
        """
        Return the URI connection string (alias for `SQLConnector.URI`).
        """
        _ = self.engine
        return str(self._engine_str)

    @property
    def URI(self) -> str:
        """
        Return the URI connection string.
        """
        _ = self.engine
        return str(self._engine_str)

    @property
    def IS_THREAD_SAFE(self) -> bool:
        """
        Return whether this connector may be multithreaded.
        """
        if self.flavor in ('duckdb', 'oracle'):
            return False
        if self.flavor == 'sqlite':
            return ':memory:' not in self.URI
        return True


    @property
    def metadata(self):
        """
        Return the metadata bound to this configured schema.
        """
        from meerschaum.utils.packages import attempt_import
        sqlalchemy = attempt_import('sqlalchemy')
        if '_metadata' not in self.__dict__:
            self._metadata = sqlalchemy.MetaData(schema=self.schema)
        return self._metadata


    @property
    def instance_schema(self):
        """
        Return the schema name for Meerschaum tables.
        """
        return self.schema


    @property
    def internal_schema(self):
        """
        Return the schema name for internal tables.
        """
        from meerschaum.config.static import STATIC_CONFIG
        from meerschaum.utils.packages import attempt_import
        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
        schema_name = self.__dict__.get('internal_schema', None) or (
            STATIC_CONFIG['sql']['internal_schema']
            if self.flavor not in NO_SCHEMA_FLAVORS
            else self.schema
        )

        if '_internal_schema' not in self.__dict__:
            self._internal_schema = schema_name
        return self._internal_schema


    @property
    def db(self) -> Optional[databases.Database]:
        """
        Return a cached `databases.Database` built from `DATABASE_URL`,
        or `None` if the flavor is unsupported by `databases`.
        """
        from meerschaum.utils.packages import attempt_import
        databases = attempt_import('databases', lazy=False, install=True)
        url = self.DATABASE_URL
        ### `databases` expects the plain 'mysql://' scheme without the driver suffix.
        if 'mysql' in url:
            url = url.replace('+pymysql', '')
        if '_db' not in self.__dict__:
            try:
                self._db = databases.Database(url)
            except KeyError:
                ### Likely encountered an unsupported flavor.
                from meerschaum.utils.warnings import warn
                self._db = None
        return self._db


    @property
    def db_version(self) -> Union[str, None]:
        """
        Return the database version.
        """
        _db_version = self.__dict__.get('_db_version', None)
        if _db_version is not None:
            return _db_version

        from meerschaum.utils.sql import get_db_version
        self._db_version = get_db_version(self)
        return self._db_version


    @property
    def schema(self) -> Union[str, None]:
        """
        Return the default schema to use.
        A value of `None` will not prepend a schema.
        """
        if 'schema' in self.__dict__:
            return self.__dict__['schema']

        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
        if self.flavor in NO_SCHEMA_FLAVORS:
            self.__dict__['schema'] = None
            return None

        ### Ask the live database for its default schema and cache the answer.
        sqlalchemy = mrsm.attempt_import('sqlalchemy')
        _schema = sqlalchemy.inspect(self.engine).default_schema_name
        self.__dict__['schema'] = _schema
        return _schema


    def __getstate__(self):
        ### Pickle the raw __dict__ (cached engine / sessions included).
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

    def __call__(self):
        return self
Connect to SQL databases via `sqlalchemy`.

SQLConnectors may be used as Meerschaum instance connectors.
Read more about connectors and instances at
https://meerschaum.io/reference/connectors/.
def __init__(
    self,
    label: Optional[str] = None,
    flavor: Optional[str] = None,
    wait: bool = False,
    connect: bool = False,
    debug: bool = False,
    **kw: Any
):
    """
    Build a `SQLConnector` from a label, a flavor, or a URI.

    Parameters
    ----------
    label: str, default 'main'
        The identifying label for the connector.
        E.g. for `sql:main`, 'main' is the label.

    flavor: Optional[str], default None
        The database flavor, e.g.
        `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
        To see supported flavors, run the `bootstrap connectors` command.

    wait: bool, default False
        If `True`, block until a database connection has been made.

    connect: bool, default False
        If `True`, immediately attempt to connect the database and raise
        a warning if the connection fails.

    debug: bool, default False
        Verbosity toggle.

    kw: Any
        All other arguments will be passed to the connector's attributes.
        Therefore, a connector may be made without being registered,
        as long as enough parameters are supplied to the constructor.

    Raises
    ------
    Exception
        If neither a flavor nor a URI can be determined.
    """
    if 'uri' in kw:
        uri = kw['uri']
        ### Normalize the scheme: `postgres` -> `postgresql`, then pin the
        ### psycopg driver. NOTE(fix): the driver replacement must keep the
        ### '://' separator, otherwise the URI becomes e.g.
        ### 'postgresql+psycopguser:pass@host' and cannot be parsed.
        if uri.startswith('postgres') and not uri.startswith('postgresql'):
            uri = uri.replace('postgres', 'postgresql', 1)
        if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
            uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
        if uri.startswith('timescaledb://'):
            uri = uri.replace('timescaledb://', 'postgresql://', 1)
            flavor = 'timescaledb'
        kw['uri'] = uri
        from_uri_params = self.from_uri(kw['uri'], as_dict=True)
        label = label or from_uri_params.get('label', None)
        _ = from_uri_params.pop('label', None)

        ### Sometimes the flavor may be provided with a URI.
        kw.update(from_uri_params)
        if flavor:
            kw['flavor'] = flavor

    ### set __dict__ in base class
    super().__init__(
        'sql',
        label = label or self.__dict__.get('label', None),
        **kw
    )

    if self.__dict__.get('flavor', None) == 'sqlite':
        self._reset_attributes()
        self._set_attributes(
            'sql',
            label = label,
            inherit_default = False,
            **kw
        )
        ### For backwards compatability reasons, set the path for sql:local if its missing.
        if self.label == 'local' and not self.__dict__.get('database', None):
            from meerschaum.config._paths import SQLITE_DB_PATH
            self.database = str(SQLITE_DB_PATH)

    ### ensure flavor and label are set accordingly
    if 'flavor' not in self.__dict__:
        if flavor is None and 'uri' not in self.__dict__:
            raise Exception(
                f" Missing flavor. Provide flavor as a key for '{self}'."
            )
        self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)

    if self.flavor == 'postgres':
        self.flavor = 'postgresql'

    self._debug = debug
    ### Store the PID and thread at initialization
    ### so we can dispose of the Pool in child processes or threads.
    import os, threading
    self._pid = os.getpid()
    self._thread_ident = threading.current_thread().ident
    self._sessions = {}
    self._locks = {'_sessions': threading.RLock(), }

    ### verify the flavor's requirements are met
    if self.flavor not in self.flavor_configs:
        error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
    if not self.__dict__.get('uri'):
        self.verify_attributes(
            self.flavor_configs[self.flavor].get('requirements', set()),
            debug=debug,
        )

    if wait:
        from meerschaum.connectors.poll import retry_connect
        retry_connect(connector=self, debug=debug)

    if connect:
        if not self.test_connection(debug=debug):
            from meerschaum.utils.warnings import warn
            warn(f"Failed to connect with connector '{self}'!", stack=False)
Parameters
----------
- label (str, default 'main'):
    The identifying label for the connector.
    E.g. for `sql:main`, 'main' is the label.
- flavor (Optional[str], default None):
    The database flavor, e.g. `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
    To see supported flavors, run the `bootstrap connectors` command.
- wait (bool, default False):
    If `True`, block until a database connection has been made.
- connect (bool, default False):
    If `True`, immediately attempt to connect the database and raise
    a warning if the connection fails.
- debug (bool, default False):
    Verbosity toggle.
- kw (Any):
    All other arguments will be passed to the connector's attributes.
    Therefore, a connector may be made without being registered,
    as long as enough parameters are supplied to the constructor.
@property
def Session(self):
    """
    Return the cached scoped-session factory, building it on first access.
    Returns `None` when no engine is available.
    """
    existing = self.__dict__.get('_Session')
    if existing is not None:
        return existing

    if self.engine is None:
        return None

    from meerschaum.utils.packages import attempt_import
    orm = attempt_import('sqlalchemy.orm')
    self._Session = orm.scoped_session(orm.sessionmaker(self.engine))
    return self._Session
@property
def engine(self):
    """
    Build (and cache) the SQLAlchemy engine, disposing of inherited
    connection pools when a fork (new PID) is detected.
    """
    import os, threading

    if '_engine' not in self.__dict__:
        self._engine, self._engine_str = self.create_engine(include_uri=True)

    forked = os.getpid() != self._pid
    rethreaded = threading.current_thread().ident != self._thread_ident

    ### Child processes inherit the parent's pool; dispose so fresh
    ### connections are created on next use.
    if forked:
        self._pid = os.getpid()
        self._thread = threading.current_thread()
        from meerschaum.utils.warnings import warn
        warn("Different PID detected. Disposing of connections...")
        self._engine.dispose()

    ### NOTE(review): thread changes are intentionally a no-op in the
    ### original implementation; kept as-is.
    if rethreaded:
        pass

    return self._engine
@property
def DATABASE_URL(self) -> str:
    """
    Return the URI connection string (alias for `SQLConnector.URI`).
    """
    ### Touch `engine` so `_engine_str` is guaranteed to exist.
    _ = self.engine
    return str(self._engine_str)
Return the URI connection string (alias for `SQLConnector.URI`).
@property
def URI(self) -> str:
    """
    Return the URI connection string.
    """
    ### Building the engine also populates `_engine_str`.
    _ = self.engine
    return str(self._engine_str)
Return the URI connection string.
@property
def IS_THREAD_SAFE(self) -> bool:
    """
    Return whether this connector may be multithreaded.

    DuckDB and Oracle are never thread-safe; SQLite is thread-safe only
    for file-backed (non-`:memory:`) databases; all other flavors are.

    NOTE(fix): the return annotation previously said `str`,
    but every path returns a `bool`.
    """
    if self.flavor in ('duckdb', 'oracle'):
        return False
    if self.flavor == 'sqlite':
        return ':memory:' not in self.URI
    return True
Return whether this connector may be multithreaded.
@property
def metadata(self):
    """
    Return the `sqlalchemy.MetaData` object bound to this connector's
    configured schema, constructing it on first access.
    """
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
    existing = self.__dict__.get('_metadata')
    if existing is None:
        existing = sqlalchemy.MetaData(schema=self.schema)
        self._metadata = existing
    return existing
Return the metadata bound to this configured schema.
@property
def instance_schema(self):
    """
    Return the schema under which Meerschaum's own tables are created
    (currently identical to the default schema).
    """
    return self.schema
Return the schema name for Meerschaum tables.
@property
def internal_schema(self):
    """
    Return the schema name for internal tables.

    Flavors which do not support schemata fall back to the default schema;
    an explicit `internal_schema` attribute on the connector wins.

    NOTE(fix): previously `attempt_import` was imported but never used,
    and the schema name (plus two meerschaum imports) was recomputed on
    every access even after caching. Return the cached value first.
    """
    if '_internal_schema' in self.__dict__:
        return self._internal_schema

    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
    self._internal_schema = self.__dict__.get('internal_schema', None) or (
        STATIC_CONFIG['sql']['internal_schema']
        if self.flavor not in NO_SCHEMA_FLAVORS
        else self.schema
    )
    return self._internal_schema
Return the schema name for internal tables.
@property
def db(self) -> Optional[databases.Database]:
    """
    Return the cached `databases.Database` wrapper for this connector,
    or `None` if the flavor is unsupported by the `databases` package.

    NOTE(fix): previously `attempt_import('databases', ...)` and the
    URL munging ran on every access even when `_db` was already cached,
    and `warn` was imported in the `except` branch without being used.
    """
    if '_db' not in self.__dict__:
        from meerschaum.utils.packages import attempt_import
        databases = attempt_import('databases', lazy=False, install=True)
        url = self.DATABASE_URL
        ### `databases` speaks plain 'mysql://', not 'mysql+pymysql://'.
        if 'mysql' in url:
            url = url.replace('+pymysql', '')
        try:
            self._db = databases.Database(url)
        except KeyError:
            ### Likely encountered an unsupported flavor.
            self._db = None
    return self._db
@property
def db_version(self) -> Union[str, None]:
    """
    Return (and cache) the database server's version string.
    """
    cached = self.__dict__.get('_db_version', None)
    if cached is None:
        from meerschaum.utils.sql import get_db_version
        cached = get_db_version(self)
        self._db_version = cached
    return cached
Return the database version.
@property
def schema(self) -> Union[str, None]:
    """
    Return the default schema to use.
    A value of `None` will not prepend a schema.
    """
    try:
        return self.__dict__['schema']
    except KeyError:
        pass

    from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
    if self.flavor in NO_SCHEMA_FLAVORS:
        self.__dict__['schema'] = None
        return None

    ### Ask the live engine for its default schema and cache the answer.
    sqlalchemy = mrsm.attempt_import('sqlalchemy')
    default_schema = sqlalchemy.inspect(self.engine).default_schema_name
    self.__dict__['schema'] = default_schema
    return default_schema
Return the default schema to use.
A value of `None` will not prepend a schema.
def create_engine(
    self,
    include_uri: bool = False,
    debug: bool = False,
    **kw
) -> 'sqlalchemy.engine.Engine':
    """
    Create a sqlalchemy engine by building the engine string.

    Parameters
    ----------
    include_uri: bool, default False
        If `True`, return a tuple of `(engine, engine_str)` instead of
        just the engine.

    debug: bool, default False
        Verbosity toggle; prints the (password-masked) engine string.

    kw: Any
        Additional keyword arguments forwarded to
        `sqlalchemy.create_engine()` (subject to the flavor's
        `omit_create_engine` filter).

    Returns
    -------
    The `sqlalchemy` engine (or `None` on failure), plus the engine
    string when `include_uri` is `True`.
    """
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.warnings import error, warn
    sqlalchemy = attempt_import('sqlalchemy')
    import urllib
    import copy

    ### NOTE(fix): verify the flavor BEFORE indexing `flavor_configs`
    ### below; previously an unsupported flavor raised a raw KeyError
    ### before this friendly error was ever reached.
    if self.flavor not in flavor_configs:
        error(f"Cannot create a connector with the flavor '{self.flavor}'.")

    ### Install and patch required drivers.
    if self.flavor in install_flavor_drivers:
        attempt_import(*install_flavor_drivers[self.flavor], debug=debug, lazy=False, warn=False)
    if self.flavor == 'mssql':
        pyodbc = attempt_import('pyodbc', debug=debug, lazy=False, warn=False)
        pyodbc.pooling = False
    if self.flavor in require_patching_flavors:
        from meerschaum.utils.packages import determine_version, _monkey_patch_get_distribution
        import pathlib
        for install_name, import_name in require_patching_flavors[self.flavor]:
            pkg = attempt_import(
                import_name,
                debug=debug,
                lazy=False,
                warn=False
            )
            _monkey_patch_get_distribution(
                install_name, determine_version(pathlib.Path(pkg.__file__), venv='mrsm')
            )

    ### supplement missing values with defaults (e.g. port number)
    for a, value in flavor_configs[self.flavor]['defaults'].items():
        if a not in self.__dict__:
            self.__dict__[a] = value

    _engine = flavor_configs[self.flavor].get('engine', None)
    _username = self.__dict__.get('username', None)
    _password = self.__dict__.get('password', None)
    _host = self.__dict__.get('host', None)
    _port = self.__dict__.get('port', None)
    _database = self.__dict__.get('database', None)
    _options = self.__dict__.get('options', {})
    if isinstance(_options, str):
        _options = dict(urllib.parse.parse_qsl(_options))
    _uri = self.__dict__.get('uri', None)

    ### Handle registering specific dialects (due to installing in virtual environments).
    if self.flavor in flavor_dialects:
        sqlalchemy.dialects.registry.register(*flavor_dialects[self.flavor])

    ### self._sys_config was deepcopied and can be updated safely
    if self.flavor in ("sqlite", "duckdb"):
        engine_str = f"{_engine}:///{_database}" if not _uri else _uri
        if 'create_engine' not in self._sys_config:
            self._sys_config['create_engine'] = {}
        if 'connect_args' not in self._sys_config['create_engine']:
            self._sys_config['create_engine']['connect_args'] = {}
        self._sys_config['create_engine']['connect_args'].update({"check_same_thread" : False})
    else:
        engine_str = (
            _engine + "://" + (_username if _username is not None else '') +
            ((":" + urllib.parse.quote_plus(_password)) if _password is not None else '') +
            "@" + _host + ((":" + str(_port)) if _port is not None else '') +
            (("/" + _database) if _database is not None else '')
            + (("?" + urllib.parse.urlencode(_options)) if _options else '')
        ) if not _uri else _uri

    ### Sometimes the timescaledb:// flavor can slip in.
    if _uri and self.flavor in ('timescaledb',) and self.flavor in _uri:
        engine_str = engine_str.replace(f'{self.flavor}', 'postgresql', 1)

    if debug:
        dprint(
            (
                (engine_str.replace(':' + _password, ':' + ('*' * len(_password))))
                if _password is not None else engine_str
            ) + '\n' + f"{self._sys_config.get('create_engine', {}).get('connect_args', {})}"
        )

    _kw_copy = copy.deepcopy(kw)

    ### NOTE: Order of inheritance:
    ###   1. Defaults
    ###   2. System configuration
    ###   3. Connector configuration
    ###   4. Keyword arguments
    _create_engine_args = flavor_configs.get(self.flavor, {}).get('create_engine', {})
    def _apply_create_engine_args(update):
        if 'ALL' not in flavor_configs[self.flavor].get('omit_create_engine', {}):
            _create_engine_args.update(
                { k: v for k, v in update.items()
                    if 'omit_create_engine' not in flavor_configs[self.flavor]
                    or k not in flavor_configs[self.flavor].get('omit_create_engine')
                }
            )
    _apply_create_engine_args(self._sys_config.get('create_engine', {}))
    _apply_create_engine_args(self.__dict__.get('create_engine', {}))
    _apply_create_engine_args(_kw_copy)

    try:
        engine = sqlalchemy.create_engine(
            engine_str,
            ### Dynamically split the configured pool class path,
            ### e.g. 'sqlalchemy.pool.QueuePool' into its module
            ### ('sqlalchemy.pool') and class name ('QueuePool').
            poolclass = getattr(
                attempt_import(
                    ".".join(self._sys_config['poolclass'].split('.')[:-1])
                ),
                self._sys_config['poolclass'].split('.')[-1]
            ),
            echo = debug,
            **_create_engine_args
        )
    except Exception:
        warn(f"Failed to create connector '{self}':\n{traceback.format_exc()}", stack=False)
        engine = None

    if include_uri:
        return engine, engine_str
    return engine
Create a sqlalchemy engine by building the engine string.
26def read( 27 self, 28 query_or_table: Union[str, sqlalchemy.Query], 29 params: Union[Dict[str, Any], List[str], None] = None, 30 dtype: Optional[Dict[str, Any]] = None, 31 coerce_float: bool = True, 32 chunksize: Optional[int] = -1, 33 workers: Optional[int] = None, 34 chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None, 35 as_hook_results: bool = False, 36 chunks: Optional[int] = None, 37 schema: Optional[str] = None, 38 as_chunks: bool = False, 39 as_iterator: bool = False, 40 as_dask: bool = False, 41 index_col: Optional[str] = None, 42 silent: bool = False, 43 debug: bool = False, 44 **kw: Any 45) -> Union[ 46 pandas.DataFrame, 47 dask.DataFrame, 48 List[pandas.DataFrame], 49 List[Any], 50 None, 51]: 52 """ 53 Read a SQL query or table into a pandas dataframe. 54 55 Parameters 56 ---------- 57 query_or_table: Union[str, sqlalchemy.Query] 58 The SQL query (sqlalchemy Query or string) or name of the table from which to select. 59 60 params: Optional[Dict[str, Any]], default None 61 `List` or `Dict` of parameters to pass to `pandas.read_sql()`. 62 See the pandas documentation for more information: 63 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html 64 65 dtype: Optional[Dict[str, Any]], default None 66 A dictionary of data types to pass to `pandas.read_sql()`. 67 See the pandas documentation for more information: 68 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html 69 70 chunksize: Optional[int], default -1 71 How many chunks to read at a time. `None` will read everything in one large chunk. 72 Defaults to system configuration. 73 74 **NOTE:** DuckDB does not allow for chunking. 75 76 workers: Optional[int], default None 77 How many threads to use when consuming the generator. 78 Only applies if `chunk_hook` is provided. 79 80 chunk_hook: Optional[Callable[[pandas.DataFrame], Any]], default None 81 Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. 
82 See `--sync-chunks` for an example. 83 **NOTE:** `as_iterator` MUST be False (default). 84 85 as_hook_results: bool, default False 86 If `True`, return a `List` of the outputs of the hook function. 87 Only applicable if `chunk_hook` is not None. 88 89 **NOTE:** `as_iterator` MUST be `False` (default). 90 91 chunks: Optional[int], default None 92 Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and 93 return into a single dataframe. 94 For example, to limit the returned dataframe to 100,000 rows, 95 you could specify a `chunksize` of `1000` and `chunks` of `100`. 96 97 schema: Optional[str], default None 98 If just a table name is provided, optionally specify the table schema. 99 Defaults to `SQLConnector.schema`. 100 101 as_chunks: bool, default False 102 If `True`, return a list of DataFrames. 103 Otherwise return a single DataFrame. 104 105 as_iterator: bool, default False 106 If `True`, return the pandas DataFrame iterator. 107 `chunksize` must not be `None` (falls back to 1000 if so), 108 and hooks are not called in this case. 109 110 index_col: Optional[str], default None 111 If using Dask, use this column as the index column. 112 If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame. 113 114 silent: bool, default False 115 If `True`, don't raise warnings in case of errors. 116 Defaults to `False`. 117 118 Returns 119 ------- 120 A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators, 121 or `None` if something breaks. 
122 123 """ 124 if chunks is not None and chunks <= 0: 125 return [] 126 from meerschaum.utils.sql import sql_item_name, truncate_item_name 127 from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS 128 from meerschaum.utils.packages import attempt_import, import_pandas 129 from meerschaum.utils.pool import get_pool 130 from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols 131 import warnings 132 import traceback 133 from decimal import Decimal 134 pd = import_pandas() 135 dd = None 136 is_dask = 'dask' in pd.__name__ 137 pd = attempt_import('pandas') 138 is_dask = dd is not None 139 npartitions = chunksize_to_npartitions(chunksize) 140 if is_dask: 141 chunksize = None 142 schema = schema or self.schema 143 144 pool = get_pool(workers=workers) 145 sqlalchemy = attempt_import("sqlalchemy") 146 default_chunksize = self._sys_config.get('chunksize', None) 147 chunksize = chunksize if chunksize != -1 else default_chunksize 148 if chunksize is None and as_iterator: 149 if not silent and self.flavor not in _disallow_chunks_flavors: 150 warn( 151 "An iterator may only be generated if chunksize is not None.\n" 152 + "Falling back to a chunksize of 1000.", stacklevel=3, 153 ) 154 chunksize = 1000 155 if chunksize is not None and self.flavor in _max_chunks_flavors: 156 if chunksize > _max_chunks_flavors[self.flavor]: 157 if chunksize != default_chunksize: 158 warn( 159 f"The specified chunksize of {chunksize} exceeds the maximum of " 160 + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n" 161 + f" Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.", 162 stacklevel=3, 163 ) 164 chunksize = _max_chunks_flavors[self.flavor] 165 166 ### NOTE: A bug in duckdb_engine does not allow for chunks. 
167 if chunksize is not None and self.flavor in _disallow_chunks_flavors: 168 chunksize = None 169 170 if debug: 171 import time 172 start = time.perf_counter() 173 dprint(f"[{self}]\n{query_or_table}") 174 dprint(f"[{self}] Fetching with chunksize: {chunksize}") 175 176 ### This might be sqlalchemy object or the string of a table name. 177 ### We check for spaces and quotes to see if it might be a weird table. 178 if ( 179 ' ' not in str(query_or_table) 180 or ( 181 ' ' in str(query_or_table) 182 and str(query_or_table).startswith('"') 183 and str(query_or_table).endswith('"') 184 ) 185 ): 186 truncated_table_name = truncate_item_name(str(query_or_table), self.flavor) 187 if truncated_table_name != str(query_or_table) and not silent: 188 warn( 189 f"Table '{query_or_table}' is too long for '{self.flavor}'," 190 + f" will instead read the table '{truncated_table_name}'." 191 ) 192 193 query_or_table = sql_item_name(str(query_or_table), self.flavor, schema) 194 if debug: 195 dprint(f"[{self}] Reading from table {query_or_table}") 196 formatted_query = sqlalchemy.text("SELECT * FROM " + str(query_or_table)) 197 str_query = f"SELECT * FROM {query_or_table}" 198 else: 199 str_query = query_or_table 200 201 formatted_query = ( 202 sqlalchemy.text(str_query) 203 if not is_dask and isinstance(str_query, str) 204 else format_sql_query_for_dask(str_query) 205 ) 206 207 chunk_list = [] 208 chunk_hook_results = [] 209 def _process_chunk(_chunk, _retry_on_failure: bool = True): 210 if not as_hook_results: 211 chunk_list.append(_chunk) 212 if chunk_hook is None: 213 return None 214 215 result = None 216 try: 217 result = chunk_hook( 218 _chunk, 219 workers=workers, 220 chunksize=chunksize, 221 debug=debug, 222 **kw 223 ) 224 except Exception: 225 result = False, traceback.format_exc() 226 from meerschaum.utils.formatting import get_console 227 if not silent: 228 get_console().print_exception() 229 230 ### If the chunk fails to process, try it again one more time. 
231 if isinstance(result, tuple) and result[0] is False: 232 if _retry_on_failure: 233 return _process_chunk(_chunk, _retry_on_failure=False) 234 235 return result 236 237 try: 238 stream_results = not as_iterator and chunk_hook is not None and chunksize is not None 239 with warnings.catch_warnings(): 240 warnings.filterwarnings('ignore', 'case sensitivity issues') 241 242 read_sql_query_kwargs = { 243 'params': params, 244 'dtype': dtype, 245 'coerce_float': coerce_float, 246 'index_col': index_col, 247 } 248 if is_dask: 249 if index_col is None: 250 dd = None 251 pd = attempt_import('pandas') 252 read_sql_query_kwargs.update({ 253 'chunksize': chunksize, 254 }) 255 else: 256 read_sql_query_kwargs.update({ 257 'chunksize': chunksize, 258 }) 259 260 if is_dask and dd is not None: 261 ddf = dd.read_sql_query( 262 formatted_query, 263 self.URI, 264 **read_sql_query_kwargs 265 ) 266 else: 267 268 def get_chunk_generator(connectable): 269 chunk_generator = pd.read_sql_query( 270 formatted_query, 271 self.engine, 272 **read_sql_query_kwargs 273 ) 274 to_return = ( 275 chunk_generator 276 if as_iterator or chunksize is None 277 else ( 278 list(pool.imap(_process_chunk, chunk_generator)) 279 if as_hook_results 280 else None 281 ) 282 ) 283 return chunk_generator, to_return 284 285 if self.flavor in SKIP_READ_TRANSACTION_FLAVORS: 286 chunk_generator, to_return = get_chunk_generator(self.engine) 287 else: 288 with self.engine.begin() as transaction: 289 with transaction.execution_options(stream_results=stream_results) as connection: 290 chunk_generator, to_return = get_chunk_generator(connection) 291 292 if to_return is not None: 293 return to_return 294 295 except Exception as e: 296 if debug: 297 dprint(f"[{self}] Failed to execute query:\n\n{query_or_table}\n\n") 298 if not silent: 299 warn(str(e), stacklevel=3) 300 from meerschaum.utils.formatting import get_console 301 if not silent: 302 get_console().print_exception() 303 304 return None 305 306 if is_dask and dd is 
not None: 307 ddf = ddf.reset_index() 308 return ddf 309 310 chunk_list = [] 311 read_chunks = 0 312 chunk_hook_results = [] 313 if chunksize is None: 314 chunk_list.append(chunk_generator) 315 elif as_iterator: 316 return chunk_generator 317 else: 318 try: 319 for chunk in chunk_generator: 320 if chunk_hook is not None: 321 chunk_hook_results.append( 322 chunk_hook(chunk, chunksize=chunksize, debug=debug, **kw) 323 ) 324 chunk_list.append(chunk) 325 read_chunks += 1 326 if chunks is not None and read_chunks >= chunks: 327 break 328 except Exception as e: 329 warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3) 330 from meerschaum.utils.formatting import get_console 331 if not silent: 332 get_console().print_exception() 333 334 read_chunks = 0 335 try: 336 for chunk in chunk_generator: 337 if chunk_hook is not None: 338 chunk_hook_results.append( 339 chunk_hook(chunk, chunksize=chunksize, debug=debug, **kw) 340 ) 341 chunk_list.append(chunk) 342 read_chunks += 1 343 if chunks is not None and read_chunks >= chunks: 344 break 345 except Exception as e: 346 warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3) 347 from meerschaum.utils.formatting import get_console 348 if not silent: 349 get_console().print_exception() 350 351 return None 352 353 ### If no chunks returned, read without chunks 354 ### to get columns 355 if len(chunk_list) == 0: 356 with warnings.catch_warnings(): 357 warnings.filterwarnings('ignore', 'case sensitivity issues') 358 _ = read_sql_query_kwargs.pop('chunksize', None) 359 with self.engine.begin() as connection: 360 chunk_list.append( 361 pd.read_sql_query( 362 formatted_query, 363 connection, 364 **read_sql_query_kwargs 365 ) 366 ) 367 368 ### call the hook on any missed chunks. 
369 if chunk_hook is not None and len(chunk_list) > len(chunk_hook_results): 370 for c in chunk_list[len(chunk_hook_results):]: 371 chunk_hook_results.append( 372 chunk_hook(c, chunksize=chunksize, debug=debug, **kw) 373 ) 374 375 ### chunksize is not None so must iterate 376 if debug: 377 end = time.perf_counter() 378 dprint(f"Fetched {len(chunk_list)} chunks in {round(end - start, 2)} seconds.") 379 380 if as_hook_results: 381 return chunk_hook_results 382 383 ### Skip `pd.concat()` if `as_chunks` is specified. 384 if as_chunks: 385 for c in chunk_list: 386 c.reset_index(drop=True, inplace=True) 387 for col in get_numeric_cols(c): 388 c[col] = c[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x) 389 return chunk_list 390 391 df = pd.concat(chunk_list).reset_index(drop=True) 392 ### NOTE: The calls to `canonical()` are to drop leading and trailing zeroes. 393 for col in get_numeric_cols(df): 394 df[col] = df[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x) 395 396 return df
Read a SQL query or table into a pandas dataframe.

Parameters
----------
- query_or_table (Union[str, sqlalchemy.Query]):
    The SQL query (sqlalchemy Query or string) or name of the table from which to select.
- params (Optional[Dict[str, Any]], default None):
    `List` or `Dict` of parameters to pass to `pandas.read_sql()`.
    See the pandas documentation for more information:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
- dtype (Optional[Dict[str, Any]], default None):
    A dictionary of data types to pass to `pandas.read_sql()`.
    See the pandas documentation for more information:
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
- chunksize (Optional[int], default -1):
    How many chunks to read at a time. `None` will read everything in one large chunk.
    Defaults to system configuration.
    **NOTE:** DuckDB does not allow for chunking.
- workers (Optional[int], default None):
    How many threads to use when consuming the generator.
    Only applies if `chunk_hook` is provided.
- chunk_hook (Optional[Callable[[pandas.DataFrame], Any]], default None):
    Hook function to execute once per chunk, e.g. writing and reading chunks intermittently.
    See `--sync-chunks` for an example.
    **NOTE:** `as_iterator` MUST be `False` (default).
- as_hook_results (bool, default False):
    If `True`, return a `List` of the outputs of the hook function.
    Only applicable if `chunk_hook` is not None.
    **NOTE:** `as_iterator` MUST be `False` (default).
- chunks (Optional[int], default None):
    Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and
    return into a single dataframe.
    For example, to limit the returned dataframe to 100,000 rows,
    you could specify a `chunksize` of `1000` and `chunks` of `100`.
- schema (Optional[str], default None):
    If just a table name is provided, optionally specify the table schema.
    Defaults to `SQLConnector.schema`.
- as_chunks (bool, default False):
    If `True`, return a list of DataFrames.
    Otherwise return a single DataFrame.
- as_iterator (bool, default False):
    If `True`, return the pandas DataFrame iterator.
    `chunksize` must not be `None` (falls back to 1000 if so),
    and hooks are not called in this case.
- index_col (Optional[str], default None):
    If using Dask, use this column as the index column.
    If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
- silent (bool, default False):
    If `True`, don't raise warnings in case of errors.

Returns
-------
A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators,
or `None` if something breaks.
def value(
    self,
    query: str,
    *args: Any,
    use_pandas: bool = False,
    **kw: Any
) -> Any:
    """
    Execute the provided query and return the first value.

    Parameters
    ----------
    query: str
        The SQL query to execute.

    *args: Any
        The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec`
        if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.

    use_pandas: bool, default False
        If `True`, use `meerschaum.connectors.sql.SQLConnector.read`, otherwise use
        `meerschaum.connectors.sql.SQLConnector.exec` (default).
        **NOTE:** This is always `True` for DuckDB.

    **kw: Any
        See `args`.

    Returns
    -------
    Any value returned from the query.
    """
    from meerschaum.utils.packages import attempt_import
    ### Ensure sqlalchemy is importable before executing.
    attempt_import('sqlalchemy')
    if self.flavor == 'duckdb':
        use_pandas = True
    if use_pandas:
        try:
            return self.read(query, *args, **kw).iloc[0, 0]
        except Exception:
            return None

    ### NOTE(fix): pop (not get) so 'close' / 'commit' are not forwarded
    ### via **kw below — previously a caller passing either keyword hit
    ### "got multiple values for keyword argument" in `self.exec()`.
    _close = kw.pop('close', True)
    _commit = kw.pop('commit', (self.flavor != 'mssql'))

    try:
        result, connection = self.exec(
            query,
            *args,
            with_connection=True,
            close=False,
            commit=_commit,
            **kw
        )
        first = result.first() if result is not None else None
        _val = first[0] if first is not None else None
    except Exception as e:
        warn(e, stacklevel=3)
        return None
    if _close:
        try:
            connection.close()
        except Exception as e:
            warn("Failed to close connection with exception:\n" + str(e))
    return _val
Execute the provided query and return the first value.

Parameters
----------
- query (str):
    The SQL query to execute.
- *args (Any):
    The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec`
    if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.
- use_pandas (bool, default False):
    If `True`, use `meerschaum.connectors.sql.SQLConnector.read`, otherwise use
    `meerschaum.connectors.sql.SQLConnector.exec` (default).
    **NOTE:** This is always `True` for DuckDB.
- **kw (Any):
    See `args`.

Returns
-------
Any value returned from the query.
def exec(
    self,
    query: str,
    *args: Any,
    silent: bool = False,
    debug: bool = False,
    commit: Optional[bool] = None,
    close: Optional[bool] = None,
    with_connection: bool = False,
    **kw: Any
) -> Union[
    sqlalchemy.engine.result.resultProxy,
    sqlalchemy.engine.cursor.LegacyCursorResult,
    Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection],
    Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection],
    None
]:
    """
    Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.

    If inserting data, please use bind variables to avoid SQL injection!

    Parameters
    ----------
    query: Union[str, List[str], Tuple[str]]
        The query to execute.
        If `query` is a list or tuple, call `self.exec_queries()` instead.

    args: Any
        Arguments passed to `sqlalchemy.engine.execute`.

    silent: bool, default False
        If `True`, suppress warnings.

    commit: Optional[bool], default None
        If `True`, commit the changes after execution.
        Causes issues with flavors like `'mssql'`.
        This does not apply if `query` is a list of strings.

    close: Optional[bool], default None
        If `True`, close the connection after execution.
        Causes issues with flavors like `'mssql'`.
        This does not apply if `query` is a list of strings.

    with_connection: bool, default False
        If `True`, return a tuple including the connection object.
        This does not apply if `query` is a list of strings.

    Returns
    -------
    The `sqlalchemy` result object (or `None` on failure), or a tuple with the
    connection if `with_connection` is provided.
    """
    ### A batch of queries is delegated wholesale to `exec_queries()`.
    if isinstance(query, (list, tuple)):
        return self.exec_queries(
            list(query),
            *args,
            silent=silent,
            debug=debug,
            **kw
        )

    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import("sqlalchemy")
    if debug:
        dprint(f"[{self}] Executing query:\n{query}")

    ### Defaults: close and commit unless MSSQL (commit is also skipped for
    ### MSSQL SELECTs via the substring check below).
    _close = close if close is not None else (self.flavor != 'mssql')
    _commit = commit if commit is not None else (
        (self.flavor != 'mssql' or 'select' not in str(query).lower())
    )

    ### Select and Insert objects need to be compiled (SQLAlchemy 2.0.0+).
    if not hasattr(query, 'compile'):
        query = sqlalchemy.text(query)

    connection = self.get_connection()

    try:
        transaction = connection.begin() if _commit else None
    except sqlalchemy.exc.InvalidRequestError:
        ### A stale connection may refuse a new transaction; rebuild and retry.
        connection = self.get_connection(rebuild=True)
        transaction = connection.begin()

    ### An inactive transaction (e.g. after a prior rollback) also forces a rebuild.
    if transaction is not None and not transaction.is_active:
        connection = self.get_connection(rebuild=True)
        transaction = connection.begin() if _commit else None

    result = None
    try:
        result = connection.execute(query, *args, **kw)
        if _commit:
            transaction.commit()
    except Exception as e:
        if debug:
            dprint(f"[{self}] Failed to execute query:\n\n{query}\n\n{e}")
        if not silent:
            warn(str(e), stacklevel=3)
        result = None
        if _commit:
            transaction.rollback()
        ### Rebuild the thread's connection so subsequent calls start clean.
        connection = self.get_connection(rebuild=True)
    finally:
        if _close:
            connection.close()

    if with_connection:
        return result, connection

    return result
Execute SQL code and return the sqlalchemy
result, e.g. when calling stored procedures.
If inserting data, please use bind variables to avoid SQL injection!
Parameters
- query (Union[str, List[str], Tuple[str]]):
The query to execute.
If
query
is a list or tuple, callself.exec_queries()
instead. - args (Any):
Arguments passed to
sqlalchemy.engine.execute
. - silent (bool, default False):
If
True
, suppress warnings. - commit (Optional[bool], default None):
If
True
, commit the changes after execution. Causes issues with flavors like'mssql'
. This does not apply ifquery
is a list of strings. - close (Optional[bool], default None):
If
True
, close the connection after execution. Causes issues with flavors like'mssql'
. This does not apply ifquery
is a list of strings. - with_connection (bool, default False):
If
True
, return a tuple including the connection object. This does not apply ifquery
is a list of strings.
Returns
- The
sqlalchemy
result object, or a tuple with the connection ifwith_connection
is provided.
def execute(
    self,
    *args: Any,
    **kw: Any
) -> Optional[sqlalchemy.engine.result.resultProxy]:
    """
    Alias for `meerschaum.connectors.sql.SQLConnector.exec`;
    forwards all positional and keyword arguments unchanged.
    """
    return self.exec(*args, **kw)
An alias for meerschaum.connectors.sql.SQLConnector.exec
.
def to_sql(
    self,
    df: pandas.DataFrame,
    name: str = None,
    index: bool = False,
    if_exists: str = 'replace',
    method: str = "",
    chunksize: Optional[int] = -1,
    schema: Optional[str] = None,
    silent: bool = False,
    debug: bool = False,
    as_tuple: bool = False,
    as_dict: bool = False,
    **kw
) -> Union[bool, SuccessTuple]:
    """
    Upload a DataFrame's contents to the SQL server.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame to be uploaded.

    name: str
        The name of the table to be created.

    index: bool, default False
        If True, creates the DataFrame's indices as columns.

    if_exists: str, default 'replace'
        Drop and create the table ('replace') or append if it exists
        ('append') or raise Exception ('fail').
        Options are ['replace', 'append', 'fail'].

    method: str, default ''
        None or multi. Details on pandas.to_sql.

    chunksize: Optional[int], default -1
        How many rows to insert at a time.
        `-1` means fall back to the connector's configured default.

    schema: Optional[str], default None
        Optionally override the schema for the table.
        Defaults to `SQLConnector.schema`.

    silent: bool, default False
        If `True`, suppress the warning emitted when the insert fails.

    debug: bool, default False
        Verbosity toggle.

    as_tuple: bool, default False
        If `True`, return a (success_bool, message) tuple instead of a `bool`.
        Defaults to `False`.

    as_dict: bool, default False
        If `True`, return a dictionary of transaction information.
        The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`,
        `method`, and `target`.

    kw: Any
        Additional arguments will be passed to the DataFrame's `to_sql` function

    Returns
    -------
    Either a `bool` or a `SuccessTuple` (depends on `as_tuple`).
    """
    import time
    import json
    import decimal
    from decimal import Decimal, Context
    from meerschaum.utils.warnings import error, warn
    import warnings
    import functools
    if name is None:
        error(f"Name must not be `None` to insert data into {self}.")

    ### We're requiring `name` to be positional, and sometimes it's passed in from background jobs.
    kw.pop('name', None)

    schema = schema or self.schema

    from meerschaum.utils.sql import (
        sql_item_name,
        table_exists,
        json_flavors,
        truncate_item_name,
        DROP_IF_EXISTS_FLAVORS,
    )
    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
    from meerschaum.utils.dtypes.sql import (
        NUMERIC_PRECISION_FLAVORS,
        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
    )
    from meerschaum.connectors.sql._create_engine import flavor_configs
    from meerschaum.utils.packages import attempt_import, import_pandas
    sqlalchemy = attempt_import('sqlalchemy', debug=debug)
    pd = import_pandas()
    ### Dask DataFrames take `uri=` instead of `con=` and support `parallel=`.
    is_dask = 'dask' in df.__module__

    stats = {'target': name, }
    ### resort to defaults if None
    if method == "":
        if self.flavor in _bulk_flavors:
            ### Bulk-capable flavors (e.g. PostgreSQL) use the COPY-based insert.
            method = functools.partial(psql_insert_copy, schema=self.schema)
        else:
            ### Should resolve to 'multi' or `None`.
            method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
    stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)

    default_chunksize = self._sys_config.get('chunksize', None)
    chunksize = chunksize if chunksize != -1 else default_chunksize
    ### Clamp the chunksize to the flavor's maximum, warning only when the
    ### user explicitly asked for more than the flavor supports.
    if chunksize is not None and self.flavor in _max_chunks_flavors:
        if chunksize > _max_chunks_flavors[self.flavor]:
            if chunksize != default_chunksize:
                warn(
                    f"The specified chunksize of {chunksize} exceeds the maximum of "
                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
                    stacklevel = 3,
                )
            chunksize = _max_chunks_flavors[self.flavor]
    stats['chunksize'] = chunksize

    success, msg = False, "Default to_sql message"
    start = time.perf_counter()
    if debug:
        msg = f"[{self}] Inserting {len(df)} rows with chunksize: {chunksize}..."
        print(msg, end="", flush=True)
    stats['num_rows'] = len(df)

    ### Check if the name is too long.
    truncated_name = truncate_item_name(name, self.flavor)
    if name != truncated_name:
        warn(
            f"Table '{name}' is too long for '{self.flavor}',"
            + f" will instead create the table '{truncated_name}'."
        )

    ### filter out non-pandas args
    import inspect
    to_sql_params = inspect.signature(df.to_sql).parameters
    to_sql_kw = {}
    for k, v in kw.items():
        if k in to_sql_params:
            to_sql_kw[k] = v

    to_sql_kw.update({
        'name': truncated_name,
        'schema': schema,
        ('con' if not is_dask else 'uri'): (self.engine if not is_dask else self.URI),
        'index': index,
        'if_exists': if_exists,
        'method': method,
        'chunksize': chunksize,
    })
    if is_dask:
        to_sql_kw.update({
            'parallel': True,
        })

    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
    if self.flavor == 'oracle':
        ### For some reason 'replace' doesn't work properly in pandas,
        ### so try dropping first.
        if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug):
            success = self.exec(
                f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema)
            ) is not None
            if not success:
                warn(f"Unable to drop {name}")


        ### Enforce NVARCHAR(2000) as text instead of CLOB.
        dtype = to_sql_kw.get('dtype', {})
        for col, typ in df.dtypes.items():
            if are_dtypes_equal(str(typ), 'object'):
                dtype[col] = sqlalchemy.types.NVARCHAR(2000)
            elif are_dtypes_equal(str(typ), 'int'):
                dtype[col] = sqlalchemy.types.INTEGER
        to_sql_kw['dtype'] = dtype
    elif self.flavor == 'mssql':
        ### MSSQL stores booleans as INTEGER.
        dtype = to_sql_kw.get('dtype', {})
        for col, typ in df.dtypes.items():
            if are_dtypes_equal(str(typ), 'bool'):
                dtype[col] = sqlalchemy.types.INTEGER
        to_sql_kw['dtype'] = dtype

    ### Check for JSON columns.
    if self.flavor not in json_flavors:
        json_cols = get_json_cols(df)
        if json_cols:
            for col in json_cols:
                ### Serialize unhashable objects (dicts, lists) to JSON strings.
                df[col] = df[col].apply(
                    (
                        lambda x: json.dumps(x, default=str, sort_keys=True)
                        if not isinstance(x, Hashable)
                        else x
                    )
                )

    ### Check for numeric columns.
    numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
    if numeric_precision is not None and numeric_scale is not None:
        numeric_cols = get_numeric_cols(df)
        for col in numeric_cols:
            df[col] = df[col].apply(
                lambda x: (
                    quantize_decimal(x, numeric_scale, numeric_precision)
                    if isinstance(x, Decimal)
                    else x
                )
            )

    ### Stringify UUIDs for flavors without a native Uuid type.
    if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid':
        uuid_cols = get_uuid_cols(df)
        for col in uuid_cols:
            df[col] = df[col].astype(str)

    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'case sensitivity issues')
            df.to_sql(**to_sql_kw)
        success = True
    except Exception as e:
        if not silent:
            warn(str(e))
        success, msg = False, str(e)

    end = time.perf_counter()
    if success:
        msg = f"It took {round(end - start, 2)} seconds to sync {len(df)} rows to {name}."
    stats['start'] = start
    stats['end'] = end
    stats['duration'] = end - start

    if debug:
        print(f" done.", flush=True)
        dprint(msg)

    stats['success'] = success
    stats['msg'] = msg
    if as_tuple:
        return success, msg
    if as_dict:
        return stats
    return success
Upload a DataFrame's contents to the SQL server.
Parameters
- df (pd.DataFrame): The DataFrame to be uploaded.
- name (str): The name of the table to be created.
- index (bool, default False): If True, creates the DataFrame's indices as columns.
- if_exists (str, default 'replace'): Drop and create the table ('replace') or append if it exists ('append') or raise Exception ('fail'). Options are ['replace', 'append', 'fail'].
- method (str, default ''): None or multi. Details on pandas.to_sql.
- chunksize (Optional[int], default -1): How many rows to insert at a time.
- schema (Optional[str], default None):
Optionally override the schema for the table.
Defaults to
SQLConnector.schema
. - as_tuple (bool, default False):
If
True
, return a (success_bool, message) tuple instead of abool
. Defaults toFalse
. - as_dict (bool, default False):
If
True
, return a dictionary of transaction information. The keys aresuccess
,msg
,start
,end
,duration
,num_rows
,chunksize
,method
, andtarget
. - kw (Any):
Additional arguments will be passed to the DataFrame's
to_sql
function
Returns
- Either a
bool
or aSuccessTuple
(depends onas_tuple
).
def exec_queries(
    self,
    queries: List[
        Union[
            str,
            Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
        ]
    ],
    break_on_error: bool = False,
    rollback: bool = True,
    silent: bool = False,
    debug: bool = False,
) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
    """
    Execute a list of queries in a single transaction.

    Parameters
    ----------
    queries: List[
        Union[
            str,
            Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
        ]
    ]
        The queries in the transaction to be executed.
        If a query is a tuple, the second item of the tuple
        will be considered a callable hook that returns a list of queries to be executed
        before the next item in the list.

    break_on_error: bool, default False
        If `True`, stop executing when a query fails.

    rollback: bool, default True
        If `break_on_error` is `True`, rollback the transaction if a query fails.

    silent: bool, default False
        If `True`, suppress warnings.

    Returns
    -------
    A list of SQLAlchemy results (`None` for queries that failed; a
    `(result, hook_results)` tuple for queries that had a hook).
    Note that when `break_on_error` stops the loop, the failing query's
    `None` result is NOT appended to the returned list.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    sqlalchemy, sqlalchemy_orm = attempt_import('sqlalchemy', 'sqlalchemy.orm')
    session = sqlalchemy_orm.Session(self.engine)

    result = None
    results = []
    ### All queries share one session-level transaction.
    with session.begin():
        for query in queries:
            hook = None
            result = None

            if isinstance(query, tuple):
                query, hook = query
            if isinstance(query, str):
                query = sqlalchemy.text(query)

            if debug:
                dprint(f"[{self}]\n" + str(query))

            try:
                result = session.execute(query)
                ### Flush so later queries (and hooks) see this query's effects.
                session.flush()
            except Exception as e:
                msg = (f"Encountered error while executing:\n{e}")
                if not silent:
                    warn(msg)
                elif debug:
                    dprint(f"[{self}]\n" + str(msg))
                result = None
            if result is None and break_on_error:
                if rollback:
                    session.rollback()
                break
            elif result is not None and hook is not None:
                ### Hooks may generate follow-up queries, executed recursively.
                hook_queries = hook(session)
                if hook_queries:
                    hook_results = self.exec_queries(
                        hook_queries,
                        break_on_error = break_on_error,
                        rollback=rollback,
                        silent=silent,
                        debug=debug,
                    )
                    result = (result, hook_results)

            results.append(result)

    return results
Execute a list of queries in a single transaction.
Parameters
- queries (List[Union[str, Tuple[str, Callable[[], List[str]]]]]):
- ]: The queries in the transaction to be executed. If a query is a tuple, the second item of the tuple will be considered a callable hook that returns a list of queries to be executed before the next item in the list.
- break_on_error (bool, default False):
If
True
, stop executing when a query fails. - rollback (bool, default True):
If
break_on_error
isTrue
, rollback the transaction if a query fails. - silent (bool, default False):
If
True
, suppress warnings.
Returns
- A list of SQLAlchemy results.
def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
    """
    Return the current alive connection for the calling thread.

    Parameters
    ----------
    rebuild: bool, default False
        If `True`, close the previous connection and open a new one.

    Returns
    -------
    A `sqlalchemy.engine.base.Connection` object.
    """
    import threading
    ### Connections are cached per-thread on the instance dict.
    self.__dict__.setdefault('_thread_connections', {})

    self._cleanup_connections()

    ident = threading.get_ident()
    conn_map = self.__dict__.get('_thread_connections', {})
    conn = conn_map.get(ident, None)

    if rebuild and conn is not None:
        try:
            conn.close()
        except Exception:
            pass
        conn_map.pop(ident, None)
        conn = None

    ### Open a fresh connection when none is cached or the cached one is closed.
    if conn is None or conn.closed:
        conn = self.engine.connect()
        conn_map[ident] = conn

    return conn
Return the current alive connection.
Parameters
- rebuild (bool, default False):
If
True
, close the previous connection and open a new one.
Returns
- A
sqlalchemy.engine.base.Connection
object.
def test_connection(
    self,
    **kw: Any
) -> Union[bool, None]:
    """
    Test if a successful connection to the database may be made.

    Parameters
    ----------
    **kw:
        The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Returns
    -------
    `True` if a connection is made, otherwise `False` or `None` in case of failure.
    """
    import warnings
    from meerschaum.connectors.poll import retry_connect
    ### Single attempt, no waiting, no warning, targeting this connector.
    connect_kw = {'max_retries': 1, 'retry_wait': 0, 'warn': False, 'connector': self}
    connect_kw.update(kw)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'Could not')
        try:
            return retry_connect(**connect_kw)
        except Exception:
            return False
Test if a successful connection to the database may be made.
Parameters
- **kw (Any): The keyword arguments are passed to
meerschaum.connectors.poll.retry_connect
.
Returns
True
if a connection is made, otherwiseFalse
orNone
in case of failure.
def fetch(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, str, None] = '',
    end: Union[datetime, int, str, None] = None,
    check_existing: bool = True,
    chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None,
    chunksize: Optional[int] = -1,
    workers: Optional[int] = None,
    debug: bool = False,
    **kw: Any
) -> Union['pd.DataFrame', List[Any], None]:
    """Execute the SQL definition and return a Pandas DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe object which contains the `fetch` metadata.

        - pipe.columns['datetime']: str
          - Name of the datetime column for the remote table.
        - pipe.parameters['fetch']: Dict[str, Any]
          - Parameters necessary to execute a query.
        - pipe.parameters['fetch']['definition']: str
          - Raw SQL query to execute to generate the pandas DataFrame.
        - pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
          - How many minutes before `begin` to search for data (*optional*).

    begin: Union[datetime, int, str, None], default ''
        Most recent datetime to search for data.
        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.

    end: Union[datetime, int, str, None], default None
        The latest datetime to search for data.
        If `end` is `None`, do not bound.

    check_existing: bool, default True
        If `False`, use a backtrack interval of 0 minutes.

    chunk_hook: Callable[[pd.DataFrame], Any], default None
        A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.

    chunksize: Optional[int], default -1
        How many rows to load into memory at once (when `chunk_hook` is provided).
        Otherwise the entire result set is loaded into memory.

    workers: Optional[int], default None
        How many threads to use when consuming the generator (when `chunk_hook` is provided).
        Defaults to the number of cores.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A pandas DataFrame or `None`.
    If `chunk_hook` is not None, return a list of the hook function's results.
    For SQLite (without a chunk hook), a generator of datetime-parsed chunks
    is returned instead — see the note below.
    """
    meta_def = self.get_pipe_metadef(
        pipe,
        begin=begin,
        end=end,
        check_existing=check_existing,
        debug=debug,
        **kw
    )
    as_hook_results = chunk_hook is not None
    chunks = self.read(
        meta_def,
        chunk_hook=chunk_hook,
        as_hook_results=as_hook_results,
        chunksize=chunksize,
        workers=workers,
        debug=debug,
    )
    ### if sqlite, parse for datetimes
    ### (SQLite has no native datetime type, so re-parse all datetime-dtype columns).
    if not as_hook_results and self.flavor == 'sqlite':
        from meerschaum.utils.misc import parse_df_datetimes
        ignore_cols = [
            col
            for col, dtype in pipe.dtypes.items()
            if 'datetime' not in str(dtype)
        ]
        ### NOTE(review): this returns a generator expression, not a DataFrame —
        ### presumably `self.read(...)` yields chunks here; confirm callers
        ### consume it lazily.
        return (
            parse_df_datetimes(
                chunk,
                ignore_cols=ignore_cols,
                debug=debug,
            )
            for chunk in chunks
        )
    return chunks
Execute the SQL definition and return a Pandas DataFrame.
Parameters
pipe (mrsm.Pipe): The pipe object which contains the
fetch
metadata.- pipe.columns['datetime']: str
- Name of the datetime column for the remote table.
- pipe.parameters['fetch']: Dict[str, Any]
- Parameters necessary to execute a query.
- pipe.parameters['fetch']['definition']: str
- Raw SQL query to execute to generate the pandas DataFrame.
- pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
- How many minutes before
begin
to search for data (optional).
- How many minutes before
- pipe.columns['datetime']: str
- begin (Union[datetime, int, str, None], default None):
Most recent datetime to search for data.
If
backtrack_minutes
is provided, subtractbacktrack_minutes
. - end (Union[datetime, int, str, None], default None):
The latest datetime to search for data.
If
end
isNone
, do not bound. - check_existing (bool, default True):
If
False
, use a backtrack interval of 0 minutes. - chunk_hook (Callable[[pd.DataFrame], Any], default None):
A function to pass to
SQLConnector.read()
that accepts a Pandas DataFrame. - chunksize (Optional[int], default -1):
How many rows to load into memory at once (when
chunk_hook
is provided). Otherwise the entire result set is loaded into memory. - workers (Optional[int], default None): How many threads to use when consuming the generator (when `chunk_hook is provided). Defaults to the number of cores.
- debug (bool, default False): Verbosity toggle.
Returns
- A pandas DataFrame or
None
. - If
chunk_hook
is not None, return a list of the hook function's results.
def get_pipe_metadef(
    self,
    pipe: mrsm.Pipe,
    params: Optional[Dict[str, Any]] = None,
    begin: Union[datetime, int, str, None] = '',
    end: Union[datetime, int, str, None] = None,
    check_existing: bool = True,
    debug: bool = False,
    **kw: Any
) -> Union[str, None]:
    """
    Return a pipe's meta definition fetch query.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose fetch definition will be wrapped with datetime/params bounds.

    params: Optional[Dict[str, Any]], default None
        Optional params dictionary to build the `WHERE` clause.
        See `meerschaum.utils.sql.build_where`.

    begin: Union[datetime, int, str, None], default ''
        Most recent datetime to search for data.
        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
        The empty-string default means "derive from the pipe's sync time".

    end: Union[datetime, int, str, None], default None
        The latest datetime to search for data.
        If `end` is `None`, do not bound.

    check_existing: bool, default True
        If `True`, apply the backtrack interval.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A pipe's meta definition fetch query string.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.warnings import warn, error
    from meerschaum.utils.sql import sql_item_name, dateadd_str, build_where
    from meerschaum.utils.misc import is_int
    from meerschaum.config import get_config

    ### NOTE(review): `definition` is assigned but never used below — confirm
    ### whether `get_pipe_query()` is needed only for its side effects.
    definition = get_pipe_query(pipe)

    ### Resolve the datetime column, guessing when not explicitly configured.
    if not pipe.columns.get('datetime', None):
        _dt = pipe.guess_datetime()
        dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None
        is_guess = True
    else:
        _dt = pipe.get_columns('datetime')
        dt_name = sql_item_name(_dt, self.flavor, None)
        is_guess = False

    ### Warn when bounds were requested but the datetime column is only guessed.
    if begin not in (None, '') or end is not None:
        if is_guess:
            if _dt is None:
                warn(
                    f"Unable to determine a datetime column for {pipe}."
                    + "\n    Ignoring begin and end...",
                    stack = False,
                )
                begin, end = '', None
            else:
                warn(
                    f"A datetime wasn't specified for {pipe}.\n"
                    + f"    Using column \"{_dt}\" for datetime bounds...",
                    stack = False
                )

    ### Only backtrack when `begin` was left at its sentinel default.
    apply_backtrack = begin == '' and check_existing
    backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug)
    btm = (
        int(backtrack_interval.total_seconds() / 60)
        if isinstance(backtrack_interval, timedelta)
        else backtrack_interval
    )
    begin = (
        pipe.get_sync_time(debug=debug)
        if begin == ''
        else begin
    )

    ### Drop an inverted window rather than emitting an empty-range query.
    if begin and end and begin >= end:
        begin = None

    if dt_name:
        begin_da = (
            dateadd_str(
                flavor=self.flavor,
                datepart='minute',
                number=((-1 * btm) if apply_backtrack else 0),
                begin=begin,
            )
            if begin
            else None
        )
        end_da = (
            dateadd_str(
                flavor=self.flavor,
                datepart='minute',
                number=0,
                begin=end,
            )
            if end
            else None
        )

    ### Choose the simple or experimental join-based fetch query.
    meta_def = (
        _simple_fetch_query(pipe, self.flavor) if (
            (not (pipe.columns or {}).get('id', None))
            or (not get_config('system', 'experimental', 'join_fetch'))
        ) else _join_fetch_query(pipe, self.flavor, debug=debug, **kw)
    )

    ### Detect whether the outer query already has a WHERE clause after the
    ### `definition` alias, so bounds are appended with AND instead.
    has_where = 'where' in meta_def.lower()[meta_def.lower().rfind('definition'):]
    if dt_name and (begin_da or end_da):
        ### Integer datetime axes skip the dateadd wrapping.
        definition_dt_name = (
            dateadd_str(self.flavor, 'minute', 0, f"definition.{dt_name}")
            if not is_int((begin_da or end_da))
            else f"definition.{dt_name}"
        )
        meta_def += "\n" + ("AND" if has_where else "WHERE") + " "
        has_where = True
        if begin_da:
            meta_def += f"{definition_dt_name} >= {begin_da}"
        if begin_da and end_da:
            meta_def += " AND "
        if end_da:
            meta_def += f"{definition_dt_name} < {end_da}"

    if params is not None:
        params_where = build_where(params, self, with_where=False)
        meta_def += "\n" + ("AND" if has_where else "WHERE") + " "
        has_where = True
        meta_def += params_where

    return meta_def
Return a pipe's meta definition fetch query.
params: Optional[Dict[str, Any]], default None
Optional params dictionary to build the WHERE
clause.
See meerschaum.utils.sql.build_where
.
begin: Union[datetime, int, str, None], default None
Most recent datetime to search for data.
If backtrack_minutes
is provided, subtract backtrack_minutes
.
end: Union[datetime, int, str, None], default None
The latest datetime to search for data.
If end
is None
, do not bound
check_existing: bool, default True
If True
, apply the backtrack interval.
debug: bool, default False Verbosity toggle.
Returns
- A pipe's meta definition fetch query string.
def cli(
    self,
    debug: bool = False,
) -> SuccessTuple:
    """
    Launch a subprocess for an interactive CLI.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle, forwarded to `venv_exec`.

    Returns
    -------
    A `SuccessTuple` indicating whether the CLI subprocess launched and exited cleanly.
    """
    from meerschaum.utils.venv import venv_exec
    ### NOTE(review): `env` is built (including this connector's metadata under
    ### MRSM_SQL_<LABEL>) but is never passed to `venv_exec` — confirm whether
    ### it should be forwarded to the subprocess.
    env = copy.deepcopy(dict(os.environ))
    env[f'MRSM_SQL_{self.label.upper()}'] = json.dumps(self.meta)
    ### The subprocess reconnects by label and runs the interactive exit hook.
    cli_code = (
        "import sys\n"
        "import meerschaum as mrsm\n"
        f"conn = mrsm.get_connector('sql:{self.label}')\n"
        "success, msg = conn._cli_exit()\n"
        "mrsm.pprint((success, msg))\n"
        "if not success:\n"
        "    raise Exception(msg)"
    )
    try:
        _ = venv_exec(cli_code, venv=None, debug=debug, capture_output=False)
    except Exception as e:
        return False, f"[{self}] Failed to start CLI:\n{e}"
    return True, "Success"
Launch a subprocess for an interactive CLI.
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False
) -> Optional[List[Tuple[str, str, Optional[str]]]]:
    """
    Return a list of tuples corresponding to the parameters provided.

    Parameters
    ----------
    connector_keys: Optional[List[str]], default None
        List of connector_keys to search by.

    metric_keys: Optional[List[str]], default None
        List of metric_keys to search by.

    location_keys: Optional[List[str]], default None
        List of location_keys to search by.

    tags: Optional[List[str]], default None
        Tags to filter by. Comma-joined tags within one string form an AND
        group; separate strings are OR'd together. A negation prefix excludes.

    params: Optional[Dict[str, Any]], default None
        Dictionary of additional parameters to search by.
        E.g. `--params pipe_id:1`

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of `(connector_keys, metric_key, location_key)` tuples,
    or `[]` if the pipes table does not exist.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.misc import separate_negation_values, flatten_list
    from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
    from meerschaum.config.static import STATIC_CONFIG
    import json
    from copy import deepcopy
    sqlalchemy, sqlalchemy_sql_functions = attempt_import('sqlalchemy', 'sqlalchemy.sql.functions')
    coalesce = sqlalchemy_sql_functions.coalesce

    if connector_keys is None:
        connector_keys = []
    if metric_keys is None:
        metric_keys = []
    if location_keys is None:
        location_keys = []
    else:
        ### Normalize the various null spellings to the literal string 'None'.
        location_keys = [
            (
                lk
                if lk not in ('[None]', 'None', 'null')
                else 'None'
            )
            for lk in location_keys
        ]
    if tags is None:
        tags = []

    if params is None:
        params = {}

    ### Add three primary keys to params dictionary
    ### (separated for convenience of arguments).
    cols = {
        'connector_keys': [str(ck) for ck in connector_keys],
        'metric_key': [str(mk) for mk in metric_keys],
        'location_key': [str(lk) for lk in location_keys],
    }

    ### Make deep copy so we don't mutate this somewhere else.
    parameters = deepcopy(params)
    for col, vals in cols.items():
        ### `['*']` means "match everything" — omit the filter entirely.
        if vals not in [[], ['*']]:
            parameters[col] = vals

    if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug):
        return []

    from meerschaum.connectors.sql.tables import get_tables
    pipes_tbl = get_tables(mrsm_instance=self, create=False, debug=debug)['pipes']

    _params = {}
    for k, v in parameters.items():
        _v = json.dumps(v) if isinstance(v, dict) else v
        _params[k] = _v

    negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix']
    ### Parse regular params.
    ### If a param begins with '_', negate it instead.
    ### NOTE(review): the negation branch compares the column against `key`
    ### (the column name) rather than the prefix-stripped value — this looks
    ### like a bug; confirm the intended comparison.
    _where = [
        (
            (coalesce(pipes_tbl.c[key], 'None') == val)
            if not str(val).startswith(negation_prefix)
            else (pipes_tbl.c[key] != key)
        ) for key, val in _params.items()
        if not isinstance(val, (list, tuple)) and key in pipes_tbl.c
    ]
    select_cols = (
        [
            pipes_tbl.c.connector_keys,
            pipes_tbl.c.metric_key,
            pipes_tbl.c.location_key,
        ]
    )

    q = sqlalchemy.select(*select_cols).where(sqlalchemy.and_(True, *_where))
    ### Apply IN / NOT IN filters for the three primary key columns.
    for c, vals in cols.items():
        if not isinstance(vals, (list, tuple)) or not vals or not c in pipes_tbl.c:
            continue
        _in_vals, _ex_vals = separate_negation_values(vals)
        q = q.where(coalesce(pipes_tbl.c[c], 'None').in_(_in_vals)) if _in_vals else q
        q = q.where(coalesce(pipes_tbl.c[c], 'None').not_in(_ex_vals)) if _ex_vals else q

    ### Finally, parse tags.
    ### Tags within a comma-joined group are AND'd; groups are OR'd;
    ### negated tags are excluded globally via NOT LIKE.
    tag_groups = [tag.split(',') for tag in tags]
    in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]

    ors, nands = [], []
    for _in_tags, _ex_tags in in_ex_tag_groups:
        sub_ands = []
        for nt in _in_tags:
            sub_ands.append(
                sqlalchemy.cast(
                    pipes_tbl.c['parameters'],
                    sqlalchemy.String,
                ).like(f'%"tags":%"{nt}"%')
            )
        if sub_ands:
            ors.append(sqlalchemy.and_(*sub_ands))

        for xt in _ex_tags:
            nands.append(
                sqlalchemy.cast(
                    pipes_tbl.c['parameters'],
                    sqlalchemy.String,
                ).not_like(f'%"tags":%"{xt}"%')
            )

    q = q.where(sqlalchemy.and_(*nands)) if nands else q
    q = q.where(sqlalchemy.or_(*ors)) if ors else q
    ### NULL location keys sort first (except on flavors that can't).
    loc_asc = sqlalchemy.asc(pipes_tbl.c['location_key'])
    if self.flavor not in OMIT_NULLSFIRST_FLAVORS:
        loc_asc = sqlalchemy.nullsfirst(loc_asc)
    q = q.order_by(
        sqlalchemy.asc(pipes_tbl.c['connector_keys']),
        sqlalchemy.asc(pipes_tbl.c['metric_key']),
        loc_asc,
    )

    ### execute the query and return a list of tuples
    if debug:
        dprint(q.compile(compile_kwargs={'literal_binds': True}))
    try:
        rows = (
            self.execute(q).fetchall()
            if self.flavor != 'duckdb'
            else [
                (row['connector_keys'], row['metric_key'], row['location_key'])
                for row in self.read(q).to_dict(orient='records')
            ]
        )
    except Exception as e:
        ### NOTE(review): `error` is not imported in this function's local
        ### imports — presumably imported at module level (and raises); verify,
        ### otherwise this line would NameError and `rows` be unbound below.
        error(str(e))

    return [(row[0], row[1], row[2]) for row in rows]
Return a list of tuples corresponding to the parameters provided.
Parameters
- connector_keys (Optional[List[str]], default None): List of connector_keys to search by.
- metric_keys (Optional[List[str]], default None): List of metric_keys to search by.
- location_keys (Optional[List[str]], default None): List of location_keys to search by.
- params (Optional[Dict[str, Any]], default None):
Dictionary of additional parameters to search by.
E.g. `--params pipe_id:1`.
- debug (bool, default False): Verbosity toggle.
311def create_indices( 312 self, 313 pipe: mrsm.Pipe, 314 indices: Optional[List[str]] = None, 315 debug: bool = False 316) -> bool: 317 """ 318 Create a pipe's indices. 319 """ 320 from meerschaum.utils.sql import sql_item_name, update_queries 321 from meerschaum.utils.debug import dprint 322 if debug: 323 dprint(f"Creating indices for {pipe}...") 324 if not pipe.columns: 325 warn(f"{pipe} has no index columns; skipping index creation.", stack=False) 326 return True 327 328 ix_queries = { 329 ix: queries 330 for ix, queries in self.get_create_index_queries(pipe, debug=debug).items() 331 if indices is None or ix in indices 332 } 333 success = True 334 for ix, queries in ix_queries.items(): 335 ix_success = all(self.exec_queries(queries, debug=debug, silent=False)) 336 success = success and ix_success 337 if not ix_success: 338 warn(f"Failed to create index on column: {ix}") 339 340 return success
Create a pipe's indices.
343def drop_indices( 344 self, 345 pipe: mrsm.Pipe, 346 indices: Optional[List[str]] = None, 347 debug: bool = False 348) -> bool: 349 """ 350 Drop a pipe's indices. 351 """ 352 from meerschaum.utils.debug import dprint 353 if debug: 354 dprint(f"Dropping indices for {pipe}...") 355 if not pipe.columns: 356 warn(f"Unable to drop indices for {pipe} without columns.", stack=False) 357 return False 358 ix_queries = { 359 ix: queries 360 for ix, queries in self.get_drop_index_queries(pipe, debug=debug).items() 361 if indices is None or ix in indices 362 } 363 success = True 364 for ix, queries in ix_queries.items(): 365 ix_success = all(self.exec_queries(queries, debug=debug, silent=True)) 366 if not ix_success: 367 success = False 368 if debug: 369 dprint(f"Failed to drop index on column: {ix}") 370 return success
Drop a pipe's indices.
def get_create_index_queries(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, List[str]]:
    """
    Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to which the queries will correspond.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary of index names mapping to lists of queries.
    """
    ### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly.
    if self.flavor == 'duckdb':
        return {}
    from meerschaum.utils.sql import (
        sql_item_name,
        get_distinct_col_count,
        update_queries,
        get_null_replacement,
        COALESCE_UNIQUE_INDEX_FLAVORS,
    )
    from meerschaum.config import get_config
    index_queries = {}

    ### Upsert requires a unique index; only relevant for flavors with an upsert query.
    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
    index_names = pipe.get_indices()
    indices = pipe.indices

    _datetime = pipe.get_columns('datetime', error=False)
    _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
    _datetime_name = (
        sql_item_name(_datetime, self.flavor, None)
        if _datetime is not None else None
    )
    _datetime_index_name = (
        sql_item_name(index_names['datetime'], self.flavor, None)
        if index_names.get('datetime', None)
        else None
    )
    _id = pipe.get_columns('id', error=False)
    _id_name = (
        sql_item_name(_id, self.flavor, None)
        if _id is not None
        else None
    )

    _id_index_name = (
        sql_item_name(index_names['id'], self.flavor, None)
        if index_names.get('id', None)
        else None
    )
    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
    _create_space_partition = get_config('system', 'experimental', 'space')

    ### Create the datetime index (or a hypertable on TimescaleDB).
    if _datetime is not None:
        if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
            ### Space partitioning needs the distinct count of the id column.
            _id_count = (
                get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
                if (_id is not None and _create_space_partition) else None
            )

            chunk_interval = pipe.get_chunk_interval(debug=debug)
            ### Integer datetime axes use a plain int interval; timedeltas become minutes.
            chunk_interval_minutes = (
                chunk_interval
                if isinstance(chunk_interval, int)
                else int(chunk_interval.total_seconds() / 60)
            )
            chunk_time_interval = (
                f"INTERVAL '{chunk_interval_minutes} MINUTES'"
                if isinstance(chunk_interval, timedelta)
                else f'{chunk_interval_minutes}'
            )

            dt_query = (
                f"SELECT public.create_hypertable('{_pipe_name}', " +
                f"'{_datetime}', "
                + (
                    f"'{_id}', {_id_count}, " if (_id is not None and _create_space_partition)
                    else ''
                )
                + f'chunk_time_interval => {chunk_time_interval}, '
                + 'if_not_exists => true, '
                + "migrate_data => true);"
            )
        elif self.flavor == 'mssql':
            dt_query = (
                f"CREATE CLUSTERED INDEX {_datetime_index_name} "
                f"ON {_pipe_name} ({_datetime_name})"
            )
        else: ### postgresql, sqlite, mysql, etc.
            dt_query = (
                f"CREATE INDEX {_datetime_index_name} "
                + f"ON {_pipe_name} ({_datetime_name})"
            )

        index_queries[_datetime] = [dt_query]

    ### Create the id index.
    if _id_name is not None:
        if self.flavor == 'timescaledb':
            ### Already created indices via create_hypertable.
            id_query = (
                None if (_id is not None and _create_space_partition)
                else (
                    f"CREATE INDEX IF NOT EXISTS {_id_index_name} ON {_pipe_name} ({_id_name})"
                    if _id is not None
                    else None
                )
            )
            pass
        else: ### other flavors use a plain CREATE INDEX.
            id_query = f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})"

        if id_query is not None:
            index_queries[_id] = id_query if isinstance(id_query, list) else [id_query]

    ### Create indices for other labels in `pipe.columns`.
    other_index_names = {
        ix_key: ix_unquoted
        for ix_key, ix_unquoted in index_names.items()
        if ix_key not in ('datetime', 'id')
    }
    for ix_key, ix_unquoted in other_index_names.items():
        ix_name = sql_item_name(ix_unquoted, self.flavor, None)
        cols = indices[ix_key]
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
        cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
        if not cols_names:
            continue
        cols_names_str = ", ".join(cols_names)
        index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"]

    ### Build the unique index / constraint used by upserts.
    existing_cols_types = pipe.get_columns_types(debug=debug)
    indices_cols_str = ', '.join(
        [
            sql_item_name(ix, self.flavor)
            for ix_key, ix in pipe.columns.items()
            if ix and ix in existing_cols_types
        ]
    )
    ### Some flavors treat NULLs as distinct in unique indexes,
    ### so wrap nullable index columns in COALESCE with a sentinel.
    coalesce_indices_cols_str = ', '.join(
        [
            (
                "COALESCE("
                + sql_item_name(ix, self.flavor)
                + ", "
                + get_null_replacement(existing_cols_types[ix], self.flavor)
                + ") "
            ) if ix_key != 'datetime' else (sql_item_name(ix, self.flavor))
            for ix_key, ix in pipe.columns.items()
            if ix and ix in existing_cols_types
        ]
    )
    unique_index_name = sql_item_name(pipe.target + '_unique_index', self.flavor)
    constraint_name = sql_item_name(pipe.target + '_constraint', self.flavor)
    add_constraint_query = (
        f"ALTER TABLE {_pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})"
    )
    unique_index_cols_str = (
        indices_cols_str
        if self.flavor not in COALESCE_UNIQUE_INDEX_FLAVORS
        else coalesce_indices_cols_str
    )
    create_unique_index_query = (
        f"CREATE UNIQUE INDEX {unique_index_name} ON {_pipe_name} ({unique_index_cols_str})"
    )
    constraint_queries = [create_unique_index_query]
    ### SQLite doesn't support ADD CONSTRAINT; the unique index alone suffices.
    if self.flavor != 'sqlite':
        constraint_queries.append(add_constraint_query)
    if upsert and indices_cols_str:
        index_queries[unique_index_name] = constraint_queries

    return index_queries
Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.
Parameters
- pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
- A dictionary of index names mapping to lists of queries.
def get_drop_index_queries(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Dict[str, List[str]]:
    """
    Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to which the queries will correspond.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary of column names mapping to lists of queries.
    """
    ### NOTE: Due to breaking changes within DuckDB, indices must be skipped.
    if self.flavor == 'duckdb':
        return {}
    if not pipe.exists(debug=debug):
        return {}
    from meerschaum.utils.sql import (
        sql_item_name,
        table_exists,
        hypertable_queries,
        DROP_IF_EXISTS_FLAVORS,
    )
    drop_queries = {}
    schema = self.get_pipe_schema(pipe)
    schema_prefix = (schema + '_') if schema else ''
    ### Index names are stored schema-qualified with an underscore separator.
    indices = {
        col: schema_prefix + ix
        for col, ix in pipe.get_indices().items()
    }
    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
    pipe_name_no_schema = sql_item_name(pipe.target, self.flavor, None)

    ### Only flavors with a hypertable-detection query can host hypertables.
    if self.flavor not in hypertable_queries:
        is_hypertable = False
    else:
        is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name)
        is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None

    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
    if is_hypertable:
        ### Hypertable indices can't simply be dropped; migrate the data into a
        ### plain table instead (copy out, drop, rename back).
        nuke_queries = []
        temp_table = '_' + pipe.target + '_temp_migration'
        temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))

        if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
            nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}")
        nuke_queries += [
            f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
            f"DROP TABLE {if_exists_str} {pipe_name}",
            f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}",
        ]
        ### Attach the migration to exactly one index key (prefer datetime, then id).
        nuke_ix_keys = ('datetime', 'id')
        nuked = False
        for ix_key in nuke_ix_keys:
            if ix_key in indices and not nuked:
                drop_queries[ix_key] = nuke_queries
                nuked = True

    ### Remaining indices get a plain DROP INDEX.
    drop_queries.update({
        ix_key: ["DROP INDEX " + sql_item_name(ix_unquoted, self.flavor, None)]
        for ix_key, ix_unquoted in indices.items()
        if ix_key not in drop_queries
    })
    return drop_queries
Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.
Parameters
- pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
- A dictionary of column names mapping to lists of queries.
def get_add_columns_queries(
    self,
    pipe: mrsm.Pipe,
    df: Union[pd.DataFrame, Dict[str, str]],
    _is_db_types: bool = False,
    debug: bool = False,
) -> List[str]:
    """
    Add new null columns of the correct type to a table from a dataframe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be altered.

    df: Union[pd.DataFrame, Dict[str, str]]
        The pandas DataFrame which contains new columns.
        If a dictionary is provided, assume it maps columns to Pandas data types.

    _is_db_types: bool, default False
        If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
    """
    if not pipe.exists(debug=debug):
        return []

    from decimal import Decimal
    import copy
    from meerschaum.utils.sql import (
        sql_item_name,
        SINGLE_ALTER_TABLE_FLAVORS,
        get_table_cols_types,
    )
    from meerschaum.utils.dtypes.sql import (
        get_pd_type_from_db_type,
        get_db_type_from_pd_type,
    )
    from meerschaum.utils.misc import flatten_list
    table_obj = self.get_pipe_table(pipe, debug=debug)
    ### Dask dataframes expose dtypes lazily; materialize one partition for sampling.
    is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False
    if is_dask:
        df = df.partitions[0].compute()
    df_cols_types = (
        {
            col: str(typ)
            for col, typ in df.dtypes.items()
        }
        if not isinstance(df, dict)
        else copy.deepcopy(df)
    )
    ### Refine 'object' dtypes by sniffing the first row's values.
    if not isinstance(df, dict) and len(df.index) > 0:
        for col, typ in list(df_cols_types.items()):
            if typ != 'object':
                continue
            val = df.iloc[0][col]
            if isinstance(val, (dict, list)):
                df_cols_types[col] = 'json'
            elif isinstance(val, Decimal):
                df_cols_types[col] = 'numeric'
            elif isinstance(val, str):
                df_cols_types[col] = 'str'
    ### Prefer the reflected table object; fall back to querying the catalog.
    db_cols_types = {
        col: get_pd_type_from_db_type(str(typ.type))
        for col, typ in table_obj.columns.items()
    } if table_obj is not None else {
        col: get_pd_type_from_db_type(typ)
        for col, typ in get_table_cols_types(
            pipe.target,
            self,
            schema=self.get_pipe_schema(pipe),
            debug=debug,
        ).items()
    }
    new_cols = set(df_cols_types) - set(db_cols_types)
    if not new_cols:
        return []

    new_cols_types = {
        col: get_db_type_from_pd_type(
            df_cols_types[col],
            self.flavor
        ) for col in new_cols
    }

    alter_table_query = "ALTER TABLE " + sql_item_name(
        pipe.target, self.flavor, self.get_pipe_schema(pipe)
    )
    queries = []
    for col, typ in new_cols_types.items():
        add_col_query = (
            "\nADD "
            + sql_item_name(col, self.flavor, None)
            + " " + typ + ","
        )

        ### `[:-1]` strips the trailing comma.
        if self.flavor in SINGLE_ALTER_TABLE_FLAVORS:
            queries.append(alter_table_query + add_col_query[:-1])
        else:
            alter_table_query += add_col_query

    ### For most flavors, only one query is required.
    ### This covers SQLite which requires one query per column.
    if not queries:
        queries.append(alter_table_query[:-1])

    if self.flavor != 'duckdb':
        return queries

    ### NOTE: For DuckDB, we must drop and rebuild the indices.
    drop_index_queries = list(flatten_list(
        [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()]
    ))
    create_index_queries = list(flatten_list(
        [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()]
    ))

    return drop_index_queries + queries + create_index_queries
Add new null columns of the correct type to a table from a dataframe.
Parameters
- pipe (mrsm.Pipe): The pipe to be altered.
- df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which contains new columns. If a dictionary is provided, assume it maps columns to Pandas data types.
- _is_db_types (bool, default False): If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.
Returns
- A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
def get_alter_columns_queries(
    self,
    pipe: mrsm.Pipe,
    df: Union[pd.DataFrame, Dict[str, str]],
    debug: bool = False,
) -> List[str]:
    """
    If we encounter a column of a different type, set the entire column to text.
    If the altered columns are numeric, alter to numeric instead.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be altered.

    df: Union[pd.DataFrame, Dict[str, str]]
        The pandas DataFrame which may contain altered columns.
        If a dict is provided, assume it maps columns to Pandas data types.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
    """
    if not pipe.exists(debug=debug):
        return []
    from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types
    from meerschaum.utils.dataframe import get_numeric_cols
    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum.utils.dtypes.sql import (
        get_pd_type_from_db_type,
        get_db_type_from_pd_type,
    )
    from meerschaum.utils.misc import flatten_list, generate_password, items_str
    table_obj = self.get_pipe_table(pipe, debug=debug)
    target = pipe.target
    ### Random suffix to avoid temp-table name collisions (sqlite branch below).
    session_id = generate_password(3)
    numeric_cols = (
        get_numeric_cols(df)
        if not isinstance(df, dict)
        else [
            col
            for col, typ in df.items()
            if typ == 'numeric'
        ]
    )
    df_cols_types = (
        {
            col: str(typ)
            for col, typ in df.dtypes.items()
        }
        if not isinstance(df, dict)
        else df
    )
    ### Prefer the reflected table object; fall back to querying the catalog.
    db_cols_types = {
        col: get_pd_type_from_db_type(str(typ.type))
        for col, typ in table_obj.columns.items()
    } if table_obj is not None else {
        col: get_pd_type_from_db_type(typ)
        for col, typ in get_table_cols_types(
            pipe.target,
            self,
            schema=self.get_pipe_schema(pipe),
            debug=debug,
        ).items()
    }
    pipe_bool_cols = [col for col, typ in pipe.dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
    ### DB-type -> dataframe-type pairs that should NOT count as alterations.
    pd_db_df_aliases = {
        'int': 'bool',
        'float': 'bool',
        'numeric': 'bool',
        'guid': 'object',
    }
    if self.flavor == 'oracle':
        pd_db_df_aliases['int'] = 'numeric'

    ### Columns whose dataframe dtype disagrees with the DB dtype
    ### (string DB columns already accept anything, so skip them).
    altered_cols = {
        col: (db_cols_types.get(col, 'object'), typ)
        for col, typ in df_cols_types.items()
        if not are_dtypes_equal(typ, db_cols_types.get(col, 'object').lower())
        and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
    }

    ### NOTE: Sometimes bools are coerced into ints or floats.
    altered_cols_to_ignore = set()
    for col, (db_typ, df_typ) in altered_cols.items():
        for db_alias, df_alias in pd_db_df_aliases.items():
            if db_alias in db_typ.lower() and df_alias in df_typ.lower():
                altered_cols_to_ignore.add(col)

    ### Oracle's bool handling sometimes mixes NUMBER and INT.
    for bool_col in pipe_bool_cols:
        if bool_col not in altered_cols:
            continue
        db_is_bool_compatible = (
            are_dtypes_equal('int', altered_cols[bool_col][0])
            or are_dtypes_equal('float', altered_cols[bool_col][0])
            or are_dtypes_equal('numeric', altered_cols[bool_col][0])
            or are_dtypes_equal('bool', altered_cols[bool_col][0])
        )
        df_is_bool_compatible = (
            are_dtypes_equal('int', altered_cols[bool_col][1])
            or are_dtypes_equal('float', altered_cols[bool_col][1])
            or are_dtypes_equal('numeric', altered_cols[bool_col][1])
            or are_dtypes_equal('bool', altered_cols[bool_col][1])
        )
        if db_is_bool_compatible and df_is_bool_compatible:
            altered_cols_to_ignore.add(bool_col)

    for col in altered_cols_to_ignore:
        _ = altered_cols.pop(col, None)
    if not altered_cols:
        return []

    ### Persist newly-detected numeric dtypes on the pipe's registration.
    if numeric_cols:
        pipe.dtypes.update({col: 'numeric' for col in numeric_cols})
        edit_success, edit_msg = pipe.edit(debug=debug)
        if not edit_success:
            warn(
                f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
                + f"{edit_msg}"
            )
    else:
        numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ == 'numeric'])

    numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
    text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
    altered_cols_types = {
        col: (
            numeric_type
            if col in numeric_cols
            else text_type
        )
        for col, (db_typ, typ) in altered_cols.items()
    }

    ### SQLite can't ALTER a column's type: rename -> recreate -> copy -> drop.
    if self.flavor == 'sqlite':
        temp_table_name = '-' + session_id + '_' + target
        rename_query = (
            "ALTER TABLE "
            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
            + " RENAME TO "
            + sql_item_name(temp_table_name, self.flavor, None)
        )
        create_query = (
            "CREATE TABLE "
            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
            + " (\n"
        )
        for col_name, col_obj in table_obj.columns.items():
            create_query += (
                sql_item_name(col_name, self.flavor, None)
                + " "
                + (
                    str(col_obj.type)
                    if col_name not in altered_cols
                    else altered_cols_types[col_name]
                )
                + ",\n"
            )
        create_query = create_query[:-2] + "\n)"

        insert_query = (
            "INSERT INTO "
            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
            + ' ('
            + ', '.join([
                sql_item_name(col_name, self.flavor, None)
                for col_name, _ in table_obj.columns.items()
            ])
            + ')'
            + "\nSELECT\n"
        )
        for col_name, col_obj in table_obj.columns.items():
            new_col_str = (
                sql_item_name(col_name, self.flavor, None)
                if col_name not in altered_cols
                else (
                    "CAST("
                    + sql_item_name(col_name, self.flavor, None)
                    + " AS "
                    + altered_cols_types[col_name]
                    + ")"
                )
            )
            insert_query += new_col_str + ",\n"
        insert_query = insert_query[:-2] + (
            f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
        )

        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""

        ### NOTE(review): no space between `{if_exists_str}` and the quoted name —
        ### SQLite tolerates `DROP TABLE IF EXISTS"x"`, but confirm intent.
        drop_query = f"DROP TABLE {if_exists_str}" + sql_item_name(
            temp_table_name, self.flavor, self.get_pipe_schema(pipe)
        )
        return [
            rename_query,
            create_query,
            insert_query,
            drop_query,
        ]

    queries = []
    ### Oracle can't MODIFY a populated column's type in place:
    ### add temp column -> copy -> null out original -> MODIFY -> copy back -> drop temp.
    if self.flavor == 'oracle':
        for col, typ in altered_cols_types.items():
            add_query = (
                "ALTER TABLE "
                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
                + "\nADD " + sql_item_name(col + '_temp', self.flavor, None)
                + " " + typ
            )
            queries.append(add_query)

        for col, typ in altered_cols_types.items():
            populate_temp_query = (
                "UPDATE "
                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
                + "\nSET " + sql_item_name(col + '_temp', self.flavor, None)
                + ' = ' + sql_item_name(col, self.flavor, None)
            )
            queries.append(populate_temp_query)

        for col, typ in altered_cols_types.items():
            set_old_cols_to_null_query = (
                "UPDATE "
                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
                + "\nSET " + sql_item_name(col, self.flavor, None)
                + ' = NULL'
            )
            queries.append(set_old_cols_to_null_query)

        for col, typ in altered_cols_types.items():
            alter_type_query = (
                "ALTER TABLE "
                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
                + "\nMODIFY " + sql_item_name(col, self.flavor, None) + ' '
                + typ
            )
            queries.append(alter_type_query)

        for col, typ in altered_cols_types.items():
            set_old_to_temp_query = (
                "UPDATE "
                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
                + "\nSET " + sql_item_name(col, self.flavor, None)
                + ' = ' + sql_item_name(col + '_temp', self.flavor, None)
            )
            queries.append(set_old_to_temp_query)

        for col, typ in altered_cols_types.items():
            drop_temp_query = (
                "ALTER TABLE "
                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
                + "\nDROP COLUMN " + sql_item_name(col + '_temp', self.flavor, None)
            )
            queries.append(drop_temp_query)

        return queries


    ### Generic flavors: a single ALTER TABLE with one clause per column.
    query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
    for col, typ in altered_cols_types.items():
        alter_col_prefix = (
            'ALTER' if self.flavor not in ('mysql', 'mariadb', 'oracle')
            else 'MODIFY'
        )
        type_prefix = (
            '' if self.flavor in ('mssql', 'mariadb', 'mysql')
            else 'TYPE '
        )
        column_str = 'COLUMN' if self.flavor != 'oracle' else ''
        query += (
            f"\n{alter_col_prefix} {column_str} "
            + sql_item_name(col, self.flavor, None)
            + " " + type_prefix + typ + ","
        )

    query = query[:-1]
    queries.append(query)
    if self.flavor != 'duckdb':
        return queries

    ### NOTE: For DuckDB, we must drop and rebuild the indices.
    drop_index_queries = list(flatten_list(
        [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()]
    ))
    create_index_queries = list(flatten_list(
        [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()]
    ))

    return drop_index_queries + queries + create_index_queries
If we encounter a column of a different type, set the entire column to text. If the altered columns are numeric, alter to numeric instead.
Parameters
- pipe (mrsm.Pipe): The pipe to be altered.
- df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which may contain altered columns. If a dict is provided, assume it maps columns to Pandas data types.
Returns
- A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
627def delete_pipe( 628 self, 629 pipe: mrsm.Pipe, 630 debug: bool = False, 631) -> SuccessTuple: 632 """ 633 Delete a Pipe's registration. 634 """ 635 from meerschaum.utils.sql import sql_item_name 636 from meerschaum.utils.debug import dprint 637 from meerschaum.utils.packages import attempt_import 638 sqlalchemy = attempt_import('sqlalchemy') 639 640 if not pipe.id: 641 return False, f"{pipe} is not registered." 642 643 ### ensure pipes table exists 644 from meerschaum.connectors.sql.tables import get_tables 645 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 646 647 q = sqlalchemy.delete(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id) 648 if not self.exec(q, debug=debug): 649 return False, f"Failed to delete registration for {pipe}." 650 651 return True, "Success"
Delete a Pipe's registration.
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, str, None] = None,
    end: Union[datetime, str, None] = None,
    params: Optional[Dict[str, Any]] = None,
    order: str = 'asc',
    limit: Optional[int] = None,
    begin_add_minutes: int = 0,
    end_add_minutes: int = 0,
    debug: bool = False,
    **kw: Any
) -> Union[pd.DataFrame, None]:
    """
    Access a pipe's data from the SQL instance.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to get data from.

    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, str, None], default None
        If provided, get rows newer than or equal to this value.

    end: Union[datetime, str, None], default None
        If provided, get rows older than or equal to this value.

    params: Optional[Dict[str, Any]], default None
        Additional parameters to filter by.
        See `meerschaum.connectors.sql.build_where`.

    order: Optional[str], default 'asc'
        The selection order for all of the indices in the query.
        If `None`, omit the `ORDER BY` clause.

    limit: Optional[int], default None
        If specified, limit the number of rows retrieved to this value.

    begin_add_minutes: int, default 0
        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).

    end_add_minutes: int, default 0
        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).

    chunksize: Optional[int], default -1
        The size of dataframe chunks to load into memory.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `pd.DataFrame` of the pipe's data.
    """
    import json
    from meerschaum.utils.sql import sql_item_name
    from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.dtypes import attempt_cast_to_numeric, attempt_cast_to_uuid
    pd = import_pandas()
    is_dask = 'dask' in pd.__name__

    dtypes = pipe.dtypes
    if dtypes:
        if self.flavor == 'sqlite':
            ### SQLite has no native datetime type, so coerce the
            ### (possibly guessed) datetime axis column's pandas dtype.
            ### NOTE(review): `dt` and `is_guess` are assigned but unused below.
            if not pipe.columns.get('datetime', None):
                _dt = pipe.guess_datetime()
                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
                is_guess = True
            else:
                _dt = pipe.get_columns('datetime')
                dt = sql_item_name(_dt, self.flavor, None)
                is_guess = False

            if _dt:
                dt_type = dtypes.get(_dt, 'object').lower()
                if 'datetime' not in dt_type:
                    ### Integer datetime axes stay as ints.
                    if 'int' not in dt_type:
                        dtypes[_dt] = 'datetime64[ns]'
    ### Resolve the effective column selection against the existing table columns.
    existing_cols = pipe.get_columns_types(debug=debug)
    select_columns = (
        [
            col
            for col in existing_cols
            if col not in (omit_columns or [])
        ]
        if not select_columns
        else [
            col
            for col in select_columns
            if col in existing_cols
            and col not in (omit_columns or [])
        ]
    )
    if select_columns:
        dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
    dtypes = {
        col: to_pandas_dtype(typ)
        for col, typ in dtypes.items()
        if col in select_columns and col not in (omit_columns or [])
    }
    query = self.get_pipe_data_query(
        pipe,
        select_columns=select_columns,
        omit_columns=omit_columns,
        begin=begin,
        end=end,
        params=params,
        order=order,
        limit=limit,
        begin_add_minutes=begin_add_minutes,
        end_add_minutes=end_add_minutes,
        debug=debug,
        **kw
    )

    if is_dask:
        index_col = pipe.columns.get('datetime', None)
        kw['index_col'] = index_col

    numeric_columns = [
        col
        for col, typ in pipe.dtypes.items()
        if typ == 'numeric' and col in dtypes
    ]

    uuid_columns = [
        col
        for col, typ in pipe.dtypes.items()
        if typ == 'uuid' and col in dtypes
    ]

    ### Disable float coercion when numeric (Decimal) columns must stay exact.
    kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))

    df = self.read(
        query,
        dtype=dtypes,
        debug=debug,
        **kw
    )
    for col in numeric_columns:
        if col not in df.columns:
            continue
        df[col] = df[col].apply(attempt_cast_to_numeric)

    for col in uuid_columns:
        if col not in df.columns:
            continue
        df[col] = df[col].apply(attempt_cast_to_uuid)

    if self.flavor == 'sqlite':
        ignore_dt_cols = [
            col
            for col, dtype in pipe.dtypes.items()
            if 'datetime' not in str(dtype)
        ]
        ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
        df = (
            parse_df_datetimes(
                df,
                ignore_cols=ignore_dt_cols,
                chunksize=kw.get('chunksize', None),
                debug=debug,
            ) if isinstance(df, pd.DataFrame) else (
                [
                    parse_df_datetimes(
                        c,
                        ignore_cols=ignore_dt_cols,
                        chunksize=kw.get('chunksize', None),
                        debug=debug,
                    )
                    for c in df
                ]
            )
        )
    ### Deserialize JSON columns stored as text.
    for col, typ in dtypes.items():
        if typ != 'json':
            continue
        df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
    return df
Access a pipe's data from the SQL instance.
Parameters
- pipe (mrsm.Pipe:): The pipe to get data from.
- select_columns (Optional[List[str]], default None):
If provided, only select these given columns.
Otherwise select all available columns (i.e. `SELECT *`).
- omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, str, None], default None): If provided, get rows newer than or equal to this value.
- end (Union[datetime, str, None], default None): If provided, get rows older than or equal to this value.
- params (Optional[Dict[str, Any]], default None):
Additional parameters to filter by.
See
meerschaum.connectors.sql.build_where
- order (Optional[str], default 'asc'):
The selection order for all of the indices in the query.
If `None`, omit the `ORDER BY` clause.
- limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
- begin_add_minutes (int, default 0):
The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
- end_add_minutes (int, default 0):
The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
- chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
- debug (bool, default False): Verbosity toggle.
Returns
- A
pd.DataFrame
of the pipe's data.
def get_pipe_data_query(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, str, None] = None,
    end: Union[datetime, int, str, None] = None,
    params: Optional[Dict[str, Any]] = None,
    order: Optional[str] = 'asc',
    sort_datetimes: bool = False,
    limit: Optional[int] = None,
    begin_add_minutes: int = 0,
    end_add_minutes: int = 0,
    replace_nulls: Optional[str] = None,
    debug: bool = False,
    **kw: Any
) -> Union[str, None]:
    """
    Return the `SELECT` query for retrieving a pipe's data from its instance.

    Parameters
    ----------
    pipe: mrsm.Pipe:
        The pipe to get data from.

    select_columns: Optional[List[str]], default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: Optional[List[str]], default None
        If provided, remove these columns from the selection.

    begin: Union[datetime, int, str, None], default None
        If provided, get rows newer than or equal to this value.

    end: Union[datetime, str, None], default None
        If provided, get rows older than or equal to this value.

    params: Optional[Dict[str, Any]], default None
        Additional parameters to filter by.
        See `meerschaum.connectors.sql.build_where`.

    order: Optional[str], default 'asc'
        The selection order for all of the indices in the query.
        If `None`, omit the `ORDER BY` clause.

    sort_datetimes: bool, default False
        Alias for `order='desc'`.

    limit: Optional[int], default None
        If specified, limit the number of rows retrieved to this value.

    begin_add_minutes: int, default 0
        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).

    end_add_minutes: int, default 0
        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).

    chunksize: Optional[int], default -1
        The size of dataframe chunks to load into memory.

    replace_nulls: Optional[str], default None
        If provided, replace null values with this value.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SELECT` query to retrieve a pipe's data.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.misc import items_str
    from meerschaum.utils.sql import sql_item_name, dateadd_str

    ### Restrict the selection to columns which actually exist on the table,
    ### then drop any explicitly omitted columns.
    existing_cols = pipe.get_columns_types(debug=debug)
    select_columns = (
        [col for col in existing_cols]
        if not select_columns
        else [col for col in select_columns if col in existing_cols]
    )
    if omit_columns:
        select_columns = [col for col in select_columns if col not in omit_columns]

    if order is None and sort_datetimes:
        order = 'desc'

    ### An empty-string `begin` is a sentinel meaning "resume from the sync time
    ### minus the backtrack interval".
    if begin == '':
        begin = pipe.get_sync_time(debug=debug)
        backtrack_interval = pipe.get_backtrack_interval(debug=debug)
        if begin is not None:
            begin -= backtrack_interval

    cols_names = [sql_item_name(col, self.flavor, None) for col in select_columns]
    select_cols_str = (
        'SELECT\n    '
        + ',\n    '.join(
            [
                (
                    col_name
                    ### NOTE: Fixed — the quoted column name must be interpolated
                    ### inside COALESCE (previously the literal text `col_name`
                    ### was emitted, producing invalid SQL when `replace_nulls` is set).
                    if not replace_nulls
                    else f"COALESCE({col_name}, '{replace_nulls}') AS {col_name}"
                )
                for col_name in cols_names
            ]
        )
    )
    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
    query = f"{select_cols_str}\nFROM {pipe_table_name}"
    where = ""

    if order is not None:
        default_order = 'asc'
        if order not in ('asc', 'desc'):
            warn(f"Ignoring unsupported order '{order}'. Falling back to '{default_order}'.")
            order = default_order
        order = order.upper()

    ### Determine the datetime axis, guessing one if none is configured.
    if not pipe.columns.get('datetime', None):
        _dt = pipe.guess_datetime()
        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
        is_guess = True
    else:
        _dt = pipe.get_columns('datetime')
        dt = sql_item_name(_dt, self.flavor, None)
        is_guess = False

    quoted_indices = {
        key: sql_item_name(val, self.flavor, None)
        for key, val in pipe.columns.items()
        if val in existing_cols
    }

    if begin is not None or end is not None:
        if is_guess:
            if _dt is None:
                warn(
                    f"No datetime could be determined for {pipe}."
                    + "\n    Ignoring begin and end...",
                    stack=False,
                )
                begin, end = None, None
            else:
                warn(
                    f"A datetime wasn't specified for {pipe}.\n"
                    + f"    Using column \"{_dt}\" for datetime bounds...",
                    stack=False,
                )

    is_dt_bound = False
    if begin is not None and _dt in existing_cols:
        begin_da = dateadd_str(
            flavor=self.flavor,
            datepart='minute',
            number=begin_add_minutes,
            begin=begin,
        )
        where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
        is_dt_bound = True

    if end is not None and _dt in existing_cols:
        ### For integer datetime axes, an equal `begin` and `end` would select
        ### nothing with a strict upper bound, so widen `end` by one.
        if 'int' in str(type(end)).lower() and end == begin:
            end += 1
        end_da = dateadd_str(
            flavor=self.flavor,
            datepart='minute',
            number=end_add_minutes,
            begin=end
        )
        where += f"{dt} < {end_da}"
        is_dt_bound = True

    if params is not None:
        from meerschaum.utils.sql import build_where
        valid_params = {k: v for k, v in params.items() if k in existing_cols}
        if valid_params:
            where += build_where(valid_params, self).replace(
                'WHERE', ('AND' if is_dt_bound else "")
            )

    if len(where) > 0:
        query += "\nWHERE " + where

    if order is not None:
        ### Sort by indices, starting with datetime.
        order_by = ""
        if quoted_indices:
            order_by += "\nORDER BY "
            if _dt and _dt in existing_cols:
                order_by += dt + ' ' + order + ','
            for key, quoted_col_name in quoted_indices.items():
                if dt == quoted_col_name:
                    continue
                order_by += ' ' + quoted_col_name + ' ' + order + ','
            order_by = order_by[:-1]

        query += order_by

    ### Apply the row limit with the flavor-appropriate syntax.
    if isinstance(limit, int):
        if self.flavor == 'mssql':
            query = f'SELECT TOP {limit}\n' + query[len("SELECT "):]
        elif self.flavor == 'oracle':
            query = (
                f"SELECT * FROM (\n    {query}\n)\n"
                + f"WHERE ROWNUM IN ({', '.join([str(i) for i in range(1, limit+1)])})"
            )
        else:
            query += f"\nLIMIT {limit}"

    if debug:
        to_print = (
            []
            + ([f"begin='{begin}'"] if begin else [])
            + ([f"end='{end}'"] if end else [])
            + ([f"params={params}"] if params else [])
        )
        dprint("Getting pipe data with constraints: " + items_str(to_print, quotes=False))

    return query
Return the SELECT
query for retrieving a pipe's data from its instance.
Parameters
- pipe (mrsm.Pipe:): The pipe to get data from.
- select_columns (Optional[List[str]], default None):
If provided, only select these given columns.
Otherwise select all available columns (i.e.
SELECT *
). - omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, int, str, None], default None): If provided, get rows newer than or equal to this value.
- end (Union[datetime, str, None], default None): If provided, get rows older than or equal to this value.
- params (Optional[Dict[str, Any]], default None):
Additional parameters to filter by.
See
meerschaum.connectors.sql.build_where
- order (Optional[str], default 'asc'):
The selection order for all of the indices in the query.
If `None`, omit the `ORDER BY` clause.
- sort_datetimes (bool, default False): Alias for `order='desc'`.
- limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
- begin_add_minutes (int, default 0):
The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
- end_add_minutes (int, default 0):
The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
- chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
- replace_nulls (Optional[str], default None): If provided, replace null values with this value.
- debug (bool, default False): Verbosity toggle.
Returns
- A
SELECT
query to retrieve a pipe's data.
19def register_pipe( 20 self, 21 pipe: mrsm.Pipe, 22 debug: bool = False, 23) -> SuccessTuple: 24 """ 25 Register a new pipe. 26 A pipe's attributes must be set before registering. 27 """ 28 from meerschaum.utils.debug import dprint 29 from meerschaum.utils.packages import attempt_import 30 from meerschaum.utils.sql import json_flavors 31 32 ### ensure pipes table exists 33 from meerschaum.connectors.sql.tables import get_tables 34 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 35 36 if pipe.get_id(debug=debug) is not None: 37 return False, f"{pipe} is already registered." 38 39 ### NOTE: if `parameters` is supplied in the Pipe constructor, 40 ### then `pipe.parameters` will exist and not be fetched from the database. 41 42 ### 1. Prioritize the Pipe object's `parameters` first. 43 ### E.g. if the user manually sets the `parameters` property 44 ### or if the Pipe already exists 45 ### (which shouldn't be able to be registered anyway but that's an issue for later). 46 parameters = None 47 try: 48 parameters = pipe.parameters 49 except Exception as e: 50 if debug: 51 dprint(str(e)) 52 parameters = None 53 54 ### ensure `parameters` is a dictionary 55 if parameters is None: 56 parameters = {} 57 58 import json 59 sqlalchemy = attempt_import('sqlalchemy') 60 values = { 61 'connector_keys' : pipe.connector_keys, 62 'metric_key' : pipe.metric_key, 63 'location_key' : pipe.location_key, 64 'parameters' : ( 65 json.dumps(parameters) 66 if self.flavor not in json_flavors 67 else parameters 68 ), 69 } 70 query = sqlalchemy.insert(pipes_tbl).values(**values) 71 result = self.exec(query, debug=debug) 72 if result is None: 73 return False, f"Failed to register {pipe}." 74 return True, f"Successfully registered {pipe}."
Register a new pipe. A pipe's attributes must be set before registering.
77def edit_pipe( 78 self, 79 pipe : mrsm.Pipe = None, 80 patch: bool = False, 81 debug: bool = False, 82 **kw : Any 83) -> SuccessTuple: 84 """ 85 Persist a Pipe's parameters to its database. 86 87 Parameters 88 ---------- 89 pipe: mrsm.Pipe, default None 90 The pipe to be edited. 91 patch: bool, default False 92 If patch is `True`, update the existing parameters by cascading. 93 Otherwise overwrite the parameters (default). 94 debug: bool, default False 95 Verbosity toggle. 96 """ 97 98 if pipe.id is None: 99 return False, f"{pipe} is not registered and cannot be edited." 100 101 from meerschaum.utils.debug import dprint 102 from meerschaum.utils.packages import attempt_import 103 from meerschaum.utils.sql import json_flavors 104 if not patch: 105 parameters = pipe.__dict__.get('_attributes', {}).get('parameters', {}) 106 else: 107 from meerschaum import Pipe 108 from meerschaum.config._patch import apply_patch_to_config 109 original_parameters = Pipe( 110 pipe.connector_keys, pipe.metric_key, pipe.location_key, 111 mrsm_instance=pipe.instance_keys 112 ).parameters 113 parameters = apply_patch_to_config( 114 original_parameters, 115 pipe.parameters 116 ) 117 118 ### ensure pipes table exists 119 from meerschaum.connectors.sql.tables import get_tables 120 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 121 122 import json 123 sqlalchemy = attempt_import('sqlalchemy') 124 125 values = { 126 'parameters': ( 127 json.dumps(parameters) 128 if self.flavor not in json_flavors 129 else parameters 130 ), 131 } 132 q = sqlalchemy.update(pipes_tbl).values(**values).where( 133 pipes_tbl.c.pipe_id == pipe.id 134 ) 135 136 result = self.exec(q, debug=debug) 137 message = ( 138 f"Successfully edited {pipe}." 139 if result is not None else f"Failed to edit {pipe}." 140 ) 141 return (result is not None), message
Persist a Pipe's parameters to its database.
Parameters
- pipe (mrsm.Pipe, default None): The pipe to be edited.
- patch (bool, default False):
If patch is `True`, update the existing parameters by cascading.
Otherwise overwrite the parameters (default).
- debug (bool, default False): Verbosity toggle.
1067def get_pipe_id( 1068 self, 1069 pipe: mrsm.Pipe, 1070 debug: bool = False, 1071) -> Any: 1072 """ 1073 Get a Pipe's ID from the pipes table. 1074 """ 1075 if pipe.temporary: 1076 return None 1077 from meerschaum.utils.packages import attempt_import 1078 import json 1079 sqlalchemy = attempt_import('sqlalchemy') 1080 from meerschaum.connectors.sql.tables import get_tables 1081 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1082 1083 query = sqlalchemy.select(pipes_tbl.c.pipe_id).where( 1084 pipes_tbl.c.connector_keys == pipe.connector_keys 1085 ).where( 1086 pipes_tbl.c.metric_key == pipe.metric_key 1087 ).where( 1088 (pipes_tbl.c.location_key == pipe.location_key) if pipe.location_key is not None 1089 else pipes_tbl.c.location_key.is_(None) 1090 ) 1091 _id = self.value(query, debug=debug, silent=pipe.temporary) 1092 if _id is not None: 1093 _id = int(_id) 1094 return _id
Get a Pipe's ID from the pipes table.
1097def get_pipe_attributes( 1098 self, 1099 pipe: mrsm.Pipe, 1100 debug: bool = False, 1101) -> Dict[str, Any]: 1102 """ 1103 Get a Pipe's attributes dictionary. 1104 """ 1105 from meerschaum.connectors.sql.tables import get_tables 1106 from meerschaum.utils.packages import attempt_import 1107 sqlalchemy = attempt_import('sqlalchemy') 1108 1109 if pipe.get_id(debug=debug) is None: 1110 return {} 1111 1112 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1113 1114 try: 1115 q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id) 1116 if debug: 1117 dprint(q) 1118 attributes = ( 1119 dict(self.exec(q, silent=True, debug=debug).first()._mapping) 1120 if self.flavor != 'duckdb' 1121 else self.read(q, debug=debug).to_dict(orient='records')[0] 1122 ) 1123 except Exception as e: 1124 import traceback 1125 traceback.print_exc() 1126 warn(e) 1127 print(pipe) 1128 return {} 1129 1130 ### handle non-PostgreSQL databases (text vs JSON) 1131 if not isinstance(attributes.get('parameters', None), dict): 1132 try: 1133 import json 1134 parameters = json.loads(attributes['parameters']) 1135 if isinstance(parameters, str) and parameters[0] == '{': 1136 parameters = json.loads(parameters) 1137 attributes['parameters'] = parameters 1138 except Exception as e: 1139 attributes['parameters'] = {} 1140 1141 return attributes
Get a Pipe's attributes dictionary.
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
    begin: Optional[datetime] = None,
    end: Optional[datetime] = None,
    chunksize: Optional[int] = -1,
    check_existing: bool = True,
    blocking: bool = True,
    debug: bool = False,
    _check_temporary_tables: bool = True,
    **kw: Any
) -> SuccessTuple:
    """
    Sync a pipe using a database connection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The Meerschaum Pipe instance into which to sync the data.

    df: Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]
        An optional DataFrame or equivalent to sync into the pipe.
        Defaults to `None`.

    begin: Optional[datetime], default None
        Optionally specify the earliest datetime to search for data.
        Defaults to `None`.

    end: Optional[datetime], default None
        Optionally specify the latest datetime to search for data.
        Defaults to `None`.

    chunksize: Optional[int], default -1
        Specify the number of rows to sync per chunk.
        If `-1`, resort to system configuration (default is `900`).
        A `chunksize` of `None` will sync all rows in one transaction.
        Defaults to `-1`.

    check_existing: bool, default True
        If `True`, pull and diff with existing data from the pipe. Defaults to `True`.

    blocking: bool, default True
        If `True`, wait for sync to finish and return its result, otherwise asynchronously sync.
        Defaults to `True`.

    debug: bool, default False
        Verbosity toggle. Defaults to False.

    kw: Any
        Catch-all for keyword arguments.

    Returns
    -------
    A `SuccessTuple` of success (`bool`) and message (`str`).
    """
    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.sql import get_update_queries, sql_item_name, json_flavors, update_queries
    from meerschaum.utils.misc import generate_password
    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum import Pipe
    import time
    import copy
    pd = import_pandas()
    if df is None:
        msg = f"DataFrame is None. Cannot sync {pipe}."
        warn(msg)
        return False, msg

    start = time.perf_counter()

    ### Auto-register the pipe on its first (non-temporary) sync.
    if not pipe.temporary and not pipe.get_id(debug=debug):
        register_tuple = pipe.register(debug=debug)
        if not register_tuple[0]:
            return register_tuple

    ### df is the dataframe returned from the remote source
    ### via the connector
    if debug:
        ### NOTE(review): `dprint` is not imported in this function —
        ### presumably imported at module level; confirm.
        dprint("Fetched data:\n" + str(df))

    ### Non-DataFrame payloads (str / dict / list) are coerced here.
    if not isinstance(df, pd.DataFrame):
        df = pipe.enforce_dtypes(
            df,
            chunksize=chunksize,
            safe_copy=kw.get('safe_copy', False),
            debug=debug,
        )

    ### if table does not exist, create it with indices
    is_new = False
    add_cols_query = None
    if not pipe.exists(debug=debug):
        check_existing = False
        is_new = True
    else:
        ### Check for new columns.
        add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
        if add_cols_queries:
            if not self.exec_queries(add_cols_queries, debug=debug):
                warn(f"Failed to add new columns to {pipe}.")

        alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
        if alter_cols_queries:
            if not self.exec_queries(alter_cols_queries, debug=debug):
                warn(f"Failed to alter columns for {pipe}.")
            else:
                _ = pipe.infer_dtypes(persist=True)

    ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
    ### so infer bools and persist them to `dtypes`.
    ### MSSQL supports `BIT` for booleans, but we coerce bools to int for MSSQL
    ### to avoid merge issues.
    if self.flavor in ('oracle', 'sqlite', 'mssql', 'mysql', 'mariadb'):
        pipe_dtypes = pipe.dtypes
        new_bool_cols = {
            col: 'bool[pyarrow]'
            for col, typ in df.dtypes.items()
            if col not in pipe_dtypes
            and are_dtypes_equal(str(typ), 'bool')
        }
        pipe_dtypes.update(new_bool_cols)
        pipe.dtypes = pipe_dtypes
        if new_bool_cols and not pipe.temporary:
            infer_bool_success, infer_bool_msg = pipe.edit(debug=debug)
            if not infer_bool_success:
                return infer_bool_success, infer_bool_msg

    ### Upsert skips the diff: the whole frame goes through the update path below.
    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
    if upsert:
        check_existing = False
    kw['safe_copy'] = kw.get('safe_copy', False)

    unseen_df, update_df, delta_df = (
        pipe.filter_existing(
            df,
            chunksize=chunksize,
            debug=debug,
            **kw
        ) if check_existing else (df, None, df)
    )
    if upsert:
        unseen_df, update_df, delta_df = (df.head(0), df, df)

    if debug:
        dprint("Delta data:\n" + str(delta_df))
        dprint("Unseen data:\n" + str(unseen_df))
        if update_df is not None:
            dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df))

    if_exists = kw.get('if_exists', 'append')
    if 'if_exists' in kw:
        kw.pop('if_exists')
    if 'name' in kw:
        kw.pop('name')

    ### Insert new data into Pipe's table.
    unseen_kw = copy.deepcopy(kw)
    unseen_kw.update({
        'name': pipe.target,
        'if_exists': if_exists,
        'debug': debug,
        'as_dict': True,
        'chunksize': chunksize,
        'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
        'schema': self.get_pipe_schema(pipe),
    })

    stats = self.to_sql(unseen_df, **unseen_kw)
    if is_new:
        if not self.create_indices(pipe, debug=debug):
            warn(f"Failed to create indices for {pipe}. Continuing...")

    ### Stage updated rows in a temporary pipe/table, then merge into the target.
    if update_df is not None and len(update_df) > 0:
        dt_col = pipe.columns.get('datetime', None)
        dt_typ = pipe.dtypes.get(dt_col, None)
        dt_name = sql_item_name(dt_col, self.flavor) if dt_col else None
        update_min = update_df[dt_col].min() if dt_col and dt_col in update_df.columns else None
        update_max = update_df[dt_col].max() if dt_col and dt_col in update_df.columns else None
        update_begin = update_min
        update_end = (
            update_max
            + (
                timedelta(minutes=1)
                if are_dtypes_equal(str(dt_typ), 'datetime')
                else 1
            )
        ) if dt_col else None

        transact_id = generate_password(3)
        temp_target = '##' + transact_id + '_' + pipe.target
        self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
        temp_pipe = Pipe(
            pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
            instance=pipe.instance_keys,
            columns={
                ix_key: ix
                for ix_key, ix in pipe.columns.items()
                if ix and ix in update_df.columns
            },
            dtypes=pipe.dtypes,
            target=temp_target,
            temporary=True,
            parameters={
                'schema': self.internal_schema,
                'hypertable': False,
            },
        )
        temp_pipe.sync(update_df, check_existing=False, debug=debug)
        existing_cols = pipe.get_columns_types(debug=debug)
        join_cols = [
            col
            for col_key, col in pipe.columns.items()
            if col and col in existing_cols
        ]
        ### NOTE(review): this rebinds the `update_queries` dict imported above
        ### (only used earlier for the upsert check) — consider a distinct name.
        update_queries = get_update_queries(
            pipe.target,
            temp_target,
            self,
            join_cols,
            upsert=upsert,
            schema=self.get_pipe_schema(pipe),
            patch_schema=self.internal_schema,
            datetime_col=pipe.columns.get('datetime', None),
            debug=debug,
        )
        update_success = all(
            self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
        )
        self._log_temporary_tables_creation(
            temp_target,
            ready_to_drop=True,
            create=(not pipe.temporary),
            debug=debug,
        )
        if not update_success:
            warn(f"Failed to apply update to {pipe}.")

    stop = time.perf_counter()
    success = stats['success']
    if not success:
        return success, stats['msg']

    unseen_count = len(unseen_df.index) if unseen_df is not None else 0
    update_count = len(update_df.index) if update_df is not None else 0
    msg = (
        (
            f"Inserted {unseen_count}, "
            + f"updated {update_count} rows."
        )
        if not upsert
        else (
            f"Upserted {update_count} row"
            + ('s' if update_count != 1 else '')
            + "."
        )
    )
    if debug:
        msg = msg[:-1] + (
            f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n"
            + f"in {round(stop - start, 2)} seconds."
        )

    ### Opportunistically clean up stale temporary tables from prior syncs.
    if _check_temporary_tables:
        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
            refresh=False, debug=debug
        )
        if not drop_stale_success:
            warn(drop_stale_msg)

    return success, msg
Sync a pipe using a database connection.
Parameters
- pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
- df (Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]):
An optional DataFrame or equivalent to sync into the pipe.
Defaults to
None
. - begin (Optional[datetime], default None):
Optionally specify the earliest datetime to search for data.
Defaults to
None
. - end (Optional[datetime], default None):
Optionally specify the latest datetime to search for data.
Defaults to
None
- chunksize (Optional[int], default -1):
Specify the number of rows to sync per chunk.
If `-1`, resort to system configuration (default is `900`).
A `chunksize` of `None` will sync all rows in one transaction. Defaults to `-1`.
- check_existing (bool, default True):
If
True
, pull and diff with existing data from the pipe. Defaults toTrue
- blocking (bool, default True):
If `True`, wait for sync to finish and return its result, otherwise asynchronously sync.
Defaults to `True`.
- debug (bool, default False): Verbosity toggle. Defaults to False.
- kw (Any): Catch-all for keyword arguments.
Returns
- A
SuccessTuple
of success (bool
) and message (str
).
1418def sync_pipe_inplace( 1419 self, 1420 pipe: 'mrsm.Pipe', 1421 params: Optional[Dict[str, Any]] = None, 1422 begin: Optional[datetime] = None, 1423 end: Optional[datetime] = None, 1424 chunksize: Optional[int] = -1, 1425 check_existing: bool = True, 1426 debug: bool = False, 1427 **kw: Any 1428) -> SuccessTuple: 1429 """ 1430 If a pipe's connector is the same as its instance connector, 1431 it's more efficient to sync the pipe in-place rather than reading data into Pandas. 1432 1433 Parameters 1434 ---------- 1435 pipe: mrsm.Pipe 1436 The pipe whose connector is the same as its instance. 1437 1438 params: Optional[Dict[str, Any]], default None 1439 Optional params dictionary to build the `WHERE` clause. 1440 See `meerschaum.utils.sql.build_where`. 1441 1442 begin: Optional[datetime], default None 1443 Optionally specify the earliest datetime to search for data. 1444 Defaults to `None`. 1445 1446 end: Optional[datetime], default None 1447 Optionally specify the latest datetime to search for data. 1448 Defaults to `None`. 1449 1450 chunksize: Optional[int], default -1 1451 Specify the number of rows to sync per chunk. 1452 If `-1`, resort to system configuration (default is `900`). 1453 A `chunksize` of `None` will sync all rows in one transaction. 1454 Defaults to `-1`. 1455 1456 check_existing: bool, default True 1457 If `True`, pull and diff with existing data from the pipe. 1458 1459 debug: bool, default False 1460 Verbosity toggle. 1461 1462 Returns 1463 ------- 1464 A SuccessTuple. 
1465 """ 1466 if self.flavor == 'duckdb': 1467 return pipe.sync( 1468 params=params, 1469 begin=begin, 1470 end=end, 1471 chunksize=chunksize, 1472 check_existing=check_existing, 1473 debug=debug, 1474 _inplace=False, 1475 **kw 1476 ) 1477 from meerschaum.utils.sql import ( 1478 sql_item_name, 1479 get_sqlalchemy_table, 1480 get_update_queries, 1481 get_null_replacement, 1482 NO_CTE_FLAVORS, 1483 NO_SELECT_INTO_FLAVORS, 1484 format_cte_subquery, 1485 get_create_table_query, 1486 get_table_cols_types, 1487 truncate_item_name, 1488 session_execute, 1489 table_exists, 1490 update_queries, 1491 ) 1492 from meerschaum.utils.dtypes.sql import ( 1493 get_pd_type_from_db_type, 1494 ) 1495 from meerschaum.utils.misc import generate_password 1496 from meerschaum.utils.debug import dprint 1497 1498 transact_id = generate_password(3) 1499 def get_temp_table_name(label: str) -> str: 1500 return '##' + transact_id + '_' + label + '_' + pipe.target 1501 1502 internal_schema = self.internal_schema 1503 temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update'] 1504 temp_tables = { 1505 table_root: get_temp_table_name(table_root) 1506 for table_root in temp_table_roots 1507 } 1508 temp_table_names = { 1509 table_root: sql_item_name( 1510 table_name_raw, 1511 self.flavor, 1512 internal_schema, 1513 ) 1514 for table_root, table_name_raw in temp_tables.items() 1515 } 1516 metadef = self.get_pipe_metadef( 1517 pipe, 1518 params=params, 1519 begin=begin, 1520 end=end, 1521 check_existing=check_existing, 1522 debug=debug, 1523 ) 1524 pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 1525 upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries 1526 database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None)) 1527 1528 def clean_up_temp_tables(ready_to_drop: bool = False): 1529 log_success, log_msg = self._log_temporary_tables_creation( 1530 [ 1531 table 1532 for table in 
temp_tables.values() 1533 ] if not upsert else [temp_tables['update']], 1534 ready_to_drop=ready_to_drop, 1535 create=(not pipe.temporary), 1536 debug=debug, 1537 ) 1538 if not log_success: 1539 warn(log_msg) 1540 drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables( 1541 refresh=False, 1542 debug=debug, 1543 ) 1544 if not drop_stale_success: 1545 warn(drop_stale_msg) 1546 return drop_stale_success, drop_stale_msg 1547 1548 sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm') 1549 if not pipe.exists(debug=debug): 1550 create_pipe_query = get_create_table_query( 1551 metadef, 1552 pipe.target, 1553 self.flavor, 1554 schema=self.get_pipe_schema(pipe), 1555 ) 1556 result = self.exec(create_pipe_query, debug=debug) 1557 if result is None: 1558 _ = clean_up_temp_tables() 1559 return False, f"Could not insert new data into {pipe} from its SQL query definition." 1560 1561 if not self.create_indices(pipe, debug=debug): 1562 warn(f"Failed to create indices for {pipe}. Continuing...") 1563 1564 rowcount = pipe.get_rowcount(debug=debug) 1565 _ = clean_up_temp_tables() 1566 return True, f"Inserted {rowcount}, updated 0 rows." 
1567 1568 session = sqlalchemy_orm.Session(self.engine) 1569 connectable = session if self.flavor != 'duckdb' else self 1570 1571 create_new_query = get_create_table_query( 1572 metadef, 1573 temp_tables[('new') if not upsert else 'update'], 1574 self.flavor, 1575 schema=internal_schema, 1576 ) 1577 (create_new_success, create_new_msg), create_new_results = session_execute( 1578 session, 1579 create_new_query, 1580 with_results=True, 1581 debug=debug, 1582 ) 1583 if not create_new_success: 1584 _ = clean_up_temp_tables() 1585 return create_new_success, create_new_msg 1586 new_count = create_new_results[0].rowcount if create_new_results else 0 1587 1588 new_cols_types = get_table_cols_types( 1589 temp_tables[('new' if not upsert else 'update')], 1590 connectable=connectable, 1591 flavor=self.flavor, 1592 schema=internal_schema, 1593 database=database, 1594 debug=debug, 1595 ) 1596 if not new_cols_types: 1597 return False, f"Failed to get new columns for {pipe}." 1598 1599 new_cols = { 1600 str(col_name): get_pd_type_from_db_type(str(col_type)) 1601 for col_name, col_type in new_cols_types.items() 1602 } 1603 new_cols_str = ', '.join([ 1604 sql_item_name(col, self.flavor) 1605 for col in new_cols 1606 ]) 1607 1608 add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug) 1609 if add_cols_queries: 1610 self.exec_queries(add_cols_queries, debug=debug) 1611 1612 alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug) 1613 if alter_cols_queries: 1614 self.exec_queries(alter_cols_queries, debug=debug) 1615 1616 insert_queries = [ 1617 ( 1618 f"INSERT INTO {pipe_name} ({new_cols_str})\n" 1619 + f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}" 1620 ) 1621 ] if not check_existing and not upsert else [] 1622 1623 new_queries = insert_queries 1624 new_success, new_msg = ( 1625 session_execute(session, new_queries, debug=debug) 1626 if new_queries 1627 else (True, "Success") 1628 ) 1629 if not new_success: 1630 _ = 
clean_up_temp_tables() 1631 return new_success, new_msg 1632 1633 if not check_existing: 1634 session.commit() 1635 _ = clean_up_temp_tables() 1636 return True, f"Inserted {new_count}, updated 0 rows." 1637 1638 backtrack_def = self.get_pipe_data_query( 1639 pipe, 1640 begin=begin, 1641 end=end, 1642 begin_add_minutes=0, 1643 end_add_minutes=1, 1644 params=params, 1645 debug=debug, 1646 order=None, 1647 ) 1648 1649 create_backtrack_query = get_create_table_query( 1650 backtrack_def, 1651 temp_tables['backtrack'], 1652 self.flavor, 1653 schema=internal_schema, 1654 ) 1655 (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute( 1656 session, 1657 create_backtrack_query, 1658 with_results=True, 1659 debug=debug, 1660 ) if not upsert else (True, "Success"), None 1661 1662 if not create_backtrack_success: 1663 _ = clean_up_temp_tables() 1664 return create_backtrack_success, create_backtrack_msg 1665 1666 backtrack_cols_types = get_table_cols_types( 1667 temp_tables['backtrack'], 1668 connectable=connectable, 1669 flavor=self.flavor, 1670 schema=internal_schema, 1671 database=database, 1672 debug=debug, 1673 ) if not upsert else new_cols_types 1674 1675 common_cols = [col for col in new_cols if col in backtrack_cols_types] 1676 on_cols = { 1677 col: new_cols.get(col, 'object') 1678 for col_key, col in pipe.columns.items() 1679 if ( 1680 col 1681 and 1682 col_key != 'value' 1683 and col in backtrack_cols_types 1684 and col in new_cols 1685 ) 1686 } 1687 1688 null_replace_new_cols_str = ( 1689 ', '.join([ 1690 f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, " 1691 + f"{get_null_replacement(typ, self.flavor)}) AS " 1692 + sql_item_name(col, self.flavor, None) 1693 for col, typ in new_cols.items() 1694 ]) 1695 ) 1696 1697 select_delta_query = ( 1698 "SELECT\n" 1699 + null_replace_new_cols_str + "\n" 1700 + f"\nFROM {temp_table_names['new']}\n" 1701 + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n" 
1702 + '\nAND\n'.join([ 1703 ( 1704 f"COALESCE({temp_table_names['new']}." 1705 + sql_item_name(c, self.flavor, None) 1706 + ", " 1707 + get_null_replacement(new_cols[c], self.flavor) 1708 + ") " 1709 + ' = ' 1710 + f"COALESCE({temp_table_names['backtrack']}." 1711 + sql_item_name(c, self.flavor, None) 1712 + ", " 1713 + get_null_replacement(backtrack_cols_types[c], self.flavor) 1714 + ") " 1715 ) for c in common_cols 1716 ]) 1717 + "\nWHERE\n" 1718 + '\nAND\n'.join([ 1719 ( 1720 f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) + ' IS NULL' 1721 ) for c in common_cols 1722 ]) 1723 ) 1724 create_delta_query = get_create_table_query( 1725 select_delta_query, 1726 temp_tables['delta'], 1727 self.flavor, 1728 schema=internal_schema, 1729 ) 1730 create_delta_success, create_delta_msg = session_execute( 1731 session, 1732 create_delta_query, 1733 debug=debug, 1734 ) if not upsert else (True, "Success") 1735 if not create_delta_success: 1736 _ = clean_up_temp_tables() 1737 return create_delta_success, create_delta_msg 1738 1739 delta_cols_types = get_table_cols_types( 1740 temp_tables['delta'], 1741 connectable=connectable, 1742 flavor=self.flavor, 1743 schema=internal_schema, 1744 database=database, 1745 debug=debug, 1746 ) if not upsert else new_cols_types 1747 1748 ### This is a weird bug on SQLite. 1749 ### Sometimes the backtrack dtypes are all empty strings. 1750 if not all(delta_cols_types.values()): 1751 delta_cols_types = new_cols_types 1752 1753 delta_cols = { 1754 col: get_pd_type_from_db_type(typ) 1755 for col, typ in delta_cols_types.items() 1756 } 1757 delta_cols_str = ', '.join([ 1758 sql_item_name(col, self.flavor) 1759 for col in delta_cols 1760 ]) 1761 1762 select_joined_query = ( 1763 "SELECT " 1764 + (', '.join([ 1765 ( 1766 f"{temp_table_names['delta']}." 
+ sql_item_name(c, self.flavor, None) 1767 + " AS " + sql_item_name(c + '_delta', self.flavor, None) 1768 ) for c in delta_cols 1769 ])) 1770 + ", " 1771 + (', '.join([ 1772 ( 1773 f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) 1774 + " AS " + sql_item_name(c + '_backtrack', self.flavor, None) 1775 ) for c in backtrack_cols_types 1776 ])) 1777 + f"\nFROM {temp_table_names['delta']}\n" 1778 + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n" 1779 + '\nAND\n'.join([ 1780 ( 1781 f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None) 1782 + ", " + get_null_replacement(typ, self.flavor) + ")" 1783 + ' = ' 1784 + f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) 1785 + ", " + get_null_replacement(typ, self.flavor) + ")" 1786 ) for c, typ in on_cols.items() 1787 ]) 1788 ) 1789 1790 create_joined_query = get_create_table_query( 1791 select_joined_query, 1792 temp_tables['joined'], 1793 self.flavor, 1794 schema = internal_schema, 1795 ) 1796 create_joined_success, create_joined_msg = session_execute( 1797 session, 1798 create_joined_query, 1799 debug=debug, 1800 ) if on_cols and not upsert else (True, "Success") 1801 if not create_joined_success: 1802 _ = clean_up_temp_tables() 1803 return create_joined_success, create_joined_msg 1804 1805 select_unseen_query = ( 1806 "SELECT " 1807 + (', '.join([ 1808 ( 1809 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None) 1810 + " != " + get_null_replacement(typ, self.flavor) 1811 + " THEN " + sql_item_name(c + '_delta', self.flavor, None) 1812 + "\n ELSE NULL\nEND " 1813 + " AS " + sql_item_name(c, self.flavor, None) 1814 ) for c, typ in delta_cols.items() 1815 ])) 1816 + f"\nFROM {temp_table_names['joined']}\n" 1817 + "WHERE " 1818 + '\nAND\n'.join([ 1819 ( 1820 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL' 1821 ) for c in delta_cols 1822 ]) 1823 ) 1824 create_unseen_query = get_create_table_query( 1825 
select_unseen_query, 1826 temp_tables['unseen'], 1827 self.flavor, 1828 internal_schema, 1829 ) 1830 (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute( 1831 session, 1832 create_unseen_query, 1833 with_results=True, 1834 debug=debug 1835 ) if not upsert else (True, "Success"), None 1836 if not create_unseen_success: 1837 _ = clean_up_temp_tables() 1838 return create_unseen_success, create_unseen_msg 1839 1840 select_update_query = ( 1841 "SELECT " 1842 + (', '.join([ 1843 ( 1844 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None) 1845 + " != " + get_null_replacement(typ, self.flavor) 1846 + " THEN " + sql_item_name(c + '_delta', self.flavor, None) 1847 + "\n ELSE NULL\nEND " 1848 + " AS " + sql_item_name(c, self.flavor, None) 1849 ) for c, typ in delta_cols.items() 1850 ])) 1851 + f"\nFROM {temp_table_names['joined']}\n" 1852 + "WHERE " 1853 + '\nOR\n'.join([ 1854 ( 1855 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL' 1856 ) for c in delta_cols 1857 ]) 1858 ) 1859 1860 create_update_query = get_create_table_query( 1861 select_update_query, 1862 temp_tables['update'], 1863 self.flavor, 1864 internal_schema, 1865 ) 1866 (create_update_success, create_update_msg), create_update_results = session_execute( 1867 session, 1868 create_update_query, 1869 with_results=True, 1870 debug=debug, 1871 ) if on_cols and not upsert else ((True, "Success"), []) 1872 apply_update_queries = ( 1873 get_update_queries( 1874 pipe.target, 1875 temp_tables['update'], 1876 session, 1877 on_cols, 1878 upsert=upsert, 1879 schema=self.get_pipe_schema(pipe), 1880 patch_schema=internal_schema, 1881 datetime_col=pipe.columns.get('datetime', None), 1882 flavor=self.flavor, 1883 debug=debug, 1884 ) 1885 if on_cols else [] 1886 ) 1887 1888 apply_unseen_queries = [ 1889 ( 1890 f"INSERT INTO {pipe_name} ({delta_cols_str})\n" 1891 + f"SELECT {delta_cols_str}\nFROM " 1892 + ( 1893 temp_table_names['unseen'] 1894 if on_cols 1895 else 
temp_table_names['delta'] 1896 ) 1897 ), 1898 ] 1899 1900 (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute( 1901 session, 1902 apply_unseen_queries, 1903 with_results=True, 1904 debug=debug, 1905 ) if not upsert else (True, "Success"), None 1906 if not apply_unseen_success: 1907 _ = clean_up_temp_tables() 1908 return apply_unseen_success, apply_unseen_msg 1909 unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0 1910 1911 (apply_update_success, apply_update_msg), apply_update_results = session_execute( 1912 session, 1913 apply_update_queries, 1914 with_results=True, 1915 debug=debug, 1916 ) 1917 if not apply_update_success: 1918 _ = clean_up_temp_tables() 1919 return apply_update_success, apply_update_msg 1920 update_count = apply_update_results[0].rowcount if apply_update_results else 0 1921 1922 session.commit() 1923 1924 msg = ( 1925 f"Inserted {unseen_count}, updated {update_count} rows." 1926 if not upsert 1927 else f"Upserted {update_count} row" + ('s' if update_count != 1 else '') + "." 1928 ) 1929 _ = clean_up_temp_tables(ready_to_drop=True) 1930 1931 return True, msg
If a pipe's connector is the same as its instance connector, it's more efficient to sync the pipe in-place rather than reading data into Pandas.
Parameters
- pipe (mrsm.Pipe): The pipe whose connector is the same as its instance.
- params (Optional[Dict[str, Any]], default None):
  Optional params dictionary to build the `WHERE` clause.
  See `meerschaum.utils.sql.build_where`.
- begin (Optional[datetime], default None):
  Optionally specify the earliest datetime to search for data.
  Defaults to `None`.
- end (Optional[datetime], default None):
  Optionally specify the latest datetime to search for data.
  Defaults to `None`.
- chunksize (Optional[int], default -1):
  Specify the number of rows to sync per chunk.
  If `-1`, resort to system configuration (default is `900`).
  A `chunksize` of `None` will sync all rows in one transaction.
  Defaults to `-1`.
- check_existing (bool, default True):
  If `True`, pull and diff with existing data from the pipe.
- debug (bool, default False): Verbosity toggle.
Returns
- A SuccessTuple.
def get_sync_time(
    self,
    pipe: 'mrsm.Pipe',
    params: Optional[Dict[str, Any]] = None,
    newest: bool = True,
    debug: bool = False,
) -> Union[datetime, int, None]:
    """Get a Pipe's most recent datetime value.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to get the sync time for.

    params: Optional[Dict[str, Any]], default None
        Optional params dictionary to build the `WHERE` clause.
        See `meerschaum.utils.sql.build_where`.

    newest: bool, default True
        If `True`, get the most recent datetime (honoring `params`).
        If `False`, get the oldest datetime (ASC instead of DESC).

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
    """
    from meerschaum.utils.sql import sql_item_name, build_where
    table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    ### Fall back to guessing the datetime column when the pipe doesn't declare one.
    dt_col = pipe.columns.get('datetime', None)
    if not dt_col:
        _dt = pipe.guess_datetime()
        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
        is_guess = True
    else:
        _dt = dt_col
        dt = sql_item_name(_dt, self.flavor, None)
        is_guess = False

    if _dt is None:
        return None

    ASC_or_DESC = "DESC" if newest else "ASC"
    existing_cols = pipe.get_columns_types(debug=debug)
    valid_params = {}
    if params is not None:
        ### Only keep param keys which correspond to actual table columns.
        valid_params = {k: v for k, v in params.items() if k in existing_cols}

    ### If no bounds are provided for the datetime column,
    ### add IS NOT NULL to the WHERE clause.
    ### NOTE: '_None' is the sentinel `build_where` interprets as "IS NOT NULL".
    if _dt not in valid_params:
        valid_params[_dt] = '_None'
    where = "" if not valid_params else build_where(valid_params, self)
    q = f"SELECT {dt}\nFROM {table}{where}\nORDER BY {dt} {ASC_or_DESC}\nLIMIT 1"
    ### MSSQL and Oracle lack LIMIT; use TOP 1 and ROWNUM instead.
    if self.flavor == 'mssql':
        q = f"SELECT TOP 1 {dt}\nFROM {table}{where}\nORDER BY {dt} {ASC_or_DESC}"
    elif self.flavor == 'oracle':
        q = (
            "SELECT * FROM (\n"
            + f"    SELECT {dt}\nFROM {table}{where}\n    ORDER BY {dt} {ASC_or_DESC}\n"
            + ") WHERE ROWNUM = 1"
        )

    try:
        db_time = self.value(q, silent=True, debug=debug)

        ### No datetime could be found.
        if db_time is None:
            return None
        ### sqlite returns str.
        if isinstance(db_time, str):
            from meerschaum.utils.packages import attempt_import
            dateutil_parser = attempt_import('dateutil.parser')
            st = dateutil_parser.parse(db_time)
        ### Do nothing if a datetime object is returned.
        elif isinstance(db_time, datetime):
            ### `datetime` subclasses (e.g. pandas Timestamp) are coerced to stdlib datetime.
            if hasattr(db_time, 'to_pydatetime'):
                st = db_time.to_pydatetime()
            else:
                st = db_time
        ### Sometimes the datetime is actually a date.
        ### (Checked after `datetime`, which is itself a `date` subclass.)
        elif isinstance(db_time, date):
            st = datetime.combine(db_time, datetime.min.time())
        ### Adding support for an integer datetime axis.
        elif 'int' in str(type(db_time)).lower():
            st = int(db_time)
        ### Convert pandas timestamp to Python datetime.
        else:
            st = db_time.to_pydatetime()

        sync_time = st

    except Exception as e:
        ### Best-effort: surface the error as a warning and return None.
        sync_time = None
        warn(str(e))

    return sync_time
Get a Pipe's most recent datetime value.
Parameters
- pipe (mrsm.Pipe): The pipe to get the sync time for.
- params (Optional[Dict[str, Any]], default None):
  Optional params dictionary to build the `WHERE` clause.
  See `meerschaum.utils.sql.build_where`.
- newest (bool, default True):
  If `True`, get the most recent datetime (honoring `params`).
  If `False`, get the oldest datetime (ASC instead of DESC).
Returns
- A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
2034def pipe_exists( 2035 self, 2036 pipe: mrsm.Pipe, 2037 debug: bool = False 2038 ) -> bool: 2039 """ 2040 Check that a Pipe's table exists. 2041 2042 Parameters 2043 ---------- 2044 pipe: mrsm.Pipe: 2045 The pipe to check. 2046 2047 debug: bool, default False 2048 Verbosity toggle. 2049 2050 Returns 2051 ------- 2052 A `bool` corresponding to whether a pipe's table exists. 2053 2054 """ 2055 from meerschaum.utils.sql import table_exists 2056 exists = table_exists( 2057 pipe.target, 2058 self, 2059 schema = self.get_pipe_schema(pipe), 2060 debug = debug, 2061 ) 2062 if debug: 2063 from meerschaum.utils.debug import dprint 2064 dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.')) 2065 return exists
Check that a Pipe's table exists.
Parameters
- pipe (mrsm.Pipe): The pipe to check.
- debug (bool, default False): Verbosity toggle.
Returns
- A `bool` corresponding to whether a pipe's table exists.
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    remote: bool = False,
    debug: bool = False
) -> Union[int, None]:
    """
    Get the rowcount for a pipe in accordance with given parameters.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to query with.

    begin: Union[datetime, int, None], default None
        The begin datetime value (inclusive lower bound).

    end: Union[datetime, int, None], default None
        The end datetime value (exclusive upper bound).

    params: Optional[Dict[str, Any]], default None
        See `meerschaum.utils.sql.build_where`.

    remote: bool, default False
        If `True`, get the rowcount for the remote table
        (counts the pipe's `fetch:definition` query instead of the local table).

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    An `int` for the number of rows if the `pipe` exists, otherwise `None`.

    """
    from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
    from meerschaum.connectors.sql._fetch import get_pipe_query
    # A remote rowcount requires the pipe's fetch definition to be present.
    if remote:
        msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
        if 'fetch' not in pipe.parameters:
            error(msg)
            return None
        if 'definition' not in pipe.parameters['fetch']:
            error(msg)
            return None

    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    # Resolve the datetime column, guessing one if the pipe doesn't declare it.
    if not pipe.columns.get('datetime', None):
        _dt = pipe.guess_datetime()
        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
        is_guess = True
    else:
        _dt = pipe.get_columns('datetime')
        dt = sql_item_name(_dt, self.flavor, None)
        is_guess = False

    # Datetime bounds only make sense with a datetime column;
    # warn (and drop the bounds) when we had to guess.
    if begin is not None or end is not None:
        if is_guess:
            if _dt is None:
                warn(
                    f"No datetime could be determined for {pipe}."
                    + "\n    Ignoring begin and end...",
                    stack=False,
                )
                begin, end = None, None
            else:
                warn(
                    f"A datetime wasn't specified for {pipe}.\n"
                    + f"    Using column \"{_dt}\" for datetime bounds...",
                    stack=False,
                )


    # Quote with the connector flavor that will actually run the query
    # (the remote connector's flavor for remote rowcounts).
    _datetime_name = sql_item_name(
        _dt,
        (
            pipe.instance_connector.flavor
            if not remote
            else pipe.connector.flavor
        ),
        None,
    )
    # Only select the columns the COUNT actually needs (datetime + param keys).
    _cols_names = [
        sql_item_name(
            col,
            (
                pipe.instance_connector.flavor
                if not remote
                else pipe.connector.flavor
            ),
            None,
        )
        for col in set(
            (
                [_dt]
                if _dt
                else []
            )
            + (
                []
                if params is None
                else list(params.keys())
            )
        )
    ]
    if not _cols_names:
        _cols_names = ['*']

    # Count over either the local table or the remote fetch definition,
    # wrapped as a CTE named 'src'.
    src = (
        f"SELECT {', '.join(_cols_names)} FROM {_pipe_name}"
        if not remote
        else get_pipe_query(pipe)
    )
    parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', self.flavor)}"
    query = wrap_query_with_cte(src, parent_query, self.flavor)
    if begin is not None or end is not None:
        query += "\nWHERE"
    if begin is not None:
        query += f"""
        {dt} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin)}
        """
    if end is not None and begin is not None:
        query += "AND"
    if end is not None:
        query += f"""
        {dt} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end)}
        """
    if params is not None:
        from meerschaum.utils.sql import build_where
        existing_cols = pipe.get_columns_types(debug=debug)
        # Silently drop param keys which aren't actual columns.
        valid_params = {k: v for k, v in params.items() if k in existing_cols}
        if valid_params:
            # build_where() emits a leading 'WHERE'; swap it for 'AND'
            # when datetime bounds already opened the WHERE clause.
            query += build_where(valid_params, self).replace('WHERE', (
                'AND' if (begin is not None or end is not None)
                    else 'WHERE'
                )
            )

    result = self.value(query, debug=debug, silent=True)
    # `result` may be None (query failed) or a non-numeric value; treat both as None.
    try:
        return int(result)
    except Exception as e:
        return None
Get the rowcount for a pipe in accordance with given parameters.
Parameters
- pipe (mrsm.Pipe): The pipe to query with.
- begin (Union[datetime, int, None], default None): The begin datetime value.
- end (Union[datetime, int, None], default None): The end datetime value.
- params (Optional[Dict[str, Any]], default None):
  See `meerschaum.utils.sql.build_where`.
- remote (bool, default False):
  If `True`, get the rowcount for the remote table.
- debug (bool, default False): Verbosity toggle.
Returns
- An `int` for the number of rows if the `pipe` exists, otherwise `None`.
2216def drop_pipe( 2217 self, 2218 pipe: mrsm.Pipe, 2219 debug: bool = False, 2220 **kw 2221) -> SuccessTuple: 2222 """ 2223 Drop a pipe's tables but maintain its registration. 2224 2225 Parameters 2226 ---------- 2227 pipe: mrsm.Pipe 2228 The pipe to drop. 2229 2230 Returns 2231 ------- 2232 A `SuccessTuple` indicated success. 2233 """ 2234 from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS 2235 success = True 2236 target = pipe.target 2237 target_name = ( 2238 sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 2239 ) 2240 if table_exists(target, self, debug=debug): 2241 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 2242 success = self.exec( 2243 f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug 2244 ) is not None 2245 2246 msg = "Success" if success else f"Failed to drop {pipe}." 2247 return success, msg
Drop a pipe's tables but maintain its registration.
Parameters
- pipe (mrsm.Pipe): The pipe to drop.
Returns
- A `SuccessTuple` indicating success.
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    Delete a pipe's data within a bounded or unbounded interval without dropping the table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to clear.

    begin: Union[datetime, int, None], default None
        Beginning datetime. Inclusive.

    end: Union[datetime, int, None], default None
        Ending datetime. Exclusive.

    params: Optional[Dict[str, Any]], default None
        See `meerschaum.utils.sql.build_where`.

    """
    if not pipe.exists(debug=debug):
        return True, f"{pipe} does not exist, so nothing was cleared."

    from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    ### Resolve the datetime column, guessing if the pipe doesn't declare one.
    if pipe.columns.get('datetime', None):
        _dt = pipe.get_columns('datetime')
        dt_name = sql_item_name(_dt, self.flavor, None)
        is_guess = False
    else:
        _dt = pipe.guess_datetime()
        dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None
        is_guess = True

    ### Bounds require a datetime column; warn (and drop bounds) when guessed.
    if begin is not None or end is not None:
        if is_guess:
            if _dt is None:
                warn(
                    f"No datetime could be determined for {pipe}."
                    + "\n    Ignoring datetime bounds...",
                    stack=False,
                )
                begin, end = None, None
            else:
                warn(
                    f"A datetime wasn't specified for {pipe}.\n"
                    + f"    Using column \"{_dt}\" for datetime bounds...",
                    stack=False,
                )

    valid_params = {}
    if params is not None:
        existing_cols = pipe.get_columns_types(debug=debug)
        valid_params = {k: v for k, v in params.items() if k in existing_cols}

    ### Assemble the DELETE from optional fragments; `WHERE 1 = 1` lets
    ### every subsequent condition be appended uniformly with ` AND `.
    query_parts = [f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n"]
    if valid_params:
        query_parts.append(' AND ' + build_where(valid_params, self, with_where=False))
    if begin is not None:
        query_parts.append(f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin))
    if end is not None:
        query_parts.append(f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end))
    clear_query = ''.join(query_parts)

    success = self.exec(clear_query, silent=True, debug=debug) is not None
    return success, ("Success" if success else f"Failed to clear {pipe}.")
Delete a pipe's data within a bounded or unbounded interval without dropping the table.
Parameters
- pipe (mrsm.Pipe): The pipe to clear.
- begin (Union[datetime, int, None], default None): Beginning datetime. Inclusive.
- end (Union[datetime, int, None], default None): Ending datetime. Exclusive.
- params (Optional[Dict[str, Any]], default None):
  See `meerschaum.utils.sql.build_where`.
def deduplicate_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> SuccessTuple:
    """
    Delete duplicate values within a pipe's table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose table to deduplicate.

    begin: Union[datetime, int, None], default None
        If provided, only deduplicate values greater than or equal to this value.

    end: Union[datetime, int, None], default None
        If provided, only deduplicate values less than this value.

    params: Optional[Dict[str, Any]], default None
        If provided, further limit deduplication to values which match this query dictionary.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    # NOTE(review): `begin`, `end`, and `params` are accepted but never read in this
    # body — the table is always deduplicated in full. Confirm whether bounding is
    # handled by a caller or is simply unimplemented here.
    from meerschaum.utils.sql import (
        sql_item_name,
        NO_CTE_FLAVORS,
        get_rename_table_queries,
        NO_SELECT_INTO_FLAVORS,
        DROP_IF_EXISTS_FLAVORS,
        get_create_table_query,
        format_cte_subquery,
        get_null_replacement,
    )
    from meerschaum.utils.misc import generate_password, flatten_list

    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

    if not pipe.exists(debug=debug):
        return False, f"Table {pipe_table_name} does not exist."

    ### TODO: Handle deleting duplicates without a datetime axis.
    dt_col = pipe.columns.get('datetime', None)
    dt_col_name = sql_item_name(dt_col, self.flavor, None)
    cols_types = pipe.get_columns_types(debug=debug)
    existing_cols = pipe.get_columns_types(debug=debug)

    # Capture the original rowcount so the final message can report how many rows were removed.
    get_rowcount_query = f"SELECT COUNT(*) FROM {pipe_table_name}"
    old_rowcount = self.value(get_rowcount_query, debug=debug)
    if old_rowcount is None:
        return False, f"Failed to get rowcount for table {pipe_table_name}."

    ### Non-datetime indices that in fact exist.
    indices = [
        col
        for key, col in pipe.columns.items()
        if col and col != dt_col and col in cols_types
    ]
    indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
    existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
    duplicates_cte_name = sql_item_name('dups', self.flavor, None)
    duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
    previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)

    # Build the PARTITION BY / ORDER BY column lists (datetime column first,
    # descending in the ordered variant, then the remaining index columns).
    index_list_str = (
        sql_item_name(dt_col, self.flavor, None)
        if dt_col
        else ''
    )
    index_list_str_ordered = (
        (
            sql_item_name(dt_col, self.flavor, None) + " DESC"
        )
        if dt_col
        else ''
    )
    if indices:
        index_list_str += ', ' + ', '.join(indices_names)
        index_list_str_ordered += ', ' + ', '.join(indices_names)
    # Strip a leading comma left behind when there was no datetime column.
    if index_list_str.startswith(','):
        index_list_str = index_list_str.lstrip(',').lstrip()
    if index_list_str_ordered.startswith(','):
        index_list_str_ordered = index_list_str_ordered.lstrip(',').lstrip()

    cols_list_str = ', '.join(existing_cols_names)

    try:
        ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()).
        is_old_mysql = (
            self.flavor in ('mysql', 'mariadb')
            and
            int(self.db_version.split('.')[0]) < 8
        )
    except Exception as e:
        is_old_mysql = False

    # Number each row within its index partition; row 1 is the survivor.
    src_query = f"""
        SELECT
            {cols_list_str},
            ROW_NUMBER() OVER (
                PARTITION BY
                {index_list_str}
                ORDER BY {index_list_str_ordered}
            ) AS {duplicate_row_number_name}
        FROM {pipe_table_name}
    """
    duplicates_cte_subquery = format_cte_subquery(
        src_query,
        self.flavor,
        sub_name = 'src',
        cols_to_select = cols_list_str,
    ) + f"""
        WHERE {duplicate_row_number_name} = 1
    """
    # Session-variable emulation of ROW_NUMBER() for MySQL < 8.
    old_mysql_query = (
        f"""
        SELECT
            {index_list_str}
        FROM (
            SELECT
                {index_list_str},
                IF(
                    @{previous_row_number_name} <> {index_list_str.replace(', ', ' + ')},
                    @{duplicate_row_number_name} := 0,
                    @{duplicate_row_number_name}
                ),
                @{previous_row_number_name} := {index_list_str.replace(', ', ' + ')},
                @{duplicate_row_number_name} := @{duplicate_row_number_name} + 1 AS """
        + f"""{duplicate_row_number_name}
            FROM
                {pipe_table_name},
                (
                    SELECT @{duplicate_row_number_name} := 0
                ) AS {duplicate_row_number_name},
                (
                    SELECT @{previous_row_number_name} := '{get_null_replacement('str', 'mysql')}'
                ) AS {previous_row_number_name}
            ORDER BY {index_list_str_ordered}
        ) AS t
        WHERE {duplicate_row_number_name} = 1
        """
    )
    if is_old_mysql:
        duplicates_cte_subquery = old_mysql_query

    # Random suffix so concurrent deduplications don't collide on temp table names.
    session_id = generate_password(3)

    dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
    temp_old_table = '-' + session_id + f"_old_{pipe.target}"

    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
    temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))

    # Materialize the deduplicated rows into a new table.
    create_temporary_table_query = get_create_table_query(
        duplicates_cte_subquery,
        dedup_table,
        self.flavor,
    ) + f"""
    ORDER BY {index_list_str_ordered}
    """
    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
    # Swap the deduplicated table into place:
    # target -> temp_old, dedup -> target, then drop temp_old.
    alter_queries = flatten_list([
        get_rename_table_queries(
            pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe)
        ),
        get_rename_table_queries(
            dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
        ),
        f"""
        DROP TABLE {if_exists_str} {temp_old_table_name}
        """,
    ])

    create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
    if create_temporary_result is None:
        return False, f"Failed to deduplicate table {pipe_table_name}."

    # Run the swap atomically-ish: stop on the first failure and roll back.
    results = self.exec_queries(
        alter_queries,
        break_on_error=True,
        rollback=True,
        debug=debug,
    )

    fail_query = None
    for result, query in zip(results, alter_queries):
        if result is None:
            fail_query = query
            break
    success = fail_query is None

    new_rowcount = (
        self.value(get_rowcount_query, debug=debug)
        if success
        else None
    )

    msg = (
        (
            f"Successfully deduplicated table {pipe_table_name}"
            + (
                f"\nfrom {old_rowcount} to {new_rowcount} rows"
                if old_rowcount != new_rowcount
                else ''
            )
            + '.'
        )
        if success
        else f"Failed to execute query:\n{fail_query}"
    )
    return success, msg
Delete duplicate values within a pipe's table.
Parameters
- pipe (mrsm.Pipe): The pipe whose table to deduplicate.
- begin (Union[datetime, int, None], default None): If provided, only deduplicate values greater than or equal to this value.
- end (Union[datetime, int, None], default None): If provided, only deduplicate values less than this value.
- params (Optional[Dict[str, Any]], default None): If provided, further limit deduplication to values which match this query dictionary.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` indicating success.
2328def get_pipe_table( 2329 self, 2330 pipe: mrsm.Pipe, 2331 debug: bool = False, 2332) -> Union['sqlalchemy.Table', None]: 2333 """ 2334 Return the `sqlalchemy.Table` object for a `mrsm.Pipe`. 2335 2336 Parameters 2337 ---------- 2338 pipe: mrsm.Pipe: 2339 The pipe in question. 2340 2341 Returns 2342 ------- 2343 A `sqlalchemy.Table` object. 2344 2345 """ 2346 from meerschaum.utils.sql import get_sqlalchemy_table 2347 if not pipe.exists(debug=debug): 2348 return None 2349 return get_sqlalchemy_table( 2350 pipe.target, 2351 connector=self, 2352 schema=self.get_pipe_schema(pipe), 2353 debug=debug, 2354 refresh=True, 2355 )
Return the `sqlalchemy.Table` object for a `mrsm.Pipe`.
Parameters
- pipe (mrsm.Pipe): The pipe in question.
Returns
- A `sqlalchemy.Table` object.
2358def get_pipe_columns_types( 2359 self, 2360 pipe: mrsm.Pipe, 2361 debug: bool = False, 2362) -> Dict[str, str]: 2363 """ 2364 Get the pipe's columns and types. 2365 2366 Parameters 2367 ---------- 2368 pipe: mrsm.Pipe: 2369 The pipe to get the columns for. 2370 2371 Returns 2372 ------- 2373 A dictionary of columns names (`str`) and types (`str`). 2374 2375 Examples 2376 -------- 2377 >>> conn.get_pipe_columns_types(pipe) 2378 { 2379 'dt': 'TIMESTAMP WITHOUT TIMEZONE', 2380 'id': 'BIGINT', 2381 'val': 'DOUBLE PRECISION', 2382 } 2383 >>> 2384 """ 2385 if not pipe.exists(debug=debug): 2386 return {} 2387 2388 if self.flavor == 'duckdb': 2389 from meerschaum.utils.sql import get_table_cols_types 2390 return get_table_cols_types( 2391 pipe.target, 2392 self, 2393 flavor=self.flavor, 2394 schema=self.get_pipe_schema(pipe), 2395 ) 2396 2397 table_columns = {} 2398 try: 2399 pipe_table = self.get_pipe_table(pipe, debug=debug) 2400 if pipe_table is None: 2401 return {} 2402 for col in pipe_table.columns: 2403 table_columns[str(col.name)] = str(col.type) 2404 except Exception as e: 2405 import traceback 2406 traceback.print_exc() 2407 warn(e) 2408 table_columns = {} 2409 2410 return table_columns
Get the pipe's columns and types.
Parameters
- pipe (mrsm.Pipe): The pipe to get the columns for.
Returns
- A dictionary of column names (`str`) and types (`str`).
Examples
>>> conn.get_pipe_columns_types(pipe)
{
'dt': 'TIMESTAMP WITHOUT TIMEZONE',
'id': 'BIGINT',
'val': 'DOUBLE PRECISION',
}
>>>
def get_to_sql_dtype(
    self,
    pipe: 'mrsm.Pipe',
    df: 'pd.DataFrame',
    update_dtypes: bool = True,
) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
    """
    Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe which may contain a `dtypes` parameter.

    df: pd.DataFrame
        The DataFrame to be pushed via `to_sql()`.

    update_dtypes: bool, default True
        If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.

    Returns
    -------
    A dictionary with `sqlalchemy` datatypes.

    Examples
    --------
    >>> import pandas as pd
    >>> import meerschaum as mrsm
    >>>
    >>> conn = mrsm.get_connector('sql:memory')
    >>> df = pd.DataFrame([{'a': {'b': 1}}])
    >>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
    >>> get_to_sql_dtype(pipe, df)
    {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
    """
    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type

    ### Start from the DataFrame's own dtypes.
    dtypes_by_col = {col: str(typ) for col, typ in df.dtypes.items()}

    ### Override with Meerschaum's special types detected from the data.
    for json_col in get_json_cols(df):
        dtypes_by_col[json_col] = 'json'
    for numeric_col in get_numeric_cols(df):
        dtypes_by_col[numeric_col] = 'numeric'

    ### The pipe's explicitly configured dtypes take highest precedence.
    if update_dtypes:
        dtypes_by_col.update(pipe.dtypes)

    return {
        col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
        for col, typ in dtypes_by_col.items()
    }
Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
Parameters
- pipe (mrsm.Pipe): The pipe which may contain a `dtypes` parameter.
- df (pd.DataFrame): The DataFrame to be pushed via `to_sql()`.
- update_dtypes (bool, default True): If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.
Returns
- A dictionary with `sqlalchemy` datatypes.
Examples
>>> import pandas as pd
>>> import meerschaum as mrsm
>>>
>>> conn = mrsm.get_connector('sql:memory')
>>> df = pd.DataFrame([{'a': {'b': 1}}])
>>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
>>> get_to_sql_dtype(pipe, df)
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
    """
    Return the schema to use for this pipe.
    First check `pipe.parameters['schema']`, then check `self.schema`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe which may contain a configured schema.

    Returns
    -------
    A schema string or `None` if nothing is configured.
    """
    ### An explicit `schema` key in the pipe's parameters (even if set to `None`)
    ### takes precedence over the connector's own schema.
    if 'schema' in pipe.parameters:
        return pipe.parameters['schema']
    return self.schema
Return the schema to use for this pipe.
First check `pipe.parameters['schema']`, then check `self.schema`.
Parameters
- pipe (mrsm.Pipe): The pipe which may contain a configured schema.
Returns
- A schema string or `None` if nothing is configured.
def register_plugin(
    self,
    plugin: 'mrsm.core.Plugin',
    force: bool = False,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Register a new plugin to the plugins table.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to be registered.

    force: bool, default False
        If `True`, skip the version-conflict check and overwrite the existing registration.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success status and a message.
    """
    from meerschaum.utils.warnings import warn, error
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
    from meerschaum.utils.sql import json_flavors
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']

    ### `None` means the plugin has never been registered.
    old_id = self.get_plugin_id(plugin, debug=debug)

    ### Check for version conflict. May be overridden with `--force`.
    if old_id is not None and not force:
        old_version = self.get_plugin_version(plugin, debug=debug)
        new_version = plugin.version
        ### Normalize missing versions to empty strings so the comparison below is skipped.
        if old_version is None:
            old_version = ''
        if new_version is None:
            new_version = ''

        ### verify that the new version is greater than the old
        packaging_version = attempt_import('packaging.version')
        if (
            old_version and new_version
            and packaging_version.parse(old_version) >= packaging_version.parse(new_version)
        ):
            return False, (
                f"Version '{new_version}' of plugin '{plugin}' " +
                f"must be greater than existing version '{old_version}'."
            )

    bind_variables = {
        'plugin_name': plugin.name,
        'version': plugin.version,
        ### Flavors without a native JSON type store the attributes as serialized text.
        'attributes': (
            json.dumps(plugin.attributes) if self.flavor not in json_flavors else plugin.attributes
        ),
        'user_id': plugin.user_id,
    }

    ### Insert a new row for first-time registrations, otherwise update in place.
    if old_id is None:
        query = sqlalchemy.insert(plugins_tbl).values(**bind_variables)
    else:
        query = (
            sqlalchemy.update(plugins_tbl)
            .values(**bind_variables)
            .where(plugins_tbl.c.plugin_id == old_id)
        )

    ### `self.exec()` returns `None` on failure.
    result = self.exec(query, debug=debug)
    if result is None:
        return False, f"Failed to register plugin '{plugin}'."
    return True, f"Successfully registered plugin '{plugin}'."
Register a new plugin to the plugins table.
def delete_plugin(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Delete a plugin from the plugins table.

    Parameters
    ----------
    plugin: mrsm.core.Plugin
        The plugin to be deleted.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success status and a message.
    """
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']

    plugin_id = self.get_plugin_id(plugin, debug=debug)
    if plugin_id is None:
        ### Nothing to delete: an unregistered plugin counts as success.
        return True, f"Plugin '{plugin}' was not registered."

    query = sqlalchemy.delete(plugins_tbl).where(plugins_tbl.c.plugin_id == plugin_id)
    result = self.exec(query, debug=debug)
    if result is None:
        return False, f"Failed to delete plugin '{plugin}'."
    return True, f"Successfully deleted plugin '{plugin}'."
Delete a plugin from the plugins table.
def get_plugin_id(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[int]:
    """
    Return a plugin's ID, or `None` if the plugin is not registered.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')

    query = (
        sqlalchemy
        .select(plugins_tbl.c.plugin_id)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )

    ### `self.value()` returns `None` when no row matches
    ### (consistent with `get_user_id()`); only catch conversion errors.
    result = self.value(query, debug=debug)
    if result is None:
        return None
    try:
        return int(result)
    except (TypeError, ValueError):
        return None
Return a plugin's ID.
def get_plugin_version(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[str]:
    """
    Return a plugin's version, or `None` if the plugin is not registered.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    sqlalchemy = attempt_import('sqlalchemy')

    query = (
        sqlalchemy
        .select(plugins_tbl.c.version)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )
    return self.value(query, debug=debug)
Return a plugin's version.
def get_plugins(
    self,
    user_id: Optional[int] = None,
    search_term: Optional[str] = None,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Return a list of all registered plugins.

    Parameters
    ----------
    user_id: Optional[int], default None
        If specified, filter plugins by a specific `user_id`.

    search_term: Optional[str], default None
        If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause
        to filter the plugins.

    Returns
    -------
    A list of plugin names.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    sqlalchemy = attempt_import('sqlalchemy')

    query = sqlalchemy.select(plugins_tbl.c.plugin_name)
    if user_id is not None:
        query = query.where(plugins_tbl.c.user_id == user_id)
    if search_term is not None:
        ### Prefix match; the search term is passed as a bound parameter.
        query = query.where(plugins_tbl.c.plugin_name.like(search_term + '%'))

    ### DuckDB is read via a DataFrame rather than `execute().fetchall()`.
    if self.flavor == 'duckdb':
        rows = [
            (row['plugin_name'],)
            for row in self.read(query).to_dict(orient='records')
        ]
    else:
        rows = self.execute(query).fetchall()

    return [row[0] for row in rows]
Return a list of all registered plugins.
Parameters
- user_id (Optional[int], default None): If specified, filter plugins by a specific `user_id`.
- search_term (Optional[str], default None): If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins.
Returns
- A list of plugin names.
def get_plugin_user_id(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[int]:
    """
    Return a plugin's user ID (its owner), or `None` if the plugin is not registered.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')

    query = (
        sqlalchemy
        .select(plugins_tbl.c.user_id)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )

    ### `self.value()` returns `None` when no row matches
    ### (consistent with `get_user_id()`); only catch conversion errors.
    result = self.value(query, debug=debug)
    if result is None:
        return None
    try:
        return int(result)
    except (TypeError, ValueError):
        return None
Return a plugin's user ID.
def get_plugin_username(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Optional[str]:
    """
    Return the username of a plugin's owner, joining the users and plugins tables.
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    users = get_tables(mrsm_instance=self, debug=debug)['users']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')

    ### NOTE: the previous implementation combined the two criteria with the
    ### Python `and` operator, which does not build a SQL `AND` from SQLAlchemy
    ### expressions; chained `.where()` calls combine them correctly.
    query = (
        sqlalchemy.select(users.c.username)
        .where(users.c.user_id == plugins_tbl.c.user_id)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )

    return self.value(query, debug=debug)
Return the username of a plugin's owner.
def get_plugin_attributes(
    self,
    plugin: 'mrsm.core.Plugin',
    debug: bool = False
) -> Dict[str, Any]:
    """
    Return the attributes of a plugin (an empty dictionary if unregistered).
    """
    ### ensure plugins table exists
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
    sqlalchemy = attempt_import('sqlalchemy')

    query = (
        sqlalchemy
        .select(plugins_tbl.c.attributes)
        .where(plugins_tbl.c.plugin_name == plugin.name)
    )

    attributes = self.value(query, debug=debug)
    ### Flavors without a native JSON column return the attributes as serialized text.
    if isinstance(attributes, str):
        return json.loads(attributes)
    if attributes is None:
        return {}
    return attributes
Return the attributes of a plugin.
def register_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Register a new user.

    Parameters
    ----------
    user: mrsm.core.User
        The user to be registered.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success status and a message.
    """
    import json
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.sql import json_flavors
    sqlalchemy = attempt_import('sqlalchemy')

    valid_tuple = valid_username(user.username)
    if not valid_tuple[0]:
        return valid_tuple

    ### Registration must never clobber an existing user.
    old_id = self.get_user_id(user, debug=debug)
    if old_id is not None:
        return False, f"User '{user}' already exists."

    ### ensure users table exists
    from meerschaum.connectors.sql.tables import get_tables
    tables = get_tables(mrsm_instance=self, debug=debug)

    bind_variables = {
        'username': user.username,
        'email': user.email,
        'password_hash': user.password_hash,
        'user_type': user.type,
        ### Flavors without a native JSON type store the attributes as serialized text.
        'attributes': (
            json.dumps(user.attributes) if self.flavor not in json_flavors else user.attributes
        ),
    }

    query = sqlalchemy.insert(tables['users']).values(**bind_variables)
    result = self.exec(query, debug=debug)
    if result is None:
        return False, f"Failed to register user '{user}'."
    return True, f"Successfully registered user '{user}'."
Register a new user.
def get_user_id(
    self,
    user: 'mrsm.core.User',
    debug: bool = False
) -> Optional[int]:
    """If a user is registered, return the `user_id`; otherwise return `None`."""
    ### ensure users table exists
    from meerschaum.utils.packages import attempt_import
    from meerschaum.connectors.sql.tables import get_tables
    sqlalchemy = attempt_import('sqlalchemy')
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']

    query = (
        sqlalchemy.select(users_tbl.c.user_id)
        .where(users_tbl.c.username == user.username)
    )

    user_id = self.value(query, debug=debug)
    return int(user_id) if user_id is not None else None
If a user is registered, return the `user_id`.
def get_users(
    self,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Get the registered usernames.
    """
    ### ensure users table exists
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    sqlalchemy = attempt_import('sqlalchemy')

    query = sqlalchemy.select(users_tbl.c.username)
    return list(self.read(query, debug=debug)['username'])
Get the registered usernames.
def edit_user(
    self,
    user: 'mrsm.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Update an existing user's metadata.

    Parameters
    ----------
    user: mrsm.core.User
        The user to be edited (must already be registered).

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success status and a message.
    """
    import json
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.sql import json_flavors
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']

    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    if user_id is None:
        return False, (
            f"User '{user.username}' does not exist. " +
            f"Register user '{user.username}' before editing."
        )
    user.user_id = user_id

    valid_tuple = valid_username(user.username)
    if not valid_tuple[0]:
        return valid_tuple

    ### Only include the fields which were explicitly set on the user object.
    bind_variables = {
        'user_id': user_id,
        'username': user.username,
    }
    if user.password != '':
        bind_variables['password_hash'] = user.password_hash
    if user.email != '':
        bind_variables['email'] = user.email
    if user.attributes is not None and user.attributes != {}:
        ### Serialize attributes for flavors without a native JSON type,
        ### consistent with `register_user()` (the old check only covered DuckDB).
        bind_variables['attributes'] = (
            json.dumps(user.attributes) if self.flavor not in json_flavors
            else user.attributes
        )
    if user.type != '':
        bind_variables['user_type'] = user.type

    query = (
        sqlalchemy
        .update(users_tbl)
        .values(**bind_variables)
        .where(users_tbl.c.user_id == user_id)
    )

    result = self.exec(query, debug=debug)
    if result is None:
        return False, f"Failed to edit user '{user}'."
    return True, f"Successfully edited user '{user}'."
Update an existing user's metadata.
def delete_user(
    self,
    user: 'mrsm.core.User',
    debug: bool = False
) -> SuccessTuple:
    """Delete a user's record from the users table (and the plugins they own)."""
    ### ensure users table exists
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    tables = get_tables(mrsm_instance=self, debug=debug)
    users_tbl, plugins = tables['users'], tables['plugins']
    sqlalchemy = attempt_import('sqlalchemy')

    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    if user_id is None:
        return False, f"User '{user.username}' is not registered and cannot be deleted."

    ### Delete the user's row first, then any plugins registered under that user.
    result = self.exec(
        sqlalchemy.delete(users_tbl).where(users_tbl.c.user_id == user_id),
        debug=debug,
    )
    if result is None:
        return False, f"Failed to delete user '{user}'."

    result = self.exec(
        sqlalchemy.delete(plugins).where(plugins.c.user_id == user_id),
        debug=debug,
    )
    if result is None:
        return False, f"Failed to delete plugins of user '{user}'."

    return True, f"Successfully deleted user '{user}'"
Delete a user's record from the users table.
def get_user_password_hash(
    self,
    user: 'mrsm.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """
    Return the password hash for a user.

    **NOTE**: This may be dangerous and is only allowed if the security settings explicitly allow it.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')

    if user.user_id is not None:
        ### Trust a pre-populated `user_id` to skip a lookup query.
        user_id = user.user_id
        if debug:
            dprint(f"Already given user_id: {user_id}")
    else:
        if debug:
            dprint("Fetching user_id...")
        user_id = self.get_user_id(user, debug=debug)

    ### An unregistered user has no password hash.
    if user_id is None:
        return None

    query = sqlalchemy.select(users_tbl.c.password_hash).where(users_tbl.c.user_id == user_id)

    return self.value(query, debug=debug)
Return the password hash for a user. NOTE: This may be dangerous and is only allowed if the security settings explicitly allow it.
def get_user_type(
    self,
    user: 'mrsm.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """
    Return the user's type, or `None` if the user is not registered.
    """
    from meerschaum.connectors.sql.tables import get_tables
    from meerschaum.utils.packages import attempt_import
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
    sqlalchemy = attempt_import('sqlalchemy')

    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    if user_id is None:
        return None

    query = (
        sqlalchemy.select(users_tbl.c.user_type)
        .where(users_tbl.c.user_id == user_id)
    )
    return self.value(query, debug=debug)
Return the user's type.
def get_user_attributes(
    self,
    user: 'mrsm.core.User',
    debug: bool = False
) -> Union[Dict[str, Any], None]:
    """
    Return the user's attributes, deserializing from text if necessary.
    """
    ### ensure users table exists
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
    from meerschaum.connectors.sql.tables import get_tables
    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']

    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)

    query = (
        sqlalchemy.select(users_tbl.c.attributes)
        .where(users_tbl.c.user_id == user_id)
    )

    result = self.value(query, debug=debug)
    if result is None or isinstance(result, dict):
        return result

    ### Try a direct dict coercion first, then fall back to JSON parsing.
    try:
        return dict(result)
    except Exception:
        pass
    try:
        import json
        return json.loads(result)
    except Exception:
        pass

    warn(f"Received unexpected type for attributes: {result}")
    return result
Return the user's attributes.
@classmethod
def from_uri(
    cls,
    uri: str,
    label: Optional[str] = None,
    as_dict: bool = False,
) -> Union[
    'meerschaum.connectors.SQLConnector',
    Dict[str, Union[str, int]],
]:
    """
    Create a new SQLConnector from a URI string.

    Parameters
    ----------
    uri: str
        The URI connection string.

    label: Optional[str], default None
        If provided, use this as the connector label.
        Otherwise use the determined database name.

    as_dict: bool, default False
        If `True`, return a dictionary of the keyword arguments
        necessary to create a new `SQLConnector`, otherwise create a new object.

    Returns
    -------
    A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`).
    """
    params = cls.parse_uri(uri)
    params['uri'] = uri

    flavor = params.get('flavor', None)
    if not flavor or flavor not in cls.flavor_configs:
        error(f"Invalid flavor '{flavor}' detected from the provided URI.")
    if 'database' not in params:
        error("Unable to determine the database from the provided URI.")

    ### Derive a default label when none was given:
    ### file-based flavors use the database file's name,
    ### otherwise build `username@host/database`.
    if flavor in ('sqlite', 'duckdb'):
        if params['database'] == ':memory:':
            params['label'] = label or f'memory_{flavor}'
        else:
            params['label'] = label or params['database'].split(os.path.sep)[-1].lower()
    else:
        username_part = (params['username'] + '@') if 'username' in params else ''
        host_part = params.get('host', '') + ('/' if 'host' in params else '')
        params['label'] = label or (
            username_part + host_part + params.get('database', '')
        ).lower()

    return cls(**params) if not as_dict else params
Create a new SQLConnector from a URI string.
Parameters
- uri (str): The URI connection string.
- label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
- as_dict (bool, default False): If `True`, return a dictionary of the keyword arguments necessary to create a new `SQLConnector`, otherwise create a new object.
Returns
- A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`).
@staticmethod
def parse_uri(uri: str) -> Dict[str, Any]:
    """
    Parse a URI string into a dictionary of parameters.

    Parameters
    ----------
    uri: str
        The database connection URI.

    Returns
    -------
    A dictionary of attributes.

    Examples
    --------
    >>> parse_uri('sqlite:////home/foo/bar.db')
    {'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
    >>> parse_uri(
    ...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
    ...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
    ... )
    {'host': 'localhost', 'database': 'master', 'username': 'sa',
    'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
    'driver': 'ODBC Driver 17 for SQL Server'}
    >>>
    """
    from urllib.parse import parse_qs, urlparse
    sqlalchemy = attempt_import('sqlalchemy')

    ### Let SQLAlchemy split the URI into its connection arguments.
    params = sqlalchemy.engine.url.make_url(uri).translate_connect_args()

    ### The flavor is the URI scheme minus any driver suffix (e.g. 'mssql+pyodbc').
    params['flavor'] = uri.split(':')[0].split('+')[0]
    if params['flavor'] == 'postgres':
        params['flavor'] = 'postgresql'

    ### Fold any query-string arguments (e.g. `?driver=...`) into the params.
    if '?' in uri:
        parsed_uri = urlparse(uri)
        for key, values in parse_qs(parsed_uri.query).items():
            params[key] = values[0]

    ### A `--search_path` option doubles as the schema.
    if '--search_path' in params.get('options', ''):
        params['schema'] = params['options'].replace('--search_path=', '', 1)
    return params
Parse a URI string into a dictionary of parameters.
Parameters
- uri (str): The database connection URI.
Returns
- A dictionary of attributes.
Examples
>>> parse_uri('sqlite:////home/foo/bar.db')
{'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
>>> parse_uri(
... 'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
... + '/master?driver=ODBC+Driver+17+for+SQL+Server'
... )
{'host': 'localhost', 'database': 'master', 'username': 'sa',
'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
'driver': 'ODBC Driver 17 for SQL Server'}
>>>
class APIConnector(Connector):
    """
    Connect to a Meerschaum API instance.
    """

    IS_INSTANCE: bool = True
    IS_THREAD_SAFE: bool = False

    OPTIONAL_ATTRIBUTES: List[str] = ['port']

    ### Bind the implementations from the sibling submodules as methods.
    from ._request import (
        make_request,
        get,
        post,
        put,
        patch,
        delete,
        wget,
    )
    from ._actions import (
        get_actions,
        do_action,
        do_action_async,
        do_action_legacy,
    )
    from ._misc import get_mrsm_version, get_chaining_status
    from ._pipes import (
        register_pipe,
        fetch_pipes_keys,
        edit_pipe,
        sync_pipe,
        delete_pipe,
        get_pipe_data,
        get_pipe_id,
        get_pipe_attributes,
        get_sync_time,
        pipe_exists,
        create_metadata,
        get_pipe_rowcount,
        drop_pipe,
        clear_pipe,
        get_pipe_columns_types,
    )
    from ._fetch import fetch
    from ._plugins import (
        register_plugin,
        install_plugin,
        delete_plugin,
        get_plugins,
        get_plugin_attributes,
    )
    from ._login import login, test_connection
    from ._users import (
        register_user,
        get_user_id,
        get_users,
        edit_user,
        delete_user,
        get_user_password_hash,
        get_user_type,
        get_user_attributes,
    )
    from ._uri import from_uri
    from ._jobs import (
        get_jobs,
        get_job,
        get_job_metadata,
        get_job_properties,
        get_job_exists,
        delete_job,
        start_job,
        create_job,
        stop_job,
        pause_job,
        get_logs,
        get_job_stop_time,
        monitor_logs,
        monitor_logs_async,
        get_job_is_blocking_on_stdin,
        get_job_began,
        get_job_ended,
        get_job_paused,
        get_job_status,
    )

    def __init__(
        self,
        label: Optional[str] = None,
        wait: bool = False,
        debug: bool = False,
        **kw
    ):
        """
        Build an `APIConnector` from explicit attributes or from a `uri` keyword.
        """
        ### A provided URI determines the label and connection attributes.
        if 'uri' in kw:
            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
            label = label or from_uri_params.get('label', None)
            _ = from_uri_params.pop('label', None)
            kw.update(from_uri_params)

        super().__init__('api', label=label, **kw)

        if 'protocol' not in self.__dict__:
            self.protocol = (
                'https' if self.__dict__.get('uri', '').startswith('https')
                else 'http'
            )

        if 'uri' not in self.__dict__:
            self.verify_attributes(required_attributes)
        else:
            ### Reuse the SQLConnector's URI parser to split out host, port, etc.
            from meerschaum.connectors.sql import SQLConnector
            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
            if 'host' not in conn_attrs:
                raise Exception(f"Invalid URI for '{self}'.")
            self.__dict__.update(conn_attrs)

        self.url = (
            self.protocol + '://' +
            self.host
            + (
                (':' + str(self.port))
                if self.__dict__.get('port', None)
                else ''
            )
        )
        ### Login state is populated lazily by the `session` and `token` properties.
        self._token = None
        self._expires = None
        self._session = None

    @property
    def URI(self) -> str:
        """
        Return the fully qualified URI.
        """
        username = self.__dict__.get('username', None)
        password = self.__dict__.get('password', None)
        creds = (username + ':' + password + '@') if username and password else ''
        return (
            self.protocol
            + '://'
            + creds
            + self.host
            + (
                (':' + str(self.port))
                if self.__dict__.get('port', None)
                else ''
            )
        )

    @property
    def session(self):
        """Lazily build and cache a `requests.Session`."""
        if self._session is None:
            certifi = attempt_import('certifi', lazy=False)
            requests = attempt_import('requests', lazy=False)
            if requests:
                self._session = requests.Session()
            if self._session is None:
                error(f"Failed to import requests. Is requests installed?")
        return self._session

    @property
    def token(self):
        """Return the cached login token, logging in again when missing or near expiry."""
        ### Treat the token as expired one minute before its actual expiry.
        expired = (
            True if self._expires is None else (
                (
                    self._expires
                    <
                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
                )
            )
        )

        if self._token is None or expired:
            success, msg = self.login()
            if not success:
                warn(msg, stack=False)
        return self._token
def make_request(
    self,
    method: str,
    r_url: str,
    headers: Optional[Dict[str, Any]] = None,
    use_token: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> 'requests.Response':
    """
    Make a request to this APIConnector's endpoint using the in-memory session.

    Parameters
    ----------
    method: str
        The kind of request to make.
        Accepted values:
        - `'GET'`
        - `'OPTIONS'`
        - `'HEAD'`
        - `'POST'`
        - `'PUT'`
        - `'PATCH'`
        - `'DELETE'`

    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    headers: Optional[Dict[str, Any]], default None
        The headers to use for the request.
        If `use_token` is `True`, the authorization token will be added to a copy of these headers.

    use_token: bool, default True
        If `True`, add the authorization token to the headers.

    debug: bool, default False
        Verbosity toggle.

    kwargs: Any
        All other keyword arguments are passed to `requests.request`.

    Returns
    -------
    A `requests.Response` object.
    """
    request_method = method.upper()
    if request_method not in METHODS:
        raise ValueError(f"Method '{method}' is not supported.")

    ### Respect a connector-level `verify` (SSL) setting unless overridden per call.
    verify = self.__dict__.get('verify', None)
    if 'verify' not in kwargs and isinstance(verify, bool):
        kwargs['verify'] = verify

    ### Deep-copy the headers so the token never leaks into the caller's dict.
    request_headers = (
        copy.deepcopy(headers)
        if isinstance(headers, dict)
        else {}
    )
    if use_token:
        request_headers['Authorization'] = f'Bearer {self.token}'

    if 'timeout' not in kwargs:
        kwargs['timeout'] = STATIC_CONFIG['api']['default_timeout']

    request_url = urllib.parse.urljoin(self.url, r_url)
    if debug:
        dprint(f"[{self}] Sending a '{request_method}' request to {request_url}")

    return self.session.request(
        request_method,
        request_url,
        headers=request_headers,
        **kwargs
    )
Make a request to this APIConnector's endpoint using the in-memory session.
Parameters
- method (str):
The kind of request to make.
Accepted values:
'GET'
'OPTIONS'
'HEAD'
'POST'
'PUT'
'PATCH'
'DELETE'
- r_url (str):
The relative URL for the endpoint (e.g.
'/pipes'
). - headers (Optional[Dict[str, Any]], default None):
The headers to use for the request.
If `use_token` is `True`, the authorization token will be added to a copy of these headers. - use_token (bool, default True):
If
True
, add the authorization token to the headers. - debug (bool, default False): Verbosity toggle.
- kwargs (Any):
All other keyword arguments are passed to
requests.request
.
Returns
- A `requests.Response` object.
def get(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Convenience wrapper which issues a `'GET'` request via `make_request()`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded to `make_request()` (and ultimately `requests.request`),
        e.g. `headers`, `use_token`, `debug`.

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('GET', r_url, **kwargs)
    return response
Wrapper for requests.get
.
Parameters
- r_url (str):
The relative URL for the endpoint (e.g.
'/pipes'
). - headers (Optional[Dict[str, Any]], default None):
The headers to use for the request.
If
use_token
isTrue
, the authorization token will be added to a copy of these headers. - use_token (bool, default True):
If
True
, add the authorization token to the headers. - debug (bool, default False): Verbosity toggle.
- kwargs (Any):
All other keyword arguments are passed to
requests.request
.
Returns
- A `requests.Response` object.
def post(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Convenience wrapper which issues a `'POST'` request via `make_request()`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded to `make_request()` (and ultimately `requests.request`),
        e.g. `headers`, `use_token`, `debug`.

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('POST', r_url, **kwargs)
    return response
Wrapper for requests.post
.
Parameters
- r_url (str):
The relative URL for the endpoint (e.g.
'/pipes'
). - headers (Optional[Dict[str, Any]], default None):
The headers to use for the request.
If
use_token
isTrue
, the authorization token will be added to a copy of these headers. - use_token (bool, default True):
If
True
, add the authorization token to the headers. - debug (bool, default False): Verbosity toggle.
- kwargs (Any):
All other keyword arguments are passed to
requests.request
.
Returns
- A `requests.Response` object.
def put(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Convenience wrapper which issues a `'PUT'` request via `make_request()`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded to `make_request()` (and ultimately `requests.request`),
        e.g. `headers`, `use_token`, `debug`.

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('PUT', r_url, **kwargs)
    return response
Wrapper for requests.put
.
Parameters
- r_url (str):
The relative URL for the endpoint (e.g.
'/pipes'
). - headers (Optional[Dict[str, Any]], default None):
The headers to use for the request.
If
use_token
isTrue
, the authorization token will be added to a copy of these headers. - use_token (bool, default True):
If
True
, add the authorization token to the headers. - debug (bool, default False): Verbosity toggle.
- kwargs (Any):
All other keyword arguments are passed to
requests.request
.
Returns
- A `requests.Response` object.
def patch(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Convenience wrapper which issues a `'PATCH'` request via `make_request()`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded to `make_request()` (and ultimately `requests.request`),
        e.g. `headers`, `use_token`, `debug`.

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('PATCH', r_url, **kwargs)
    return response
Wrapper for requests.patch
.
Parameters
- r_url (str):
The relative URL for the endpoint (e.g.
'/pipes'
). - headers (Optional[Dict[str, Any]], default None):
The headers to use for the request.
If
use_token
isTrue
, the authorization token will be added to a copy of these headers. - use_token (bool, default True):
If
True
, add the authorization token to the headers. - debug (bool, default False): Verbosity toggle.
- kwargs (Any):
All other keyword arguments are passed to
requests.request
.
Returns
- A `requests.Response` object.
def delete(self, r_url: str, **kwargs: Any) -> 'requests.Response':
    """
    Convenience wrapper which issues a `'DELETE'` request via `make_request()`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint (e.g. `'/pipes'`).

    kwargs: Any
        Forwarded to `make_request()` (and ultimately `requests.request`),
        e.g. `headers`, `use_token`, `debug`.

    Returns
    -------
    A `requests.Response` object.
    """
    response = self.make_request('DELETE', r_url, **kwargs)
    return response
Wrapper for requests.delete
.
Parameters
- r_url (str):
The relative URL for the endpoint (e.g.
'/pipes'
). - headers (Optional[Dict[str, Any]], default None):
The headers to use for the request.
If
use_token
isTrue
, the authorization token will be added to a copy of these headers. - use_token (bool, default True):
If
True
, add the authorization token to the headers. - debug (bool, default False): Verbosity toggle.
- kwargs (Any):
All other keyword arguments are passed to
requests.request
.
Returns
- A `requests.Response` object.
def wget(
    self,
    r_url: str,
    dest: Optional[Union[str, pathlib.Path]] = None,
    headers: Optional[Dict[str, Any]] = None,
    use_token: bool = True,
    debug: bool = False,
    **kw: Any
) -> pathlib.Path:
    """
    Download a file from the API endpoint, mimicking `wget` with `requests`.

    Parameters
    ----------
    r_url: str
        The relative URL for the endpoint.

    dest: Optional[Union[str, pathlib.Path]], default None
        Optional destination path for the downloaded file.

    headers: Optional[Dict[str, Any]], default None
        Headers for the request.
        NOTE: unlike `make_request()`, the caller's dictionary is used
        directly (and mutated when `use_token` is `True`).

    use_token: bool, default True
        If `True`, add the authorization token to the headers.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The path to the downloaded file.
    """
    from meerschaum.utils.misc import wget
    headers = {} if headers is None else headers
    if use_token:
        headers['Authorization'] = f'Bearer {self.token}'
    request_url = urllib.parse.urljoin(self.url, r_url)
    if debug:
        dprint(
            f"[{self}] Downloading {request_url}"
            + (f' to {dest}' if dest is not None else '')
            + "..."
        )
    return wget(request_url, dest=dest, headers=headers, **kw)
Mimic wget with requests.
def get_actions(self):
    """Return the response listing the actions available on the API instance."""
    response = self.get(ACTIONS_ENDPOINT)
    return response
Get available actions from the API instance.
def do_action(self, sysargs: List[str]) -> SuccessTuple:
    """
    Execute a Meerschaum action on the remote instance and
    return its success tuple.
    """
    coroutine = self.do_action_async(sysargs)
    return asyncio.run(coroutine)
Execute a Meerschaum action remotely.
async def do_action_async(
    self,
    sysargs: List[str],
    callback_function: Callable[[str], None] = partial(print, end=''),
) -> SuccessTuple:
    """
    Execute an action remotely by spawning a temporary job, streaming its
    logs through `callback_function`, and returning the job's result.
    """
    from meerschaum._internal.arguments import remove_api_executor_keys
    from meerschaum.utils.misc import generate_password

    sysargs = remove_api_executor_keys(sysargs)

    ### Temporary jobs get a random suffix so concurrent actions don't collide.
    temp_job_name = TEMP_PREFIX + generate_password(12)
    temp_job = mrsm.Job(temp_job_name, sysargs, executor_keys=str(self))

    started, start_message = temp_job.start()
    if not started:
        return started, start_message

    await temp_job.monitor_logs_async(
        callback_function=callback_function,
        stop_on_exit=True,
        strip_timestamps=True,
    )

    success, msg = temp_job.result
    temp_job.delete()
    return success, msg
Execute an action as a temporary remote job.
def do_action_legacy(
    self,
    action: Optional[List[str]] = None,
    sysargs: Optional[List[str]] = None,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    NOTE: This method is deprecated.
    Please use `do_action()` or `do_action_async()`.

    Execute a Meerschaum action remotely.

    If `sysargs` are provided, parse those instead.
    Otherwise infer everything from keyword arguments.

    Examples
    --------
    >>> conn = mrsm.get_connector('api:main')
    >>> conn.do_action(['show', 'pipes'])
    (True, "Success")
    >>> conn.do_action(['show', 'arguments'], name='test')
    (True, "Success")
    """
    import sys, json
    from meerschaum.utils.debug import dprint
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.misc import json_serialize_datetime
    if action is None:
        action = []

    ### NOTE(review): `sysargs` are only parsed when `action` starts with an
    ### empty string — presumably the sentinel the CLI passes; confirm callers.
    if sysargs is not None and action and action[0] == '':
        from meerschaum._internal.arguments import parse_arguments
        if debug:
            dprint(f"Parsing sysargs:\n{sysargs}")
        json_dict = parse_arguments(sysargs)
    else:
        ### Build the request payload from keyword arguments,
        ### defaulting to non-interactive flags (`noask`, `yes`).
        json_dict = kw
        json_dict['action'] = action
        if 'noask' not in kw:
            json_dict['noask'] = True
        if 'yes' not in kw:
            json_dict['yes'] = True
        if debug:
            json_dict['debug'] = debug

    ### The first word of the action becomes part of the endpoint URL;
    ### the remainder stays in the JSON body.
    root_action = json_dict['action'][0]
    del json_dict['action'][0]
    r_url = f"{STATIC_CONFIG['api']['endpoints']['actions']}/{root_action}"

    if debug:
        from meerschaum.utils.formatting import pprint
        dprint(f"Sending data to '{self.url + r_url}':")
        pprint(json_dict, stream=sys.stderr)

    response = self.post(
        r_url,
        data = json.dumps(json_dict, default=json_serialize_datetime),
        debug = debug,
    )
    try:
        response_list = json.loads(response.text)
        ### A dict with 'detail' is the API's error document.
        if isinstance(response_list, dict) and 'detail' in response_list:
            return False, response_list['detail']
    except Exception as e:
        print(f"Invalid response: {response}")
        print(e)
        return False, response.text
    if debug:
        dprint(response)
    try:
        ### Expected shape: a two-element [success, message] list.
        return response_list[0], response_list[1]
    except Exception as e:
        return False, f"Failed to parse result from action '{root_action}'"
NOTE: This method is deprecated.
Please use do_action()
or do_action_async()
.
Execute a Meerschaum action remotely.
If sysargs
are provided, parse those instead.
Otherwise infer everything from keyword arguments.
Examples
>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action(['show', 'pipes'])
(True, "Success")
>>> conn.do_action(['show', 'arguments'], name='test')
(True, "Success")
def get_mrsm_version(self, **kw) -> Optional[str]:
    """
    Fetch and return the remote API server's Meerschaum version string,
    or `None` if it cannot be determined.
    """
    from meerschaum.config.static import STATIC_CONFIG
    version_endpoint = STATIC_CONFIG['api']['endpoints']['version'] + '/mrsm'
    try:
        payload = self.get(version_endpoint, use_token=False, **kw).json()
    except Exception:
        return None
    ### A dict with 'detail' is the API's error document, not a version.
    if isinstance(payload, dict) and 'detail' in payload:
        return None
    return payload
Return the Meerschaum version of the API instance.
def get_chaining_status(self, **kw) -> Optional[bool]:
    """
    Return whether the remote API instance permits chaining,
    or `None` if the status cannot be fetched.
    """
    from meerschaum.config.static import STATIC_CONFIG
    try:
        response = self.get(
            STATIC_CONFIG['api']['endpoints']['chaining'],
            use_token=True,
            **kw
        )
        if not response:
            return None
    except Exception:
        return None

    return response.json()
Fetch the chaining status of the API instance.
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> SuccessTuple:
    """
    Register a new pipe on the API instance via a POST request.

    Returns a tuple of (success_bool, response_dict).
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.config.static import STATIC_CONFIG
    ### NOTE: if `parameters` was supplied to the Pipe constructor,
    ### `pipe.parameters` is used as-is instead of being fetched remotely.
    response = self.post(
        pipe_r_url(pipe) + '/register',
        json=pipe.parameters,
        debug=debug,
    )
    if debug:
        dprint(response.text)
    payload = response.json()
    if isinstance(payload, list):
        return response.__bool__(), payload[1]
    if 'detail' in payload:
        return response.__bool__(), payload['detail']
    return response.__bool__(), response.text
Submit a POST to the API to register a new Pipe object. Returns a tuple of (success_bool, response_dict).
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False
) -> Union[List[Tuple[str, str, Union[str, None]]]]:
    """
    Fetch registered pipes' keys from the API.

    Parameters
    ----------
    connector_keys: Optional[List[str]], default None
        The connector keys for the query.

    metric_keys: Optional[List[str]], default None
        The metric keys for the query.

    location_keys: Optional[List[str]], default None
        The location keys for the query.

    tags: Optional[List[str]], default None
        A list of tags for the query.

    params: Optional[Dict[str, Any]], default None
        A parameters dictionary for filtering against the `pipes` table
        (e.g. `{'connector_keys': 'plugin:foo'}`).
        Not recommended to be used.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of tuples containing pipes' keys.
    """
    from meerschaum.config.static import STATIC_CONFIG
    keys_url = STATIC_CONFIG['api']['endpoints']['pipes'] + '/keys'
    ### Each filter is JSON-encoded into a query parameter; `None` lists
    ### become empty lists.
    request_params = {
        'connector_keys': json.dumps(connector_keys if connector_keys is not None else []),
        'metric_keys': json.dumps(metric_keys if metric_keys is not None else []),
        'location_keys': json.dumps(location_keys if location_keys is not None else []),
        'tags': json.dumps(tags if tags is not None else []),
        'params': json.dumps(params),
    }
    try:
        j = self.get(keys_url, params=request_params, debug=debug).json()
    except Exception as e:
        error(str(e))

    if 'detail' in j:
        error(j['detail'], stack=False)
    return [tuple(keys) for keys in j]
Fetch registered Pipes' keys from the API.
Parameters
- connector_keys (Optional[List[str]], default None): The connector keys for the query.
- metric_keys (Optional[List[str]], default None): The metric keys for the query.
- location_keys (Optional[List[str]], default None): The location keys for the query.
- tags (Optional[List[str]], default None): A list of tags for the query.
- params (Optional[Dict[str, Any]], default None):
A parameters dictionary for filtering against the
pipes
table (e.g. `{'connector_keys': 'plugin:foo'}`). Not recommended to be used. - debug (bool, default False): Verbosity toggle.
Returns
- A list of tuples containing pipes' keys.
def edit_pipe(
    self,
    pipe: mrsm.Pipe,
    patch: bool = False,
    debug: bool = False,
) -> SuccessTuple:
    """
    Edit an existing pipe's registration via a PATCH request.

    Returns a tuple of (success_bool, response_dict).
    """
    from meerschaum.utils.debug import dprint
    ### NOTE: if `parameters` was supplied to the Pipe constructor,
    ### `pipe.parameters` is used as-is instead of being fetched remotely.
    response = self.patch(
        pipe_r_url(pipe) + '/edit',
        params={'patch': patch,},
        json=pipe.parameters,
        debug=debug,
    )
    if debug:
        dprint(response.text)
    payload = response.json()
    if isinstance(payload, list):
        return response.__bool__(), payload[1]
    if 'detail' in payload:
        return response.__bool__(), payload['detail']
    return response.__bool__(), response.text
Submit a PATCH to the API to edit an existing Pipe object. Returns a tuple of (success_bool, response_dict).
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]] = None,
    chunksize: Optional[int] = -1,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Sync a DataFrame into a Pipe.

    Accepts a DataFrame, a dict of columns, a list of rows, or a JSON string;
    the data is chunked and POSTed to the pipe's `/data` endpoint.
    Returns a (success, message) tuple.
    """
    from decimal import Decimal
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.misc import json_serialize_datetime, items_str
    from meerschaum.config import get_config
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.dataframe import get_numeric_cols, to_json
    begin = time.time()
    more_itertools = attempt_import('more_itertools')
    if df is None:
        msg = f"DataFrame is `None`. Cannot sync {pipe}."
        return False, msg

    def get_json_str(c):
        ### allow syncing dict or JSON without needing to import pandas (for IOT devices)
        if isinstance(c, (dict, list)):
            return json.dumps(c, default=json_serialize_datetime)
        return to_json(c, orient='columns')

    ### A JSON string is decoded first so the branches below see dict/list/DataFrame.
    df = json.loads(df) if isinstance(df, str) else df

    ### `-1` means "use the configured SQL chunksize"; `None` degrades to 1.
    _chunksize: Optional[int] = (1 if chunksize is None else (
        get_config('system', 'connectors', 'sql', 'chunksize') if chunksize == -1
        else chunksize
    ))
    ### NOTE(review): `df.columns` assumes a DataFrame-like object here, yet the
    ### dict branch below is still reachable — confirm dict inputs work upstream.
    keys: List[str] = list(df.columns)
    chunks = []
    if hasattr(df, 'index'):
        df = df.reset_index(drop=True)
        is_dask = 'dask' in df.__module__
        ### Pandas frames are chunked lazily by index; dask partitions are computed.
        chunks = (
            (df.iloc[i] for i in more_itertools.chunked(df.index, _chunksize))
            if not is_dask
            else [partition.compute() for partition in df.partitions]
        )

        numeric_cols = get_numeric_cols(df)
        if numeric_cols:
            ### Serialize Decimals as fixed-point strings for transport.
            for col in numeric_cols:
                df[col] = df[col].apply(lambda x: f'{x:f}' if isinstance(x, Decimal) else x)
            pipe_dtypes = pipe.dtypes
            new_numeric_cols = [
                col
                for col in numeric_cols
                if pipe_dtypes.get(col, None) != 'numeric'
            ]
            pipe.dtypes.update({
                col: 'numeric'
                for col in new_numeric_cols
            })
            ### Persist any newly-detected numeric dtypes on the pipe registration.
            edit_success, edit_msg = pipe.edit(debug=debug)
            if not edit_success:
                warn(
                    "Failed to update new numeric columns "
                    + f"{items_str(new_numeric_cols)}:\n{edit_msg}"
                )
    elif isinstance(df, dict):
        ### `_chunks` is a dict of lists of dicts.
        ### e.g. {'a' : [ {'a':[1, 2]}, {'a':[3, 4]} ] }
        _chunks = {k: [] for k in keys}
        for k in keys:
            chunk_iter = more_itertools.chunked(df[k], _chunksize)
            for l in chunk_iter:
                _chunks[k].append({k: l})

        ### `chunks` is a list of dicts (e.g. orient by rows in pandas JSON).
        for k, l in _chunks.items():
            for i, c in enumerate(l):
                try:
                    chunks[i].update(c)
                except IndexError:
                    chunks.append(c)
    elif isinstance(df, list):
        ### NOTE(review): `more_itertools.chunked` yields lists of elements,
        ### so `df[i]` indexes with a list — verify this branch is exercised.
        chunks = (df[i] for i in more_itertools.chunked(df, _chunksize))

    ### Send columns in case the user has defined them locally.
    if pipe.columns:
        kw['columns'] = json.dumps(pipe.columns)
    r_url = pipe_r_url(pipe) + '/data'

    rowcount = 0
    num_success_chunks = 0
    for i, c in enumerate(chunks):
        if debug:
            dprint(f"[{self}] Posting chunk {i} to {r_url}...")
        if len(c) == 0:
            if debug:
                dprint(f"[{self}] Skipping empty chunk...")
            continue
        json_str = get_json_str(c)

        try:
            response = self.post(
                r_url,
                ### handles check_existing
                params = kw,
                data = json_str,
                debug = debug
            )
        except Exception as e:
            msg = f"Failed to post a chunk to {pipe}:\n{e}"
            warn(msg)
            return False, msg

        if not response:
            return False, f"Failed to sync a chunk:\n{response.text}"

        try:
            j = json.loads(response.text)
        except Exception as e:
            return False, f"Failed to parse response from syncing {pipe}:\n{e}"

        ### A dict with 'detail' is the API's error document.
        if isinstance(j, dict) and 'detail' in j:
            return False, j['detail']

        try:
            j = tuple(j)
        except Exception as e:
            return False, response.text

        if debug:
            dprint("Received response: " + str(j))
        ### Bail out on the first failed chunk, propagating its tuple.
        if not j[0]:
            return j

        rowcount += len(c)
        num_success_chunks += 1

    success_tuple = True, (
        f"It took {round(time.time() - begin, 2)} seconds to sync {rowcount} row"
        + ('s' if rowcount != 1 else '')
        + f" across {num_success_chunks} chunk" + ('s' if num_success_chunks != 1 else '') +
        f" to {pipe}."
    )
    return success_tuple
Sync a DataFrame into a Pipe.
def delete_pipe(
    self,
    pipe: Optional[meerschaum.Pipe] = None,
    debug: bool = None,
) -> SuccessTuple:
    """Delete a pipe's registration and drop its table."""
    if pipe is None:
        error(f"Pipe cannot be None.")
    response = self.delete(
        pipe_r_url(pipe) + '/delete',
        debug=debug,
    )
    if debug:
        dprint(response.text)
    payload = response.json()
    if isinstance(payload, list):
        return response.__bool__(), payload[1]
    if 'detail' in payload:
        return response.__bool__(), payload['detail']
    return response.__bool__(), response.text
Delete a Pipe and drop its table.
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[str, datetime, int, None] = None,
    end: Union[str, datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    as_chunks: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union[pandas.DataFrame, None]:
    """
    Fetch a pipe's data from the API.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose data to fetch.

    select_columns: Optional[List[str]], default None
        Columns to include in the result.

    omit_columns: Optional[List[str]], default None
        Columns to exclude from the result.

    begin: Union[str, datetime, int, None], default None
        Lower datetime bound.

    end: Union[str, datetime, int, None], default None
        Upper datetime bound.

    params: Optional[Dict[str, Any]], default None
        Additional filter parameters.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A DataFrame of the pipe's data, or `None` on failure.
    """
    r_url = pipe_r_url(pipe)
    try:
        response = self.get(
            r_url + "/data",
            params={
                'select_columns': json.dumps(select_columns),
                'omit_columns': json.dumps(omit_columns),
                'begin': begin,
                'end': end,
                'params': json.dumps(params, default=str)
            },
            debug=debug
        )
        if not response.ok:
            return None
        j = response.json()
    except Exception as e:
        warn(f"Failed to get data for {pipe}:\n{e}")
        return None

    ### BUGFIX: previously returned a `(False, detail)` tuple here,
    ### contradicting the declared `Union[DataFrame, None]` return type.
    ### Warn and return `None` instead. (The old `while True` wrapper never
    ### looped — it always returned or broke on the first pass — so it is gone.)
    if isinstance(j, dict) and 'detail' in j:
        warn(j['detail'])
        return None

    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.dataframe import parse_df_datetimes, add_missing_cols_to_df
    pd = import_pandas()
    try:
        df = pd.read_json(StringIO(response.text))
    except Exception as e:
        warn(f"Failed to parse response for {pipe}:\n{e}")
        return None

    if len(df.columns) == 0:
        return add_missing_cols_to_df(df, pipe.dtypes)

    ### Only parse datetimes for columns the pipe declares as datetime.
    df = parse_df_datetimes(
        df,
        ignore_cols = [
            col
            for col, dtype in pipe.dtypes.items()
            if 'datetime' not in str(dtype)
        ],
        debug = debug,
    )
    return df
Fetch data from the API.
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> int:
    """
    Get a pipe's registration ID from the API.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose ID to fetch.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The pipe's integer ID, or `None` if it cannot be determined.
    """
    from meerschaum.utils.misc import is_int
    ### BUGFIX: the annotation previously read `meerschuam.Pipe` (typo).
    r_url = pipe_r_url(pipe)
    response = self.get(
        r_url + '/id',
        debug = debug
    )
    if debug:
        dprint(f"Got pipe ID: {response.text}")
    try:
        if is_int(response.text):
            return int(response.text)
    except Exception as e:
        warn(f"Failed to get the ID for {pipe}:\n{e}")
    return None
Get a Pipe's ID from the API.
def get_pipe_attributes(
    self,
    pipe: meerschaum.Pipe,
    debug: bool = False,
) -> Dict[str, Any]:
    """
    Fetch a pipe's attributes dictionary from the API.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe whose attributes we are fetching.

    Returns
    -------
    A dictionary of the pipe's attributes.
    If the pipe does not exist, return an empty dictionary.
    """
    response = self.get(pipe_r_url(pipe) + '/attributes', debug=debug)
    try:
        return json.loads(response.text)
    except Exception as e:
        warn(f"Failed to get the attributes for {pipe}:\n{e}")
    return {}
Get a Pipe's attributes from the API
Parameters
- pipe (meerschaum.Pipe): The pipe whose attributes we are fetching.
Returns
- A dictionary of a pipe's attributes.
- If the pipe does not exist, return an empty dictionary.
def get_sync_time(
    self,
    pipe: 'meerschaum.Pipe',
    params: Optional[Dict[str, Any]] = None,
    newest: bool = True,
    debug: bool = False,
) -> Union[datetime, int, None]:
    """
    Get a pipe's most recent datetime value from the API.

    Parameters
    ----------
    pipe: meerschaum.Pipe
        The pipe to select from.

    params: Optional[Dict[str, Any]], default None
        Optional params dictionary to build the WHERE clause.

    newest: bool, default True
        If `True`, get the most recent datetime (honoring `params`).
        If `False`, get the oldest datetime (ASC instead of DESC).

    Returns
    -------
    The most recent (or oldest if `newest` is `False`) datetime of the pipe,
    rounded down to the closest minute.
    """
    from meerschaum.utils.misc import is_int
    from meerschaum.utils.warnings import warn
    response = self.get(
        pipe_r_url(pipe) + '/sync_time',
        json=params,
        params={'newest': newest, 'debug': debug},
        debug=debug,
    )
    if not response:
        warn(f"Failed to get the sync time for {pipe}:\n" + response.text)
        return None

    j = response.json()
    if j is None:
        return None
    try:
        ### Integer sync times stay ints; everything else is parsed as ISO-8601.
        return int(j) if is_int(j) else datetime.fromisoformat(j)
    except Exception as e:
        warn(f"Failed to parse the sync time '{j}' for {pipe}:\n{e}")
        return None
Get a Pipe's most recent datetime value from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe to select from.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause.
- newest (bool, default True):
If
True
, get the most recent datetime (honoringparams
). IfFalse
, get the oldest datetime (ASC instead of DESC).
Returns
- The most recent (or oldest if
newest
isFalse
) datetime of a pipe, - rounded down to the closest minute.
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> bool:
    """
    Ask the API whether a pipe's underlying table exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe we are querying.

    Returns
    -------
    A bool indicating whether the pipe's underlying table exists.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.warnings import warn
    response = self.get(pipe_r_url(pipe) + '/exists', debug=debug)
    if not response:
        warn(f"Failed to check if {pipe} exists:\n{response.text}")
        return False
    if debug:
        dprint("Received response: " + str(response.text))
    payload = response.json()
    ### A dict with 'detail' is the API's error document.
    if isinstance(payload, dict) and 'detail' in payload:
        warn(payload['detail'])
    return payload
Check the API to see if a Pipe exists.
Parameters
- pipe ('meerschaum.Pipe'): The pipe which were are querying.
Returns
- A bool indicating whether a pipe's underlying table exists.
def create_metadata(
    self,
    debug: bool = False
) -> bool:
    """
    Create the Meerschaum metadata tables on the API instance.

    Parameters
    ----------
    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A bool indicating success.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.config.static import STATIC_CONFIG
    r_url = STATIC_CONFIG['api']['endpoints']['metadata']
    response = self.post(r_url, debug=debug)
    if debug:
        ### BUGFIX: this message was missing its `f` prefix and printed
        ### the literal text '{response.text}'.
        dprint(f"Create metadata response: {response.text}")
    try:
        metadata_response = json.loads(response.text)
    except Exception as e:
        warn(f"Failed to create metadata on {self}:\n{e}")
        metadata_response = False
    ### BUGFIX: previously always returned `False`, discarding the parsed
    ### response; report the server's result instead.
    return bool(metadata_response)
Create metadata tables.
Returns
- A bool indicating success.
def get_pipe_rowcount(
    self,
    pipe: mrsm.Pipe,
    begin: Optional[datetime] = None,
    end: Optional[datetime] = None,
    params: Optional[Dict[str, Any]] = None,
    remote: bool = False,
    debug: bool = False,
) -> int:
    """
    Get a pipe's row count from the API.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows we are counting.

    begin: Optional[datetime], default None
        If provided, bound the count by this datetime.

    end: Optional[datetime], default None
        If provided, bound the count by this datetime.

    params: Optional[Dict[str, Any]], default None
        If provided, bound the count by these parameters.

    remote: bool, default False
        Whether to count rows on the remote source instead.

    Returns
    -------
    The number of rows in the pipe's table, bound by the given parameters.
    If the table does not exist, return 0.
    """
    response = self.get(
        pipe_r_url(pipe) + "/rowcount",
        json=params,
        params={
            'begin': begin,
            'end': end,
            'remote': remote,
        },
        debug=debug,
    )
    if not response:
        warn(f"Failed to get the rowcount for {pipe}:\n{response.text}")
        return 0
    try:
        return int(json.loads(response.text))
    except Exception as e:
        warn(f"Failed to get the rowcount for {pipe}:\n{e}")
    return 0
Get a pipe's row count from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe whose row count we are counting.
- begin (Optional[datetime], default None): If provided, bound the count by this datetime.
- end (Optional[datetime]): If provided, bound the count by this datetime.
- params (Optional[Dict[str, Any]], default None): If provided, bound the count by these parameters.
- remote (bool, default False):
Returns
- The number of rows in the pipe's table, bound the given parameters.
- If the table does not exist, return 0.
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False
) -> SuccessTuple:
    """
    Drop a pipe's table but maintain its registration.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be dropped.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A success tuple (bool, str).
    """
    from meerschaum.utils.warnings import error
    from meerschaum.utils.debug import dprint
    if pipe is None:
        error(f"Pipe cannot be None.")
    r_url = pipe_r_url(pipe)
    response = self.delete(
        r_url + '/drop',
        debug = debug,
    )
    if debug:
        dprint(response.text)

    try:
        data = response.json()
    except Exception as e:
        return False, f"Failed to drop {pipe}."

    if isinstance(data, list):
        response_tuple = response.__bool__(), data[1]
    elif isinstance(data, dict) and 'detail' in data:
        ### BUGFIX: reuse the parsed `data` instead of re-calling
        ### `response.json()`, which bypassed the guarded parse above
        ### (and `'detail' in <non-dict>` could raise).
        response_tuple = response.__bool__(), data['detail']
    else:
        response_tuple = response.__bool__(), response.text

    return response_tuple
Drop a pipe's table but maintain its registration.
Parameters
- pipe (meerschaum.Pipe:): The pipe to be dropped.
Returns
- A success tuple (bool, str).
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kw
) -> SuccessTuple:
    """
    Delete rows from a pipe's table via the legacy `clear pipes` action.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows will be deleted.

    Returns
    -------
    A success tuple.
    """
    ### Strip keyword arguments that would conflict with the explicit
    ### arguments passed to the action below.
    for conflicting_key in ('metric_keys', 'connector_keys', 'location_keys', 'action', 'force'):
        kw.pop(conflicting_key, None)
    return self.do_action_legacy(
        ['clear', 'pipes'],
        connector_keys=pipe.connector_keys,
        metric_keys=pipe.metric_key,
        location_keys=pipe.location_key,
        force=True,
        debug=debug,
        **kw
    )
Delete rows in a pipe's table.
Parameters
- pipe (meerschaum.Pipe): The pipe with rows to be deleted.
Returns
- A success tuple.
def get_pipe_columns_types(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
) -> Union[Dict[str, str], None]:
    """
    Fetch the columns and types of the pipe's table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose columns are to be queried.

    Returns
    -------
    A dictionary mapping column names to their database types,
    or `None` if the request failed.

    Examples
    --------
    >>> {
    ...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
    ...   'id': 'BIGINT',
    ...   'val': 'DOUBLE PRECISION',
    ... }
    >>>
    """
    ### NOTE: import `warn` up front so BOTH failure branches may use it.
    ### Previously only the first branch imported it, risking a NameError in the second.
    from meerschaum.utils.warnings import warn
    r_url = pipe_r_url(pipe) + '/columns/types'
    response = self.get(
        r_url,
        debug = debug
    )
    j = response.json()
    ### A dict containing only a `detail` key is an error payload from the server.
    if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1:
        warn(j['detail'])
        return None
    if not isinstance(j, dict):
        warn(response.text)
        return None
    return j
Fetch the columns and types of the pipe's table.
Parameters
- pipe (meerschaum.Pipe): The pipe whose columns to be queried.
Returns
- A dictionary mapping column names to their database types.
Examples
>>> {
... 'dt': 'TIMESTAMP WITHOUT TIMEZONE',
... 'id': 'BIGINT',
... 'val': 'DOUBLE PRECISION',
... }
>>>
def fetch(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, str, int] = '',
    end: Union[datetime, int] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kw: Any
) -> Iterator['pd.DataFrame']:
    """
    Get the Pipe data from the remote Pipe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose `fetch` parameters define the remote source pipe.

    begin: Union[datetime, str, int], default ''
        Lower bound for the fetch.

    end: Union[datetime, int], default None
        Upper bound for the fetch.

    params: Optional[Dict[str, Any]], default None
        Additional filter parameters; the configured `fetch:params` are patched on top.
        NOTE: annotation fixed from the invalid `Optional[Dict, Any]`.

    Returns
    -------
    An iterator of DataFrames from the source pipe,
    or `None` if the fetch parameters are misconfigured.
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.warnings import warn, error
    from meerschaum.config._patch import apply_patch_to_config

    fetch_params = pipe.parameters.get('fetch', {})
    if not fetch_params:
        warn(f"Missing 'fetch' parameters for {pipe}.", stack=False)
        return None

    pipe_meta = fetch_params.get('pipe', {})
    ### Legacy: check for `connector_keys`, etc. at the root.
    if not pipe_meta:
        ck, mk, lk = (
            fetch_params.get('connector_keys', None),
            fetch_params.get('metric_key', None),
            fetch_params.get('location_key', None),
        )
        if not ck or not mk:
            warn(f"Missing `fetch:pipe` keys for {pipe}.", stack=False)
            return None

        pipe_meta.update({
            'connector': ck,
            'metric': mk,
            'location': lk,
        })

    pipe_meta['instance'] = self
    source_pipe = mrsm.Pipe(**pipe_meta)

    ### Patch the configured fetch params over a copy of the user-supplied params.
    _params = copy.deepcopy(params) if params is not None else {}
    _params = apply_patch_to_config(_params, fetch_params.get('params', {}))
    select_columns = fetch_params.get('select_columns', [])
    omit_columns = fetch_params.get('omit_columns', [])

    return source_pipe.get_data(
        select_columns = select_columns,
        omit_columns = omit_columns,
        begin = begin,
        end = end,
        params = _params,
        debug = debug,
        as_iterator = True,
    )
Get the Pipe data from the remote Pipe.
def register_plugin(
    self,
    plugin: meerschaum.core.Plugin,
    make_archive: bool = True,
    debug: bool = False,
) -> SuccessTuple:
    """
    Register a plugin and upload its archive.

    Parameters
    ----------
    plugin: meerschaum.core.Plugin
        The plugin to be registered.

    make_archive: bool, default True
        If `True`, rebuild the tar archive before uploading.

    Returns
    -------
    A success tuple (bool, str).
    """
    import json
    archive_path = plugin.make_tar(debug=debug) if make_archive else plugin.archive_path
    metadata = {
        'version': plugin.version,
        'attributes': json.dumps(plugin.attributes),
    }
    r_url = plugin_r_url(plugin)
    ### Use a context manager so the archive handle is closed even if the POST raises.
    with open(archive_path, 'rb') as file_pointer:
        files = {'archive': file_pointer}
        try:
            response = self.post(r_url, files=files, params=metadata, debug=debug)
        except Exception:
            return False, f"Failed to register plugin '{plugin}'."

    try:
        success, msg = json.loads(response.text)
    except Exception:
        return False, response.text

    return success, msg
Register a plugin and upload its archive.
def install_plugin(
    self,
    name: str,
    skip_deps: bool = False,
    force: bool = False,
    debug: bool = False
) -> SuccessTuple:
    """
    Download and attempt to install a plugin from the API.

    Parameters
    ----------
    name: str
        The name of the plugin to install.

    skip_deps: bool, default False
        If `True`, do not install the plugin's dependencies.

    force: bool, default False
        If `True`, force the installation.

    Returns
    -------
    A success tuple (bool, str).
    """
    import os, pathlib, json
    from meerschaum.core import Plugin
    from meerschaum.config._paths import PLUGINS_TEMP_RESOURCES_PATH
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    binaryornot_check = attempt_import('binaryornot.check', lazy=False)
    r_url = plugin_r_url(name)
    dest = pathlib.Path(os.path.join(PLUGINS_TEMP_RESOURCES_PATH, name + '.tar.gz'))
    if debug:
        dprint(f"Fetching from '{self.url + r_url}' to '{dest}'...")
    archive_path = self.wget(r_url, dest, debug=debug)
    is_binary = binaryornot_check.is_binary(str(archive_path))
    if not is_binary:
        ### The server sent a JSON error payload instead of a tarball.
        fail_msg = f"Failed to download binary for plugin '{name}'."
        ### NOTE: default assignment fixes a potential NameError when the payload
        ### is neither a list nor a dict containing 'detail'.
        success, msg = False, fail_msg
        try:
            with open(archive_path, 'r') as f:
                j = json.load(f)
            if isinstance(j, list):
                success, msg = tuple(j)
        except Exception:
            success, msg = False, fail_msg
        return success, msg
    plugin = Plugin(name, archive_path=archive_path, repo_connector=self)
    return plugin.install(skip_deps=skip_deps, force=force, debug=debug)
Download and attempt to install a plugin from the API.
def delete_plugin(
    self,
    plugin: meerschaum.core.Plugin,
    debug: bool = False
) -> SuccessTuple:
    """Delete a plugin from an API repository."""
    import json
    request_url = plugin_r_url(plugin)

    try:
        response = self.delete(request_url, debug=debug)
    except Exception:
        return False, f"Failed to delete plugin '{plugin}'."

    ### Expect a JSON success tuple; fall back to the raw text on parse failure.
    try:
        success, msg = json.loads(response.text)
    except Exception:
        return False, response.text

    return success, msg
Delete a plugin from an API repository.
def get_plugins(
    self,
    user_id : Optional[int] = None,
    search_term : Optional[str] = None,
    debug : bool = False
) -> Sequence[str]:
    """
    Return a list of registered plugin names.

    Parameters
    ----------
    user_id: Optional[int], default None
        If specified, return all plugins from a certain user.

    search_term: Optional[str], default None
        If specified, filter plugins by the search term.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A list of plugin names (empty on request failure).
    """
    import json
    from meerschaum.utils.warnings import warn, error
    from meerschaum.config.static import STATIC_CONFIG
    response = self.get(
        STATIC_CONFIG['api']['endpoints']['plugins'],
        params = {'user_id' : user_id, 'search_term' : search_term},
        use_token = True,
        debug = debug
    )
    if not response:
        return []
    plugins = json.loads(response.text)
    if not isinstance(plugins, list):
        error(response.text)
    return plugins
Return a list of registered plugin names.
Parameters
- user_id (Optional[int], default None): If specified, return all plugins from a certain user.
- search_term (Optional[str], default None): If specified, filter plugins by the search term.
- debug (bool, default False): Verbosity toggle.
Returns
- A list of registered plugin names.
def get_plugin_attributes(
    self,
    plugin: meerschaum.core.Plugin,
    debug: bool = False
) -> Mapping[str, Any]:
    """
    Return a plugin's attributes.
    """
    import json
    from meerschaum.utils.warnings import warn, error
    r_url = plugin_r_url(plugin) + '/attributes'
    response = self.get(r_url, use_token=True, debug=debug)
    attributes = response.json()

    ### The payload is sometimes double-encoded; decode one more level if so.
    if isinstance(attributes, str) and attributes.startswith('{'):
        try:
            attributes = json.loads(attributes)
        except Exception:
            pass

    if not isinstance(attributes, dict):
        error(response.text)
    elif not response and 'detail' in attributes:
        warn(attributes['detail'])
        return {}
    return attributes
Return a plugin's attributes.
def login(
    self,
    debug: bool = False,
    warn: bool = True,
    **kw: Any
) -> SuccessTuple:
    """Log in and set the session token."""
    import json, datetime
    from meerschaum.utils.warnings import warn as _warn, info, error
    from meerschaum.core import User
    from meerschaum.config.static import STATIC_CONFIG

    ### No credentials configured means we cannot even attempt a login.
    try:
        login_data = {
            'username': self.username,
            'password': self.password,
        }
    except AttributeError:
        return False, f"Please login with the command `login {self}`."

    response = self.post(
        STATIC_CONFIG['api']['endpoints']['login'],
        data = login_data,
        use_token = False,
        debug = debug
    )
    if not response:
        msg = (
            f"Failed to log into '{self}' as user '{login_data['username']}'.\n" +
            f"    Please verify login details for connector '{self}'."
        )
        if warn:
            _warn(msg, stack=False)
        return response.__bool__(), msg

    ### Store the bearer token and its expiry for subsequent requests.
    payload = json.loads(response.text)
    self._token = payload['access_token']
    self._expires = datetime.datetime.strptime(
        payload['expires'],
        '%Y-%m-%dT%H:%M:%S.%f'
    )
    msg = f"Successfully logged into '{self}' as user '{login_data['username']}'."
    return response.__bool__(), msg
Log in and set the session token.
def test_connection(
    self,
    **kw: Any
) -> Union[bool, None]:
    """Test if a successful connection to the API may be made."""
    from meerschaum.connectors.poll import retry_connect
    ### Single non-warning attempt by default; caller kwargs take precedence.
    retry_kwargs = {
        'max_retries': 1,
        'retry_wait': 0,
        'warn': False,
        'connector': self,
        'enforce_chaining': False,
        'enforce_login': False,
    }
    retry_kwargs.update(kw)
    try:
        return retry_connect(**retry_kwargs)
    except Exception:
        return False
Test if a successful connection to the API may be made.
def register_user(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Register a new user."""
    import json
    from meerschaum.config.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/register"

    data = {
        'username': user.username,
        'password': user.password,
        'attributes': json.dumps(user.attributes),
    }
    ### Only include optional fields when they are set.
    if user.type:
        data['type'] = user.type
    if user.email:
        data['email'] = user.email

    response = self.post(r_url, data=data, debug=debug)
    try:
        payload = json.loads(response.text)
        if isinstance(payload, dict) and 'detail' in payload:
            return False, payload['detail']
        success_tuple = tuple(payload)
    except Exception:
        fallback_msg = response.text if response else f"Failed to register user '{user}'."
        return False, fallback_msg

    return tuple(success_tuple)
Register a new user.
def get_user_id(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[int]:
    """Get a user's ID."""
    import json
    from meerschaum.config.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/id"
    response = self.get(r_url, debug=debug, **kw)
    ### Any failure (bad response, non-integer payload) yields None.
    try:
        return int(json.loads(response.text))
    except Exception:
        return None
Get a user's ID.
def get_users(
    self,
    debug: bool = False,
    **kw : Any
) -> List[str]:
    """
    Return a list of registered usernames.
    """
    import json
    from meerschaum.config.static import STATIC_CONFIG
    response = self.get(
        f"{STATIC_CONFIG['api']['endpoints']['users']}",
        debug = debug,
        use_token = True,
    )
    ### Best-effort: any failure yields an empty list.
    if not response:
        return []
    try:
        return response.json()
    except Exception:
        return []
Return a list of registered usernames.
def edit_user(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Edit an existing user."""
    import json
    from meerschaum.config.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/edit"
    data = {
        'username': user.username,
        'password': user.password,
        'type': user.type,
        'email': user.email,
        'attributes': json.dumps(user.attributes),
    }
    response = self.post(r_url, data=data, debug=debug)
    try:
        payload = json.loads(response.text)
        if isinstance(payload, dict) and 'detail' in payload:
            return False, payload['detail']
        success_tuple = tuple(payload)
    except Exception:
        fallback_msg = response.text if response else f"Failed to edit user '{user}'."
        return False, fallback_msg

    return tuple(success_tuple)
Edit an existing user.
def delete_user(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Delete a user."""
    import json
    from meerschaum.config.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}"
    response = self.delete(r_url, debug=debug)
    try:
        payload = json.loads(response.text)
        if isinstance(payload, dict) and 'detail' in payload:
            return False, payload['detail']
        return tuple(payload)
    except Exception:
        return False, f"Failed to delete user '{user.username}'."
Delete a user.
def get_user_password_hash(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """If configured, get a user's password hash."""
    from meerschaum.config.static import STATIC_CONFIG
    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/password_hash'
    response = self.get(r_url, debug=debug, **kw)
    return response.json() if response else None
If configured, get a user's password hash.
def get_user_type(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """If configured, get a user's type."""
    from meerschaum.config.static import STATIC_CONFIG
    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/type'
    response = self.get(r_url, debug=debug, **kw)
    return response.json() if response else None
If configured, get a user's type.
def get_user_attributes(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw
) -> Union[Dict[str, Any], None]:
    """
    Get a user's attributes.

    Returns
    -------
    The user's attributes dictionary, or `None` if the request failed.
    NOTE: return annotation fixed — this never returns an `int`.
    """
    from meerschaum.config.static import STATIC_CONFIG
    import json
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/attributes"
    response = self.get(r_url, debug=debug, **kw)
    try:
        attributes = json.loads(response.text)
    except Exception:
        attributes = None
    return attributes
Get a user's attributes.
@classmethod
def from_uri(
    cls,
    uri: str,
    label: Optional[str] = None,
    as_dict: bool = False,
) -> Union[
        'meerschaum.connectors.APIConnector',
        Dict[str, Union[str, int]],
    ]:
    """
    Create a new APIConnector from a URI string.

    Parameters
    ----------
    uri: str
        The URI connection string.

    label: Optional[str], default None
        If provided, use this as the connector label.
        Otherwise use the determined database name.

    as_dict: bool, default False
        If `True`, return a dictionary of the keyword arguments
        necessary to create a new `APIConnector`, otherwise create a new object.

    Returns
    -------
    A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
    """
    from meerschaum.connectors.sql import SQLConnector

    params = SQLConnector.parse_uri(uri)
    if 'host' not in params:
        error("No host was found in the provided URI.")

    ### The URI's scheme becomes the API protocol.
    params['protocol'] = params.pop('flavor')

    if not label:
        user_prefix = (params['username'] + '@') if 'username' in params else ''
        label = (user_prefix + params['host']).lower()
    params['label'] = label

    return params if as_dict else cls(**params)
Create a new APIConnector from a URI string.
Parameters
- uri (str): The URI connection string.
- label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
- as_dict (bool, default False):
If `True`, return a dictionary of the keyword arguments necessary to create a new `APIConnector`; otherwise create a new object.
Returns
- A new `APIConnector` object, or a dictionary of attributes (if `as_dict` is `True`).
def get_jobs(self, debug: bool = False) -> Dict[str, Job]:
    """
    Return a dictionary of remote jobs.
    """
    response = self.get(JOBS_ENDPOINT, debug=debug)
    if not response:
        warn(f"Failed to get remote jobs from {self}.")
        return {}

    ### Build one Job per entry in the server's metadata payload.
    jobs = {}
    for name, job_meta in response.json().items():
        jobs[name] = Job(
            name,
            job_meta['sysargs'],
            executor_keys=str(self),
            _properties=job_meta['daemon']['properties'],
        )
    return jobs
Return a dictionary of remote jobs.
def get_job(self, name: str, debug: bool = False) -> Job:
    """
    Return a single Job object.

    Raises a `ValueError` if the job does not exist on the remote executor.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    if not metadata:
        raise ValueError(f"Job '{name}' does not exist.")

    sysargs = metadata['sysargs']
    daemon_properties = metadata['daemon']['properties']
    return Job(
        name,
        sysargs,
        executor_keys=str(self),
        _properties=daemon_properties,
    )
Return a single Job object.
def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the metadata for a single job.

    Results are cached per job name for `JOB_METADATA_CACHE_SECONDS`.
    """
    now = time.perf_counter()
    cache = self.__dict__.get('_job_metadata_cache', None)
    cached_timestamp = (
        cache.get(name, {}).get('timestamp', None)
        if cache is not None
        else None
    )

    ### Serve from the cache while the entry is still fresh.
    if (
        cached_timestamp is not None
        and (now - cached_timestamp) < JOB_METADATA_CACHE_SECONDS
    ):
        if debug:
            dprint(f"Returning cached metadata for job '{name}'.")
        return cache[name]['metadata']

    response = self.get(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        if debug:
            msg = (
                response.json()['detail']
                if 'detail' in response.text
                else response.text
            )
            warn(f"Failed to get metadata for job '{name}':\n{msg}")
        return {}

    metadata = response.json()
    if cache is None:
        self._job_metadata_cache = {}

    self._job_metadata_cache[name] = {
        'timestamp': now,
        'metadata': metadata,
    }
    return metadata
Return the metadata for a single job.
def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the daemon properties for a single job.
    """
    daemon_meta = self.get_job_metadata(name, debug=debug).get('daemon', {})
    return daemon_meta.get('properties', {})
Return the daemon properties for a single job.
def get_job_exists(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a job exists.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/exists', debug=debug)
    if response:
        return response.json()

    warn(f"Failed to determine whether job '{name}' exists.")
    return False
Return whether a job exists.
def delete_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Delete a job.
    """
    response = self.delete(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if response:
        return tuple(response.json())

    ### Prefer the server's `detail` message when present.
    if 'detail' in response.text:
        return False, response.json()['detail']
    return False, response.text
Delete a job.
def start_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Start a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/start", debug=debug)
    if response:
        return tuple(response.json())

    ### Prefer the server's `detail` message when present.
    if 'detail' in response.text:
        return False, response.json()['detail']
    return False, response.text
Start a job.
def create_job(
    self,
    name: str,
    sysargs: List[str],
    properties: Optional[Dict[str, str]] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Create a job.
    """
    payload = {
        'sysargs': sysargs,
        'properties': properties,
    }
    response = self.post(
        JOBS_ENDPOINT + f"/{name}",
        json=payload,
        debug=debug,
    )
    if response:
        return tuple(response.json())

    ### Prefer the server's `detail` message when present.
    if 'detail' in response.text:
        return False, response.json()['detail']
    return False, response.text
Create a job.
def stop_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Stop a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/stop", debug=debug)
    if response:
        return tuple(response.json())

    ### Prefer the server's `detail` message when present.
    if 'detail' in response.text:
        return False, response.json()['detail']
    return False, response.text
Stop a job.
def pause_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Pause a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/pause", debug=debug)
    if response:
        return tuple(response.json())

    ### Prefer the server's `detail` message when present.
    if 'detail' in response.text:
        return False, response.json()['detail']
    return False, response.text
Pause a job.
def get_logs(self, name: str, debug: bool = False) -> str:
    """
    Return the logs for a job.

    Raises
    ------
    `ValueError` if the logs cannot be fetched.
    """
    ### NOTE: pass `debug` through — it was previously accepted but ignored.
    response = self.get(LOGS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        raise ValueError(f"Cannot fetch logs for job '{name}':\n{response.text}")

    return response.json()
Return the logs for a job.
def get_job_stop_time(self, name: str, debug: bool = False) -> Union[datetime, None]:
    """
    Return the job's manual stop time, or `None` if not set or on failure.
    """
    ### NOTE: pass `debug` through — it was previously accepted but ignored.
    response = self.get(JOBS_ENDPOINT + f"/{name}/stop_time", debug=debug)
    if not response:
        warn(f"Failed to get stop time for job '{name}':\n{response.text}")
        return None

    data = response.json()
    if data is None:
        return None

    return datetime.fromisoformat(data)
Return the job's manual stop time.
def monitor_logs(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[None], str],
    stop_callback_function: Callable[[None], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and execute a callback with the changes.

    Synchronous wrapper around `monitor_logs_async`.
    """
    coroutine = self.monitor_logs_async(
        name,
        callback_function,
        input_callback_function=input_callback_function,
        stop_callback_function=stop_callback_function,
        stop_on_exit=stop_on_exit,
        strip_timestamps=strip_timestamps,
        accept_input=accept_input,
        debug=debug,
    )
    return asyncio.run(coroutine)
Monitor a job's log files and execute a callback with the changes.
async def monitor_logs_async(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[], str],
    stop_callback_function: Callable[[SuccessTuple], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and await a callback with the changes.

    Opens a websocket to the server's logs endpoint for job `name` and
    dispatches each received message:

    - `JOBS_STDIN_MESSAGE` triggers `input_callback_function` and sends its
      result back over the socket.
    - `JOBS_STOP_MESSAGE` triggers `stop_callback_function` with the parsed
      success tuple (and closes the monitor if `stop_on_exit`).
    - Any other message is treated as a log line and passed to
      `callback_function` (optionally with its timestamp stripped).

    Both callback functions may be synchronous or coroutine functions.
    """
    import traceback
    from meerschaum.jobs import StopMonitoringLogs
    from meerschaum.utils.formatting._jobs import strip_timestamp_from_line

    websockets, websockets_exceptions = mrsm.attempt_import('websockets', 'websockets.exceptions')
    ### Mirror the HTTP scheme: plain ws for http, TLS wss otherwise.
    protocol = 'ws' if self.URI.startswith('http://') else 'wss'
    port = self.port if 'port' in self.__dict__ else ''
    uri = f"{protocol}://{self.host}:{port}{LOGS_ENDPOINT}/{name}/ws"

    async def _stdin_callback(client):
        ### The server requested input for the job's stdin.
        if input_callback_function is None:
            return

        if asyncio.iscoroutinefunction(input_callback_function):
            data = await input_callback_function()
        else:
            data = input_callback_function()

        await client.send(data)

    async def _stop_callback(client):
        ### The server announced the job stopped; the next frame is its success tuple.
        try:
            result = tuple(json.loads(await client.recv()))
        except Exception as e:
            warn(traceback.format_exc())
            result = False, str(e)

        if stop_callback_function is not None:
            if asyncio.iscoroutinefunction(stop_callback_function):
                await stop_callback_function(result)
            else:
                stop_callback_function(result)

        if stop_on_exit:
            raise StopMonitoringLogs

    ### Control messages mapped to their handlers; anything else is a log line.
    message_callbacks = {
        JOBS_STDIN_MESSAGE: _stdin_callback,
        JOBS_STOP_MESSAGE: _stop_callback,
    }

    async with websockets.connect(uri) as websocket:
        try:
            ### Authenticate first; 'no-login' is the anonymous sentinel.
            await websocket.send(self.token or 'no-login')
        except websockets_exceptions.ConnectionClosedOK:
            pass

        while True:
            try:
                response = await websocket.recv()
                callback = message_callbacks.get(response, None)
                if callback is not None:
                    await callback(websocket)
                    continue

                if strip_timestamps:
                    response = strip_timestamp_from_line(response)

                if asyncio.iscoroutinefunction(callback_function):
                    await callback_function(response)
                else:
                    callback_function(response)
            except (KeyboardInterrupt, StopMonitoringLogs):
                await websocket.close()
                break
Monitor a job's log files and await a callback with the changes.
def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a remote job is blocking on stdin.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/is_blocking_on_stdin', debug=debug)
    return response.json() if response else False
Return whether a remote job is blocking on stdin.
def get_job_began(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `began` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    ### NOTE(review): reads a nested 'daemon' dict inside the daemon properties —
    ### confirm this schema against the server payload.
    return properties.get('daemon', {}).get('began', None)
Return a job's `began` timestamp, if it exists.
def get_job_ended(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `ended` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    ### NOTE(review): reads a nested 'daemon' dict inside the daemon properties —
    ### confirm this schema against the server payload.
    return properties.get('daemon', {}).get('ended', None)
Return a job's `ended` timestamp, if it exists.
def get_job_paused(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `paused` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    ### NOTE(review): reads a nested 'daemon' dict inside the daemon properties —
    ### confirm this schema against the server payload.
    return properties.get('daemon', {}).get('paused', None)
Return a job's `paused` timestamp, if it exists.
def get_job_status(self, name: str, debug: bool = False) -> str:
    """
    Return the job's status (defaults to 'stopped' when unknown).
    """
    return self.get_job_metadata(name, debug=debug).get('status', 'stopped')
Return the job's status.
def get_connector(
    type: Union[str, None] = None,
    label: Union[str, None] = None,
    refresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.


    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    ### Accept combined 'type:label' keys when no explicit label is given.
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    ### Lazily load plugin-provided connector types exactly once.
    with _locks['_loaded_plugin_connectors']:
        if not _loaded_plugin_connectors:
            load_plugin_connectors()
            _load_builtin_custom_connectors()
            _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    ### Populate the `types` registry on first use (see module comment above).
    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector " +
                            f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n" +
                        f"  - Keyword value: '{value}'\n" +
                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else: ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack = False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack = False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]
Return existing connector or create new connection and store for reuse.
You can create new connectors if enough parameters are provided for the given type and flavor.
Parameters
- type (Optional[str], default None):
    Connector type (sql, api, etc.).
    Defaults to the type of the configured `instance_connector`.
- label (Optional[str], default None):
    Connector label (e.g. main). Defaults to `'main'`.
- refresh (bool, default False):
    Refresh the Connector instance / construct new object. Defaults to `False`.
- kw (Any):
    Other arguments to pass to the Connector constructor.
    If the Connector has already been constructed and new arguments are provided,
    `refresh` is set to `True` and the old Connector is replaced.
Returns
- A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
  `meerschaum.connectors.sql.SQLConnector`).
Examples
The following parameters would create a new `meerschaum.connectors.sql.SQLConnector`
that isn't in the configuration file.
>>> conn = get_connector(
... type = 'sql',
... label = 'newlabel',
... flavor = 'sqlite',
... database = '/file/path/to/database.db'
... )
>>>
def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.
    """
    import warnings
    if ':' not in keys:
        ### Previously this warned but fell through into a doomed unpack below;
        ### malformed keys can never correspond to a connection, so bail out now.
        warn(f"Invalid connector keys '{keys}'")
        return False

    try:
        typ, label = keys.split(':')
    except Exception:
        ### e.g. extra colons produce too many values to unpack.
        return False
    if typ not in instance_types:
        return False
    ### The connector must already exist in the in-memory registry;
    ### `is_connected` never constructs a new connector.
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        ### Suppress connection-failure warnings; the boolean result is the signal.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False
Check if the connector keys correspond to an active connection.
If the connector has not been created, it will immediately return `False`.
If the connector exists but cannot communicate with the source, return `False`.

**NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
Parameters
- keys: The keys to the connector (e.g. `'sql:main'`).
Returns
- A `bool` corresponding to whether a successful connection may be made.