meerschaum.connectors
Create connectors with `meerschaum.connectors.get_connector()`.
For ease of use, you can also import from the root `meerschaum` module:

>>> from meerschaum import get_connector
>>> conn = get_connector()
````python
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

"""
Create connectors with `meerschaum.connectors.get_connector()`.
For ease of use, you can also import from the root `meerschaum` module:
```
>>> from meerschaum import get_connector
>>> conn = get_connector()
```
"""

from __future__ import annotations

import meerschaum as mrsm
from meerschaum.utils.typing import Any, Union, List, Dict
from meerschaum.utils.threading import RLock
from meerschaum.utils.warnings import warn

from meerschaum.connectors._Connector import Connector, InvalidAttributesError
from meerschaum.connectors.sql._SQLConnector import SQLConnector
from meerschaum.connectors.api._APIConnector import APIConnector
from meerschaum.connectors.sql._create_engine import flavor_configs as sql_flavor_configs

__all__ = (
    "make_connector",
    "Connector",
    "SQLConnector",
    "APIConnector",
    "get_connector",
    "is_connected",
    "poll",
    "api",
    "sql",
    "valkey",
)

### store connectors partitioned by
### type, label for reuse
connectors: Dict[str, Dict[str, Connector]] = {
    'api': {},
    'sql': {},
    'plugin': {},
    'valkey': {},
}
instance_types: List[str] = ['sql', 'api']
_locks: Dict[str, RLock] = {
    'connectors': RLock(),
    'types': RLock(),
    'custom_types': RLock(),
    '_loaded_plugin_connectors': RLock(),
    'instance_types': RLock(),
}
attributes: Dict[str, Dict[str, Any]] = {
    'api': {
        'required': [
            'host',
            'username',
            'password',
        ],
        'optional': [
            'port',
        ],
        'default': {
            'protocol': 'http',
        },
    },
    'sql': {
        'flavors': sql_flavor_configs,
    },
}
### Fill this with objects only when connectors are first referenced.
types: Dict[str, Any] = {}
custom_types: set = set()
_loaded_plugin_connectors: bool = False


def get_connector(
    type: str = None,
    label: str = None,
    refresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.

    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type='sql',
    ...     label='newlabel',
    ...     flavor='sqlite',
    ...     database='/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    with _locks['_loaded_plugin_connectors']:
        if not _loaded_plugin_connectors:
            load_plugin_connectors()
            _load_builtin_custom_connectors()
            _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector "
                            + f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n"
                        + f"  - Keyword value: '{value}'\n"
                        + f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else:  ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack=False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack=False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]


def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    try:
        typ, label = keys.split(':')
    except Exception:
        return False
    if typ not in instance_types:
        return False
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False


def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`.

    Parameters
    ----------
    instance: bool, default False
        If `True`, make this connector type an instance connector.
        This requires implementing the various pipes functions and lots of testing.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>>
    """
    import re
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls


def load_plugin_connectors():
    """
    If a plugin makes use of the `make_connector` decorator,
    load its module.
    """
    from meerschaum.plugins import get_plugins, import_plugins
    to_import = []
    for plugin in get_plugins():
        if plugin is None:
            continue
        with open(plugin.__file__, encoding='utf-8') as f:
            text = f.read()
        if 'make_connector' in text or 'Connector' in text:
            to_import.append(plugin.name)
    if not to_import:
        return
    import_plugins(*to_import)


def get_connector_plugin(
    connector: Connector,
) -> Union[str, None, mrsm.Plugin]:
    """
    Determine the plugin for a connector.
    This is useful for handling virtual environments for custom instance connectors.

    Parameters
    ----------
    connector: Connector
        The connector which may require a virtual environment.

    Returns
    -------
    A Plugin, 'mrsm', or None.
    """
    if not hasattr(connector, 'type'):
        return None
    plugin_name = (
        connector.__module__.replace('plugins.', '').split('.')[0]
        if connector.type in custom_types else (
            connector.label
            if connector.type == 'plugin'
            else 'mrsm'
        )
    )
    plugin = mrsm.Plugin(plugin_name)
    return plugin if plugin.is_installed() else None


def _load_builtin_custom_connectors():
    """
    Import custom connectors decorated with `@make_connector` or `@make_executor`.
    """
    import meerschaum.jobs.systemd
    import meerschaum.connectors.valkey
````
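The module-level cache means repeated calls with the same keys return the same object. A minimal usage sketch, assuming a configured `sql:main` connector (the `sql:temp` keys and SQLite path below are placeholders for illustration):

```python
from meerschaum.connectors import get_connector, is_connected

### Both call styles resolve to the same cached connector.
conn = get_connector('sql', 'main')
same_conn = get_connector('sql:main')   # 'type:label' shorthand
assert conn is same_conn

### Ad-hoc connectors may be built from keyword arguments alone,
### without touching the configuration file.
temp_conn = get_connector('sql:temp', flavor='sqlite', database='/tmp/temp.db')

### `is_connected()` returns False for unknown or unreachable connectors.
print(is_connected('sql:main'))
```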
```python
def make_connector(cls, _is_executor: bool = False):
```
Register a class as a `Connector`.
The `type` will be the lower case of the class name, without the suffix `connector`.

Parameters
- instance (bool, default False):
  If `True`, make this connector type an instance connector.
  This requires implementing the various pipes functions and lots of testing.

Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>>
>>> @make_connector
... class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
...
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
>>>
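The derived `type` simply strips the suffix from the lower-cased class name. A sketch mirroring that logic (`derive_type` is a hypothetical helper for illustration only, not part of the API):

```python
import re

def derive_type(class_name: str, is_executor: bool = False) -> str:
    """Mirror the suffix-stripping used by `make_connector` and `Connector.type`."""
    suffix_regex = r'connector$' if not is_executor else r'executor$'
    return re.sub(suffix_regex, '', class_name.lower())

print(derive_type('FooConnector'))                   # foo
print(derive_type('FooExecutor', is_executor=True))  # foo
print(derive_type('Foo'))                            # foo (no suffix to strip)
```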
````python
class Connector(metaclass=abc.ABCMeta):
    """
    The base connector class to hold connection attributes.
    """
    def __init__(
        self,
        type: Optional[str] = None,
        label: Optional[str] = None,
        **kw: Any
    ):
        """
        Set the given keyword arguments as attributes.

        Parameters
        ----------
        type: str
            The `type` of the connector (e.g. `sql`, `api`, `plugin`).

        label: str
            The `label` for the connector.

        Examples
        --------
        Run `mrsm edit config` to edit connectors in the YAML file:

        ```yaml
        meerschaum:
            connections:
                {type}:
                    {label}:
                        ### attributes go here
        ```

        """
        self._original_dict = copy.deepcopy(self.__dict__)
        self._set_attributes(type=type, label=label, **kw)

        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
        self.verify_attributes(
            ['uri']
            if 'uri' in self.__dict__
            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
        )

    def _reset_attributes(self):
        self.__dict__ = self._original_dict

    def _set_attributes(
        self,
        *args,
        inherit_default: bool = True,
        **kw: Any
    ):
        from meerschaum.config.static import STATIC_CONFIG
        from meerschaum.utils.warnings import error

        self._attributes = {}

        default_label = STATIC_CONFIG['connectors']['default_label']

        ### NOTE: Support the legacy method of explicitly passing the type.
        label = kw.get('label', None)
        if label is None:
            if len(args) == 2:
                label = args[1]
            elif len(args) == 0:
                label = None
            else:
                label = args[0]

        if label == 'default':
            error(
                f"Label cannot be 'default'. Did you mean '{default_label}'?",
                InvalidAttributesError,
            )
        self.__dict__['label'] = label

        from meerschaum.config import get_config
        conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors'))
        connector_config = copy.deepcopy(get_config('system', 'connectors'))

        ### inherit attributes from 'default' if it exists
        if inherit_default:
            inherit_from = 'default'
            if self.type in conn_configs and inherit_from in conn_configs[self.type]:
                _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from])
                self._attributes.update(_inherit_dict)

        ### load user config into self._attributes
        if self.type in conn_configs and self.label in conn_configs[self.type]:
            self._attributes.update(conn_configs[self.type][self.label] or {})

        ### load system config into self._sys_config
        ### (deep copy so future Connectors don't inherit changes)
        if self.type in connector_config:
            self._sys_config = copy.deepcopy(connector_config[self.type])

        ### add additional arguments or override configuration
        self._attributes.update(kw)

        ### finally, update __dict__ with _attributes.
        self.__dict__.update(self._attributes)

    def verify_attributes(
        self,
        required_attributes: Optional[List[str]] = None,
        debug: bool = False,
    ) -> None:
        """
        Ensure that the required attributes have been met.

        The Connector base class checks the minimum requirements.
        Child classes may enforce additional requirements.

        Parameters
        ----------
        required_attributes: Optional[List[str]], default None
            Attributes to be verified. If `None`, default to `['label']`.

        debug: bool, default False
            Verbosity toggle.

        Returns
        -------
        Don't return anything.

        Raises
        ------
        An error if any of the required attributes are missing.
        """
        from meerschaum.utils.warnings import error, warn
        from meerschaum.utils.debug import dprint
        from meerschaum.utils.misc import items_str
        if required_attributes is None:
            required_attributes = ['label']

        missing_attributes = set()
        for a in required_attributes:
            if a not in self.__dict__:
                missing_attributes.add(a)
        if len(missing_attributes) > 0:
            error(
                (
                    f"Missing {items_str(list(missing_attributes))} "
                    + f"for connector '{self.type}:{self.label}'."
                ),
                InvalidAttributesError,
                silent=True,
                stack=False
            )

    def __str__(self):
        """
        When cast to a string, return type:label.
        """
        return f"{self.type}:{self.label}"

    def __repr__(self):
        """
        Represent the connector as type:label.
        """
        return str(self)

    @property
    def meta(self) -> Dict[str, Any]:
        """
        Return the keys needed to reconstruct this Connector.
        """
        _meta = {
            key: value
            for key, value in self.__dict__.items()
            if not str(key).startswith('_')
        }
        _meta.update({
            'type': self.type,
            'label': self.label,
        })
        return _meta

    @property
    def type(self) -> str:
        """
        Return the type for this connector.
        """
        _type = self.__dict__.get('type', None)
        if _type is None:
            import re
            is_executor = self.__class__.__name__.lower().endswith('executor')
            suffix_regex = (
                r'connector$'
                if not is_executor
                else r'executor$'
            )
            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
            self.__dict__['type'] = _type
        return _type

    @property
    def label(self) -> str:
        """
        Return the label for this connector.
        """
        _label = self.__dict__.get('label', None)
        if _label is None:
            from meerschaum.config.static import STATIC_CONFIG
            _label = STATIC_CONFIG['connectors']['default_label']
            self.__dict__['label'] = _label
        return _label
````
The base connector class to hold connection attributes.
```python
def __init__(self, type: Optional[str] = None, label: Optional[str] = None, **kw: Any):
```
```python
def verify_attributes(self, required_attributes: Optional[List[str]] = None, debug: bool = False) -> None:
```
Ensure that the required attributes have been met.

The Connector base class checks the minimum requirements. Child classes may enforce additional requirements.

Parameters
- required_attributes (Optional[List[str]], default None):
  Attributes to be verified. If `None`, defaults to `['label']`.
- debug (bool, default False):
  Verbosity toggle.

Returns
- Doesn't return anything.

Raises
- An `InvalidAttributesError` if any of the required attributes are missing.
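Missing attributes surface at construction time, since `Connector.__init__()` calls `verify_attributes()`. A sketch, assuming a custom connector like the `FooConnector` example above:

```python
from meerschaum.connectors import Connector, InvalidAttributesError, make_connector

@make_connector
class FooConnector(Connector):
    REQUIRED_ATTRIBUTES = ['username', 'password']

try:
    ### 'password' is missing, so `verify_attributes()` raises.
    FooConnector(label='bar', username='dog')
except InvalidAttributesError as exc:
    print(exc)
```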
```python
@property
def meta(self) -> Dict[str, Any]:
```
Return the keys needed to reconstruct this Connector.
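Because `meta` includes `type`, `label`, and every public attribute, it can round-trip a connector through `get_connector()`. A sketch, assuming a configured `sql:main` (the caching behavior follows from `get_connector()` above):

```python
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
### Passing `meta` back should hit the cache, since every attribute matches.
rebuilt = mrsm.get_connector(**conn.meta)
assert rebuilt is conn
```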
```python
@property
def type(self) -> str:
```
Return the type for this connector.
```python
@property
def label(self) -> str:
```
Return the label for this connector.
```python
class SQLConnector(Connector):
    """
    Connect to SQL databases via `sqlalchemy`.

    SQLConnectors may be used as Meerschaum instance connectors.
    Read more about connectors and instances at
    https://meerschaum.io/reference/connectors/

    """

    IS_INSTANCE: bool = True

    from ._create_engine import flavor_configs, create_engine
    from ._sql import (
        read,
        value,
        exec,
        execute,
        to_sql,
        exec_queries,
        get_connection,
        _cleanup_connections,
    )
    from meerschaum.utils.sql import test_connection
    from ._fetch import fetch, get_pipe_metadef
    from ._cli import cli, _cli_exit
    from ._pipes import (
        fetch_pipes_keys,
        create_indices,
        drop_indices,
        get_create_index_queries,
        get_drop_index_queries,
        get_add_columns_queries,
        get_alter_columns_queries,
        delete_pipe,
        get_pipe_data,
        get_pipe_data_query,
        register_pipe,
        edit_pipe,
        get_pipe_id,
        get_pipe_attributes,
        sync_pipe,
        sync_pipe_inplace,
        get_sync_time,
        pipe_exists,
        get_pipe_rowcount,
        drop_pipe,
        clear_pipe,
        deduplicate_pipe,
        get_pipe_table,
        get_pipe_columns_types,
        get_to_sql_dtype,
        get_pipe_schema,
        create_pipe_table_from_df,
        get_pipe_columns_indices,
    )
    from ._plugins import (
        register_plugin,
        delete_plugin,
        get_plugin_id,
        get_plugin_version,
        get_plugins,
        get_plugin_user_id,
        get_plugin_username,
        get_plugin_attributes,
    )
    from ._users import (
        register_user,
        get_user_id,
        get_users,
        edit_user,
        delete_user,
        get_user_password_hash,
        get_user_type,
        get_user_attributes,
    )
    from ._uri import from_uri, parse_uri
    from ._instance import (
        _log_temporary_tables_creation,
        _drop_temporary_table,
        _drop_temporary_tables,
        _drop_old_temporary_tables,
    )

    def __init__(
        self,
        label: Optional[str] = None,
        flavor: Optional[str] = None,
        wait: bool = False,
        connect: bool = False,
        debug: bool = False,
        **kw: Any
    ):
        """
        Parameters
        ----------
        label: str, default 'main'
            The identifying label for the connector.
            E.g. for `sql:main`, 'main' is the label.

        flavor: Optional[str], default None
            The database flavor, e.g.
            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
            To see supported flavors, run the `bootstrap connectors` command.

        wait: bool, default False
            If `True`, block until a database connection has been made.

        connect: bool, default False
            If `True`, immediately attempt to connect to the database and raise
            a warning if the connection fails.

        debug: bool, default False
            Verbosity toggle.

        kw: Any
            All other arguments will be passed to the connector's attributes.
            Therefore, a connector may be made without being registered,
            as long as enough parameters are supplied to the constructor.
        """
        if 'uri' in kw:
            uri = kw['uri']
            if uri.startswith('postgres') and not uri.startswith('postgresql'):
                uri = uri.replace('postgres', 'postgresql', 1)
            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
                uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
            if uri.startswith('timescaledb://'):
                uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1)
                flavor = 'timescaledb'
            kw['uri'] = uri
            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
            label = label or from_uri_params.get('label', None)
            _ = from_uri_params.pop('label', None)

            ### Sometimes the flavor may be provided with a URI.
            kw.update(from_uri_params)
            if flavor:
                kw['flavor'] = flavor

        ### set __dict__ in base class
        super().__init__(
            'sql',
            label=label or self.__dict__.get('label', None),
            **kw
        )

        if self.__dict__.get('flavor', None) == 'sqlite':
            self._reset_attributes()
            self._set_attributes(
                'sql',
                label=label,
                inherit_default=False,
                **kw
            )
            ### For backwards compatibility reasons, set the path for sql:local if it's missing.
            if self.label == 'local' and not self.__dict__.get('database', None):
                from meerschaum.config._paths import SQLITE_DB_PATH
                self.database = str(SQLITE_DB_PATH)

        ### ensure flavor and label are set accordingly
        if 'flavor' not in self.__dict__:
            if flavor is None and 'uri' not in self.__dict__:
                raise Exception(
                    f" Missing flavor. Provide flavor as a key for '{self}'."
                )
            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)

        if self.flavor == 'postgres':
            self.flavor = 'postgresql'

        self._debug = debug
        ### Store the PID and thread at initialization
        ### so we can dispose of the Pool in child processes or threads.
        import os, threading
        self._pid = os.getpid()
        self._thread_ident = threading.current_thread().ident
        self._sessions = {}
        self._locks = {'_sessions': threading.RLock(), }

        ### verify the flavor's requirements are met
        if self.flavor not in self.flavor_configs:
            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
        if not self.__dict__.get('uri'):
            self.verify_attributes(
                self.flavor_configs[self.flavor].get('requirements', set()),
                debug=debug,
            )

        if wait:
            from meerschaum.connectors.poll import retry_connect
            retry_connect(connector=self, debug=debug)

        if connect:
            if not self.test_connection(debug=debug):
                warn(f"Failed to connect with connector '{self}'!", stack=False)

    @property
    def Session(self):
        if '_Session' not in self.__dict__:
            if self.engine is None:
                return None

            from meerschaum.utils.packages import attempt_import
            sqlalchemy_orm = attempt_import('sqlalchemy.orm')
            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
            self._Session = sqlalchemy_orm.scoped_session(session_factory)

        return self._Session

    @property
    def engine(self):
        """
        Return the SQLAlchemy engine connected to the configured database.
        """
        import os
        import threading
        if '_engine' not in self.__dict__:
            self._engine, self._engine_str = self.create_engine(include_uri=True)

        same_process = os.getpid() == self._pid
        same_thread = threading.current_thread().ident == self._thread_ident

        ### handle child processes
        if not same_process:
            self._pid = os.getpid()
            self._thread = threading.current_thread()
            warn("Different PID detected. Disposing of connections...")
            self._engine.dispose()

        ### handle different threads
        if not same_thread:
            if self.flavor == 'duckdb':
                warn("Different thread detected.")
                self._engine.dispose()

        return self._engine

    @property
    def DATABASE_URL(self) -> str:
        """
        Return the URI connection string (alias for `SQLConnector.URI`).
        """
        _ = self.engine
        return str(self._engine_str)

    @property
    def URI(self) -> str:
        """
        Return the URI connection string.
        """
        _ = self.engine
        return str(self._engine_str)

    @property
    def IS_THREAD_SAFE(self) -> bool:
        """
        Return whether this connector may be multithreaded.
        """
        if self.flavor in ('duckdb', 'oracle'):
            return False
        if self.flavor == 'sqlite':
            return ':memory:' not in self.URI
        return True

    @property
    def metadata(self):
        """
        Return the metadata bound to this configured schema.
        """
        from meerschaum.utils.packages import attempt_import
        sqlalchemy = attempt_import('sqlalchemy')
        if '_metadata' not in self.__dict__:
            self._metadata = sqlalchemy.MetaData(schema=self.schema)
        return self._metadata

    @property
    def instance_schema(self):
        """
        Return the schema name for Meerschaum tables.
        """
        return self.schema

    @property
    def internal_schema(self):
        """
        Return the schema name for internal tables.
        """
        from meerschaum.config.static import STATIC_CONFIG
        from meerschaum.utils.packages import attempt_import
        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
        schema_name = self.__dict__.get('internal_schema', None) or (
            STATIC_CONFIG['sql']['internal_schema']
            if self.flavor not in NO_SCHEMA_FLAVORS
            else self.schema
        )

        if '_internal_schema' not in self.__dict__:
            self._internal_schema = schema_name
        return self._internal_schema

    @property
    def db(self) -> Optional['databases.Database']:
        from meerschaum.utils.packages import attempt_import
        databases = attempt_import('databases', lazy=False, install=True)
        url = self.DATABASE_URL
        if 'mysql' in url:
            url = url.replace('+pymysql', '')
        if '_db' not in self.__dict__:
            try:
                self._db = databases.Database(url)
            except KeyError:
                ### Likely encountered an unsupported flavor.
                from meerschaum.utils.warnings import warn
                self._db = None
        return self._db

    @property
    def db_version(self) -> Union[str, None]:
        """
        Return the database version.
        """
        _db_version = self.__dict__.get('_db_version', None)
        if _db_version is not None:
            return _db_version

        from meerschaum.utils.sql import get_db_version
        self._db_version = get_db_version(self)
        return self._db_version

    @property
    def schema(self) -> Union[str, None]:
        """
        Return the default schema to use.
        A value of `None` will not prepend a schema.
        """
        if 'schema' in self.__dict__:
            return self.__dict__['schema']

        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
        if self.flavor in NO_SCHEMA_FLAVORS:
            self.__dict__['schema'] = None
            return None

        sqlalchemy = mrsm.attempt_import('sqlalchemy')
        _schema = sqlalchemy.inspect(self.engine).default_schema_name
        self.__dict__['schema'] = _schema
        return _schema

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

    def __call__(self):
        return self
```
Connect to SQL databases via `sqlalchemy`.

SQLConnectors may be used as Meerschaum instance connectors. Read more about connectors and instances at https://meerschaum.io/reference/connectors/
```python
def __init__(self, label: Optional[str] = None, flavor: Optional[str] = None, wait: bool = False, connect: bool = False, debug: bool = False, **kw: Any):
```
Parameters
- label (str, default 'main'):
  The identifying label for the connector.
  E.g. for `sql:main`, 'main' is the label.
- flavor (Optional[str], default None):
  The database flavor, e.g. `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
  To see supported flavors, run the `bootstrap connectors` command.
- wait (bool, default False):
  If `True`, block until a database connection has been made.
- connect (bool, default False):
  If `True`, immediately attempt to connect to the database and raise a warning if the connection fails.
- debug (bool, default False):
  Verbosity toggle.
- kw (Any):
  All other arguments will be passed to the connector's attributes.
  Therefore, a connector may be made without being registered, as long as enough parameters are supplied to the constructor.
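For example, an ad-hoc `SQLConnector` may be built entirely from a URI, with the flavor and credentials parsed out automatically (a sketch; the credentials, labels, and paths are placeholders):

```python
from meerschaum.connectors import SQLConnector

### Attributes (flavor, username, host, port, database) are parsed from the URI.
conn = SQLConnector(label='remote', uri='postgresql://user:pass@localhost:5432/db')
print(conn.flavor)   # 'postgresql'

### Or supply discrete attributes without registering anything in the config.
lite = SQLConnector(label='tmp', flavor='sqlite', database='/tmp/tmp.db')
```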
```python
@property
def Session(self):
```
```python
@property
def engine(self):
```
Return the SQLAlchemy engine connected to the configured database.
```python
@property
def DATABASE_URL(self) -> str:
```
Return the URI connection string (alias for `SQLConnector.URI`).
```python
@property
def URI(self) -> str:
```
Return the URI connection string.
```python
@property
def IS_THREAD_SAFE(self) -> bool:
```
Return whether this connector may be multithreaded.
```python
@property
def metadata(self):
```
Return the metadata bound to this configured schema.
```python
@property
def instance_schema(self):
```
Return the schema name for Meerschaum tables.
```python
@property
def internal_schema(self):
```
Return the schema name for internal tables.
```python
@property
def db(self) -> Optional['databases.Database']:
```
```python
@property
def db_version(self) -> Union[str, None]:
```
Return the database version.
```python
@property
def schema(self) -> Union[str, None]:
```
Return the default schema to use.
A value of `None` will not prepend a schema.
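A sketch of the flavor-dependent behavior, assuming SQLite remains among the `NO_SCHEMA_FLAVORS` (the label and path are placeholders):

```python
import meerschaum as mrsm

conn = mrsm.get_connector('sql:temp', flavor='sqlite', database='/tmp/temp.db')
### SQLite has no schemas, so nothing is prepended to table names.
print(conn.schema)            # None
print(conn.instance_schema)   # alias for `schema`, so also None
```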
```python
def create_engine(
    self,
    include_uri: bool = False,
    debug: bool = False,
    **kw
) -> 'sqlalchemy.engine.Engine':
    """Create a sqlalchemy engine by building the engine string."""
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.warnings import error, warn
    sqlalchemy = attempt_import('sqlalchemy')
    import urllib
    import copy
    ### Install and patch required drivers.
    if self.flavor in install_flavor_drivers:
        attempt_import(*install_flavor_drivers[self.flavor], debug=debug, lazy=False, warn=False)
        if self.flavor == 'mssql':
            pyodbc = attempt_import('pyodbc', debug=debug, lazy=False, warn=False)
            pyodbc.pooling = False
    if self.flavor in require_patching_flavors:
        from meerschaum.utils.packages import determine_version, _monkey_patch_get_distribution
        import pathlib
        for install_name, import_name in require_patching_flavors[self.flavor]:
            pkg = attempt_import(
                import_name,
                debug=debug,
                lazy=False,
                warn=False
            )
            _monkey_patch_get_distribution(
                install_name, determine_version(pathlib.Path(pkg.__file__), venv='mrsm')
            )

    ### supplement missing values with defaults (e.g. port number)
    for a, value in flavor_configs[self.flavor]['defaults'].items():
        if a not in self.__dict__:
            self.__dict__[a] = value

    ### Verify that everything is in order.
    if self.flavor not in flavor_configs:
        error(f"Cannot create a connector with the flavor '{self.flavor}'.")

    _engine = flavor_configs[self.flavor].get('engine', None)
    _username = self.__dict__.get('username', None)
    _password = self.__dict__.get('password', None)
    _host = self.__dict__.get('host', None)
    _port = self.__dict__.get('port', None)
    _database = self.__dict__.get('database', None)
    _options = self.__dict__.get('options', {})
    if isinstance(_options, str):
        _options = dict(urllib.parse.parse_qsl(_options))
    _uri = self.__dict__.get('uri', None)

    ### Handle registering specific dialects (due to installing in virtual environments).
    if self.flavor in flavor_dialects:
        sqlalchemy.dialects.registry.register(*flavor_dialects[self.flavor])

    ### self._sys_config was deepcopied and can be updated safely
    if self.flavor in ("sqlite", "duckdb"):
        engine_str = f"{_engine}:///{_database}" if not _uri else _uri
        if 'create_engine' not in self._sys_config:
            self._sys_config['create_engine'] = {}
        if 'connect_args' not in self._sys_config['create_engine']:
            self._sys_config['create_engine']['connect_args'] = {}
        self._sys_config['create_engine']['connect_args'].update({"check_same_thread": False})
    else:
        engine_str = (
            _engine + "://" + (_username if _username is not None else '') +
            ((":" + urllib.parse.quote_plus(_password)) if _password is not None else '') +
            "@" + _host + ((":" + str(_port)) if _port is not None else '') +
            (("/" + _database) if _database is not None else '')
            + (("?" + urllib.parse.urlencode(_options)) if _options else '')
        ) if not _uri else _uri

    ### Sometimes the timescaledb:// flavor can slip in.
    if _uri and self.flavor in ('timescaledb',) and self.flavor in _uri:
        engine_str = engine_str.replace(f'{self.flavor}', 'postgresql', 1)

    if debug:
        dprint(
            (
                (engine_str.replace(':' + _password, ':' + ('*' * len(_password))))
                if _password is not None else engine_str
            ) + '\n' + f"{self._sys_config.get('create_engine', {}).get('connect_args', {})}"
        )

    _kw_copy = copy.deepcopy(kw)

    ### NOTE: Order of inheritance:
    ###   1. Defaults
    ###   2. System configuration
    ###   3. Connector configuration
    ###   4. Keyword arguments
    _create_engine_args = flavor_configs.get(self.flavor, {}).get('create_engine', {})
    def _apply_create_engine_args(update):
        if 'ALL' not in flavor_configs[self.flavor].get('omit_create_engine', {}):
            _create_engine_args.update(
                { k: v for k, v in update.items()
                    if 'omit_create_engine' not in flavor_configs[self.flavor]
                    or k not in flavor_configs[self.flavor].get('omit_create_engine')
                }
            )
    _apply_create_engine_args(self._sys_config.get('create_engine', {}))
    _apply_create_engine_args(self.__dict__.get('create_engine', {}))
    _apply_create_engine_args(_kw_copy)

    try:
        engine = sqlalchemy.create_engine(
            engine_str,
            ### I know this looks confusing, and maybe it's bad code,
            ### but it's simple. It dynamically parses the config string
            ### and splits it to separate the class name (QueuePool)
            ### from the module name (sqlalchemy.pool).
            poolclass=getattr(
                attempt_import(
                    ".".join(self._sys_config['poolclass'].split('.')[:-1])
                ),
                self._sys_config['poolclass'].split('.')[-1]
            ),
            echo=debug,
            **_create_engine_args
        )
    except Exception as e:
        warn(f"Failed to create connector '{self}':\n{traceback.format_exc()}", stack=False)
        engine = None

    if include_uri:
        return engine, engine_str
    return engine
```
Create a sqlalchemy engine by building the engine string.
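The server-flavor branch assembles the engine string from the connector's attributes roughly as follows (an illustrative sketch of the same logic with placeholder values, not a public API):

```python
import urllib.parse

### Mirrors the non-SQLite engine-string assembly in `create_engine()`.
_engine, _username, _password = 'postgresql+psycopg', 'user', 'p@ss'
_host, _port, _database, _options = 'localhost', 5432, 'db', {'sslmode': 'require'}

engine_str = (
    _engine + "://" + _username
    + ":" + urllib.parse.quote_plus(_password)   # passwords are URL-quoted
    + "@" + _host + ":" + str(_port)
    + "/" + _database
    + "?" + urllib.parse.urlencode(_options)
)
print(engine_str)
### postgresql+psycopg://user:p%40ss@localhost:5432/db?sslmode=require
```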
```python
def read(
    self,
    query_or_table: Union[str, sqlalchemy.Query],
    params: Union[Dict[str, Any], List[str], None] = None,
    dtype: Optional[Dict[str, Any]] = None,
    coerce_float: bool = True,
    chunksize: Optional[int] = -1,
    workers: Optional[int] = None,
    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None,
    as_hook_results: bool = False,
    chunks: Optional[int] = None,
    schema: Optional[str] = None,
    as_chunks: bool = False,
    as_iterator: bool = False,
    as_dask: bool = False,
    index_col: Optional[str] = None,
    silent: bool = False,
    debug: bool = False,
    **kw: Any
) -> Union[
    pandas.DataFrame,
    dask.DataFrame,
    List[pandas.DataFrame],
    List[Any],
    None,
]:
    """
    Read a SQL query or table into a pandas dataframe.

    Parameters
    ----------
    query_or_table: Union[str, sqlalchemy.Query]
        The SQL query (sqlalchemy Query or string) or name of the table from which to select.

    params: Optional[Dict[str, Any]], default None
        `List` or `Dict` of parameters to pass to `pandas.read_sql()`.
        See the pandas documentation for more information:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html

    dtype: Optional[Dict[str, Any]], default None
        A dictionary of data types to pass to `pandas.read_sql()`.
        See the pandas documentation for more information:
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html

    chunksize: Optional[int], default -1
        How many chunks to read at a time. `None` will read everything in one large chunk.
        Defaults to system configuration.

        **NOTE:** DuckDB does not allow for chunking.

    workers: Optional[int], default None
        How many threads to use when consuming the generator.
        Only applies if `chunk_hook` is provided.

    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]], default None
        Hook function to execute once per chunk, e.g. writing and reading chunks intermittently.
        See `--sync-chunks` for an example.
        **NOTE:** `as_iterator` MUST be False (default).

    as_hook_results: bool, default False
        If `True`, return a `List` of the outputs of the hook function.
        Only applicable if `chunk_hook` is not None.

        **NOTE:** `as_iterator` MUST be `False` (default).

    chunks: Optional[int], default None
        Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and
        return into a single dataframe.
        For example, to limit the returned dataframe to 100,000 rows,
        you could specify a `chunksize` of `1000` and `chunks` of `100`.

    schema: Optional[str], default None
        If just a table name is provided, optionally specify the table schema.
        Defaults to `SQLConnector.schema`.

    as_chunks: bool, default False
        If `True`, return a list of DataFrames.
        Otherwise return a single DataFrame.

    as_iterator: bool, default False
        If `True`, return the pandas DataFrame iterator.
        `chunksize` must not be `None` (falls back to 1000 if so),
        and hooks are not called in this case.

    index_col: Optional[str], default None
        If using Dask, use this column as the index column.
        If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.

    silent: bool, default False
        If `True`, don't raise warnings in case of errors.
        Defaults to `False`.

    Returns
    -------
    A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators,
    or `None` if something breaks.

    """
    if chunks is not None and chunks <= 0:
        return []
    from meerschaum.utils.sql import sql_item_name, truncate_item_name
    from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
    from meerschaum.utils.packages import attempt_import, import_pandas
    from meerschaum.utils.pool import get_pool
    from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
    import warnings
    import traceback
    from decimal import Decimal
    pd = import_pandas()
    dd = None
    is_dask = 'dask' in pd.__name__
    pandas = attempt_import('pandas')
    is_dask = dd is not None
    npartitions = chunksize_to_npartitions(chunksize)
    if is_dask:
        chunksize = None
    schema = schema or self.schema
    utc_dt_cols = [
        col
        for col, typ in dtype.items()
        if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
    ] if dtype else []

    if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
        dtype = dtype.copy()
        for col in utc_dt_cols:
            dtype[col] = 'datetime64[ns]'

    pool = get_pool(workers=workers)
    sqlalchemy = attempt_import("sqlalchemy")
    default_chunksize = self._sys_config.get('chunksize', None)
    chunksize = chunksize if chunksize != -1 else default_chunksize
    if chunksize is None and as_iterator:
        if not silent and self.flavor not in _disallow_chunks_flavors:
            warn(
                "An iterator may only be generated if chunksize is not None.\n"
                + "Falling back to a chunksize of 1000.", stacklevel=3,
            )
        chunksize = 1000
    if chunksize is not None and self.flavor in _max_chunks_flavors:
        if chunksize > _max_chunks_flavors[self.flavor]:
            if chunksize != default_chunksize:
                warn(
                    f"The specified chunksize of {chunksize} exceeds the maximum of "
                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
                    + f"  Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
                    stacklevel=3,
                )
            chunksize = _max_chunks_flavors[self.flavor]

    if chunksize is not None and self.flavor in _disallow_chunks_flavors:
        chunksize = None

    if debug:
        import time
        start = time.perf_counter()
        dprint(f"[{self}]\n{query_or_table}")
        dprint(f"[{self}] Fetching with chunksize: {chunksize}")

    ### This might be a sqlalchemy object or the string of a table name.
    ### We check for spaces and quotes to see if it might be a weird table.
    if (
        ' ' not in str(query_or_table)
        or (
            ' ' in str(query_or_table)
            and str(query_or_table).startswith('"')
            and str(query_or_table).endswith('"')
        )
    ):
        truncated_table_name = truncate_item_name(str(query_or_table), self.flavor)
        if truncated_table_name != str(query_or_table) and not silent:
            warn(
                f"Table '{query_or_table}' is too long for '{self.flavor}',"
                + f" will instead read the table '{truncated_table_name}'."
            )

        query_or_table = sql_item_name(str(query_or_table), self.flavor, schema)
        if debug:
            dprint(f"[{self}] Reading from table {query_or_table}")
        formatted_query = sqlalchemy.text("SELECT * FROM " + str(query_or_table))
        str_query = f"SELECT * FROM {query_or_table}"
    else:
        str_query = query_or_table

    formatted_query = (
        sqlalchemy.text(str_query)
        if not is_dask and isinstance(str_query, str)
        else format_sql_query_for_dask(str_query)
    )

    chunk_list = []
    chunk_hook_results = []
    def _process_chunk(_chunk, _retry_on_failure: bool = True):
        if self.flavor in TIMEZONE_NAIVE_FLAVORS:
            for col in utc_dt_cols:
                _chunk[col] = coerce_timezone(_chunk[col], strip_timezone=False)
        if not as_hook_results:
            chunk_list.append(_chunk)
        if chunk_hook is None:
            return None

        result = None
        try:
            result = chunk_hook(
                _chunk,
                workers=workers,
                chunksize=chunksize,
                debug=debug,
                **kw
            )
        except Exception:
            result = False, traceback.format_exc()
            from meerschaum.utils.formatting import get_console
            if not silent:
                get_console().print_exception()

        ### If the chunk fails to process, try it again one more time.
        if isinstance(result, tuple) and result[0] is False:
            if _retry_on_failure:
                return _process_chunk(_chunk, _retry_on_failure=False)

        return result

    try:
        stream_results = not as_iterator and chunk_hook is not None and chunksize is not None
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'case sensitivity issues')

            read_sql_query_kwargs = {
                'params': params,
                'dtype': dtype,
                'coerce_float': coerce_float,
                'index_col': index_col,
            }
            if is_dask:
                if index_col is None:
                    dd = None
                    pd = attempt_import('pandas')
                    read_sql_query_kwargs.update({
                        'chunksize': chunksize,
                    })
            else:
                read_sql_query_kwargs.update({
                    'chunksize': chunksize,
                })

            if is_dask and dd is not None:
                ddf = dd.read_sql_query(
                    formatted_query,
                    self.URI,
                    **read_sql_query_kwargs
                )
            else:

                def get_chunk_generator(connectable):
                    chunk_generator = pd.read_sql_query(
                        formatted_query,
                        self.engine,
                        **read_sql_query_kwargs
                    )
                    to_return = (
                        chunk_generator
                        if as_iterator or chunksize is None
                        else (
                            list(pool.imap(_process_chunk, chunk_generator))
                            if as_hook_results
                            else None
                        )
                    )
                    return chunk_generator, to_return

                if self.flavor in SKIP_READ_TRANSACTION_FLAVORS:
                    chunk_generator, to_return = get_chunk_generator(self.engine)
                else:
                    with self.engine.begin() as transaction:
                        with transaction.execution_options(stream_results=stream_results) as connection:
                            chunk_generator, to_return = get_chunk_generator(connection)

                if to_return is not None:
                    return to_return

    except Exception as e:
        if debug:
            dprint(f"[{self}] Failed to execute query:\n\n{query_or_table}\n\n")
        if not silent:
            warn(str(e), stacklevel=3)
        from meerschaum.utils.formatting import get_console
        if not silent:
            get_console().print_exception()

        return None

    if is_dask and dd is not None:
        ddf = ddf.reset_index()
        return ddf

    chunk_list = []
    read_chunks = 0
    chunk_hook_results = []
    if chunksize is None:
        chunk_list.append(chunk_generator)
    elif as_iterator:
        return chunk_generator
    else:
        try:
            for chunk in chunk_generator:
                if chunk_hook is not None:
                    chunk_hook_results.append(
                        chunk_hook(chunk, chunksize=chunksize, debug=debug, **kw)
                    )
                chunk_list.append(chunk)
                read_chunks += 1
                if chunks is not None and read_chunks >= chunks:
                    break
        except Exception as e:
            warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3)
            from meerschaum.utils.formatting import get_console
            if not silent:
                get_console().print_exception()

            return None

    ### If no chunks returned, read without chunks
    ### to get columns
    if len(chunk_list) == 0:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'case sensitivity issues')
            _ = read_sql_query_kwargs.pop('chunksize', None)
            with self.engine.begin() as connection:
                chunk_list.append(
                    pd.read_sql_query(
                        formatted_query,
                        connection,
                        **read_sql_query_kwargs
                    )
                )

    ### call the hook on any missed chunks.
    if chunk_hook is not None and len(chunk_list) > len(chunk_hook_results):
        for c in chunk_list[len(chunk_hook_results):]:
            chunk_hook_results.append(
                chunk_hook(c, chunksize=chunksize, debug=debug, **kw)
            )

    ### chunksize is not None so must iterate
    if debug:
        end = time.perf_counter()
        dprint(f"Fetched {len(chunk_list)} chunks in {round(end - start, 2)} seconds.")

    if as_hook_results:
        return chunk_hook_results

    ### Skip `pd.concat()` if `as_chunks` is specified.
    if as_chunks:
        for c in chunk_list:
            c.reset_index(drop=True, inplace=True)
            for col in get_numeric_cols(c):
                c[col] = c[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)
        return chunk_list

    df = pd.concat(chunk_list).reset_index(drop=True)
    ### NOTE: The calls to `canonical()` are to drop leading and trailing zeroes.
    for col in get_numeric_cols(df):
        df[col] = df[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)

    return df
```
Read a SQL query or table into a pandas dataframe.
Parameters
- query_or_table (Union[str, sqlalchemy.Query]): The SQL query (sqlalchemy Query or string) or name of the table from which to select.
- params (Optional[Dict[str, Any]], default None): `List` or `Dict` of parameters to pass to `pandas.read_sql()`. See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
- dtype (Optional[Dict[str, Any]], default None): A dictionary of data types to pass to `pandas.read_sql()`. See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
- chunksize (Optional[int], default -1): How many chunks to read at a time. `None` will read everything in one large chunk. Defaults to system configuration. NOTE: DuckDB does not allow for chunking.
- workers (Optional[int], default None): How many threads to use when consuming the generator. Only applies if `chunk_hook` is provided.
- chunk_hook (Optional[Callable[[pandas.DataFrame], Any]], default None): Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. See `--sync-chunks` for an example. NOTE: `as_iterator` MUST be `False` (default).
- as_hook_results (bool, default False): If `True`, return a `List` of the outputs of the hook function. Only applicable if `chunk_hook` is not None. NOTE: `as_iterator` MUST be `False` (default).
- chunks (Optional[int], default None): Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and return into a single dataframe. For example, to limit the returned dataframe to 100,000 rows, you could specify a `chunksize` of `1000` and `chunks` of `100`.
- schema (Optional[str], default None): If just a table name is provided, optionally specify the table schema. Defaults to `SQLConnector.schema`.
- as_chunks (bool, default False): If `True`, return a list of DataFrames. Otherwise return a single DataFrame.
- as_iterator (bool, default False): If `True`, return the pandas DataFrame iterator. `chunksize` must not be `None` (falls back to 1000 if so), and hooks are not called in this case.
- index_col (Optional[str], default None): If using Dask, use this column as the index column. If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
- silent (bool, default False): If `True`, don't raise warnings in case of errors. Defaults to `False`.
Returns
- A `pd.DataFrame` (default case), an iterator, a list of dataframes / iterators, or `None` if something breaks.
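A minimal sketch of common `read()` calls (the SQLite connector label `'local'`, database path, and `events` table below are hypothetical):

```
import meerschaum as mrsm

### Hypothetical connector; any SQL flavor works the same way.
conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

### Read an entire table into one DataFrame.
df = conn.read('events')

### Cap the result at 10 chunks of 1,000 rows each (at most 10,000 rows).
df_capped = conn.read("SELECT * FROM events", chunksize=1000, chunks=10)

### Iterate over chunks lazily instead of concatenating them.
for chunk in conn.read("SELECT * FROM events", chunksize=1000, as_iterator=True):
    print(len(chunk))
```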
412def value( 413 self, 414 query: str, 415 *args: Any, 416 use_pandas: bool = False, 417 **kw: Any 418) -> Any: 419 """ 420 Execute the provided query and return the first value. 421 422 Parameters 423 ---------- 424 query: str 425 The SQL query to execute. 426 427 *args: Any 428 The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec` 429 if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`. 430 431 use_pandas: bool, default False 432 If `True`, use `meerschaum.connectors.SQLConnector.read`, otherwise use 433 `meerschaum.connectors.sql.SQLConnector.exec` (default). 434 **NOTE:** This is always `True` for DuckDB. 435 436 **kw: Any 437 See `args`. 438 439 Returns 440 ------- 441 Any value returned from the query. 442 443 """ 444 from meerschaum.utils.packages import attempt_import 445 sqlalchemy = attempt_import('sqlalchemy') 446 if self.flavor == 'duckdb': 447 use_pandas = True 448 if use_pandas: 449 try: 450 return self.read(query, *args, **kw).iloc[0, 0] 451 except Exception: 452 return None 453 454 _close = kw.get('close', True) 455 _commit = kw.get('commit', (self.flavor != 'mssql')) 456 457 # _close = True 458 # _commit = True 459 460 try: 461 result, connection = self.exec( 462 query, 463 *args, 464 with_connection=True, 465 close=False, 466 commit=_commit, 467 **kw 468 ) 469 first = result.first() if result is not None else None 470 _val = first[0] if first is not None else None 471 except Exception as e: 472 warn(e, stacklevel=3) 473 return None 474 if _close: 475 try: 476 connection.close() 477 except Exception as e: 478 warn("Failed to close connection with exception:\n" + str(e)) 479 return _val
Execute the provided query and return the first value.
Parameters
- query (str): The SQL query to execute.
- *args (Any): The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec` if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.
- use_pandas (bool, default False): If `True`, use `meerschaum.connectors.SQLConnector.read`, otherwise use `meerschaum.connectors.sql.SQLConnector.exec` (default). NOTE: This is always `True` for DuckDB.
- **kw (Any): See `args`.
Returns
- Any value returned from the query.
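For example (reusing the hypothetical `events` table from the sketch above), `value()` returns only the first cell of the result set:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

### Scalar queries return only the first cell (or None on failure).
row_count = conn.value("SELECT COUNT(*) FROM events")
```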
493def exec( 494 self, 495 query: str, 496 *args: Any, 497 silent: bool = False, 498 debug: bool = False, 499 commit: Optional[bool] = None, 500 close: Optional[bool] = None, 501 with_connection: bool = False, 502 _connection=None, 503 _transaction=None, 504 **kw: Any 505) -> Union[ 506 sqlalchemy.engine.result.resultProxy, 507 sqlalchemy.engine.cursor.LegacyCursorResult, 508 Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection], 509 Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection], 510 None 511]: 512 """ 513 Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures. 514 515 If inserting data, please use bind variables to avoid SQL injection! 516 517 Parameters 518 ---------- 519 query: Union[str, List[str], Tuple[str]] 520 The query to execute. 521 If `query` is a list or tuple, call `self.exec_queries()` instead. 522 523 args: Any 524 Arguments passed to `sqlalchemy.engine.execute`. 525 526 silent: bool, default False 527 If `True`, suppress warnings. 528 529 commit: Optional[bool], default None 530 If `True`, commit the changes after execution. 531 Causes issues with flavors like `'mssql'`. 532 This does not apply if `query` is a list of strings. 533 534 close: Optional[bool], default None 535 If `True`, close the connection after execution. 536 Causes issues with flavors like `'mssql'`. 537 This does not apply if `query` is a list of strings. 538 539 with_connection: bool, default False 540 If `True`, return a tuple including the connection object. 541 This does not apply if `query` is a list of strings. 542 543 Returns 544 ------- 545 The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided. 546 547 """ 548 if isinstance(query, (list, tuple)): 549 return self.exec_queries( 550 list(query), 551 *args, 552 silent=silent, 553 debug=debug, 554 **kw 555 ) 556 557 from meerschaum.utils.packages import attempt_import 558 sqlalchemy = attempt_import("sqlalchemy") 559 if debug: 560 dprint(f"[{self}] Executing query:\n{query}") 561 562 _close = close if close is not None else (self.flavor != 'mssql') 563 _commit = commit if commit is not None else ( 564 (self.flavor != 'mssql' or 'select' not in str(query).lower()) 565 ) 566 567 ### Select and Insert objects need to be compiled (SQLAlchemy 2.0.0+). 
568 if not hasattr(query, 'compile'): 569 query = sqlalchemy.text(query) 570 571 connection = _connection if _connection is not None else self.get_connection() 572 573 try: 574 transaction = ( 575 _transaction 576 if _transaction is not None else ( 577 connection.begin() 578 if _commit 579 else None 580 ) 581 ) 582 except sqlalchemy.exc.InvalidRequestError as e: 583 if _connection is not None or _transaction is not None: 584 raise e 585 connection = self.get_connection(rebuild=True) 586 transaction = connection.begin() 587 588 if transaction is not None and not transaction.is_active and _transaction is not None: 589 connection = self.get_connection(rebuild=True) 590 transaction = connection.begin() if _commit else None 591 592 result = None 593 try: 594 result = connection.execute(query, *args, **kw) 595 if _commit: 596 transaction.commit() 597 except Exception as e: 598 if debug: 599 dprint(f"[{self}] Failed to execute query:\n\n{query}\n\n{e}") 600 if not silent: 601 warn(str(e), stacklevel=3) 602 result = None 603 if _commit: 604 transaction.rollback() 605 connection = self.get_connection(rebuild=True) 606 finally: 607 if _close: 608 connection.close() 609 610 if with_connection: 611 return result, connection 612 613 return result
Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
If inserting data, please use bind variables to avoid SQL injection!
Parameters
- query (Union[str, List[str], Tuple[str]]): The query to execute. If `query` is a list or tuple, call `self.exec_queries()` instead.
- args (Any): Arguments passed to `sqlalchemy.engine.execute`.
- silent (bool, default False): If `True`, suppress warnings.
- commit (Optional[bool], default None): If `True`, commit the changes after execution. Causes issues with flavors like `'mssql'`. This does not apply if `query` is a list of strings.
- close (Optional[bool], default None): If `True`, close the connection after execution. Causes issues with flavors like `'mssql'`. This does not apply if `query` is a list of strings.
- with_connection (bool, default False): If `True`, return a tuple including the connection object. This does not apply if `query` is a list of strings.
Returns
- The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided.
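A sketch of `exec()` with bind variables (the `events` table is hypothetical); positional arguments are passed through to `connection.execute()`:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

### Bind variables avoid SQL injection; a failed query returns None.
result = conn.exec(
    "INSERT INTO events (id, msg) VALUES (:id, :msg)",
    {'id': 1, 'msg': 'hello'},
)
if result is None:
    print("Query failed.")
```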
482def execute( 483 self, 484 *args : Any, 485 **kw : Any 486) -> Optional[sqlalchemy.engine.result.resultProxy]: 487 """ 488 An alias for `meerschaum.connectors.sql.SQLConnector.exec`. 489 """ 490 return self.exec(*args, **kw)
An alias for `meerschaum.connectors.sql.SQLConnector.exec`.
710def to_sql( 711 self, 712 df: pandas.DataFrame, 713 name: str = None, 714 index: bool = False, 715 if_exists: str = 'replace', 716 method: str = "", 717 chunksize: Optional[int] = -1, 718 schema: Optional[str] = None, 719 silent: bool = False, 720 debug: bool = False, 721 as_tuple: bool = False, 722 as_dict: bool = False, 723 _connection=None, 724 _transaction=None, 725 **kw 726) -> Union[bool, SuccessTuple]: 727 """ 728 Upload a DataFrame's contents to the SQL server. 729 730 Parameters 731 ---------- 732 df: pd.DataFrame 733 The DataFrame to be uploaded. 734 735 name: str 736 The name of the table to be created. 737 738 index: bool, default False 739 If True, creates the DataFrame's indices as columns. 740 741 if_exists: str, default 'replace' 742 Drop and create the table ('replace') or append if it exists 743 ('append') or raise Exception ('fail'). 744 Options are ['replace', 'append', 'fail']. 745 746 method: str, default '' 747 None or multi. Details on pandas.to_sql. 748 749 chunksize: Optional[int], default -1 750 How many rows to insert at a time. 751 752 schema: Optional[str], default None 753 Optionally override the schema for the table. 754 Defaults to `SQLConnector.schema`. 755 756 as_tuple: bool, default False 757 If `True`, return a (success_bool, message) tuple instead of a `bool`. 758 Defaults to `False`. 759 760 as_dict: bool, default False 761 If `True`, return a dictionary of transaction information. 762 The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`, 763 `method`, and `target`. 764 765 kw: Any 766 Additional arguments will be passed to the DataFrame's `to_sql` function 767 768 Returns 769 ------- 770 Either a `bool` or a `SuccessTuple` (depends on `as_tuple`). 771 """ 772 import time 773 import json 774 import decimal 775 from decimal import Decimal, Context 776 from meerschaum.utils.warnings import error, warn 777 import warnings 778 import functools 779 if name is None: 780 error(f"Name must not be `None` to insert data into {self}.") 781 782 ### We're requiring `name` to be positional, and sometimes it's passed in from background jobs. 783 kw.pop('name', None) 784 785 schema = schema or self.schema 786 787 from meerschaum.utils.sql import ( 788 sql_item_name, 789 table_exists, 790 json_flavors, 791 truncate_item_name, 792 DROP_IF_EXISTS_FLAVORS, 793 ) 794 from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols 795 from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone 796 from meerschaum.utils.dtypes.sql import ( 797 NUMERIC_PRECISION_FLAVORS, 798 PD_TO_SQLALCHEMY_DTYPES_FLAVORS, 799 get_db_type_from_pd_type, 800 ) 801 from meerschaum.connectors.sql._create_engine import flavor_configs 802 from meerschaum.utils.packages import attempt_import, import_pandas 803 sqlalchemy = attempt_import('sqlalchemy', debug=debug) 804 pd = import_pandas() 805 is_dask = 'dask' in df.__module__ 806 807 stats = {'target': name, } 808 ### resort to defaults if None 809 if method == "": 810 if self.flavor in _bulk_flavors: 811 method = functools.partial(psql_insert_copy, schema=self.schema) 812 else: 813 ### Should resolve to 'multi' or `None`. 
814 method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi') 815 stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method) 816 817 default_chunksize = self._sys_config.get('chunksize', None) 818 chunksize = chunksize if chunksize != -1 else default_chunksize 819 if chunksize is not None and self.flavor in _max_chunks_flavors: 820 if chunksize > _max_chunks_flavors[self.flavor]: 821 if chunksize != default_chunksize: 822 warn( 823 f"The specified chunksize of {chunksize} exceeds the maximum of " 824 + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n" 825 + f" Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.", 826 stacklevel = 3, 827 ) 828 chunksize = _max_chunks_flavors[self.flavor] 829 stats['chunksize'] = chunksize 830 831 success, msg = False, "Default to_sql message" 832 start = time.perf_counter() 833 if debug: 834 msg = f"[{self}] Inserting {len(df)} rows with chunksize: {chunksize}..." 835 print(msg, end="", flush=True) 836 stats['num_rows'] = len(df) 837 838 ### Check if the name is too long. 839 truncated_name = truncate_item_name(name, self.flavor) 840 if name != truncated_name: 841 warn( 842 f"Table '{name}' is too long for '{self.flavor}'," 843 + f" will instead create the table '{truncated_name}'." 844 ) 845 846 ### filter out non-pandas args 847 import inspect 848 to_sql_params = inspect.signature(df.to_sql).parameters 849 to_sql_kw = {} 850 for k, v in kw.items(): 851 if k in to_sql_params: 852 to_sql_kw[k] = v 853 854 to_sql_kw.update({ 855 'name': truncated_name, 856 'schema': schema, 857 ('con' if not is_dask else 'uri'): (self.engine if not is_dask else self.URI), 858 'index': index, 859 'if_exists': if_exists, 860 'method': method, 861 'chunksize': chunksize, 862 }) 863 if is_dask: 864 to_sql_kw.update({ 865 'parallel': True, 866 }) 867 elif _connection is not None: 868 to_sql_kw['con'] = _connection 869 870 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 871 if self.flavor == 'oracle': 872 ### For some reason 'replace' doesn't work properly in pandas, 873 ### so try dropping first. 874 if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug): 875 success = self.exec( 876 f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema) 877 ) is not None 878 if not success: 879 warn(f"Unable to drop {name}") 880 881 ### Enforce NVARCHAR(2000) as text instead of CLOB. 882 dtype = to_sql_kw.get('dtype', {}) 883 for col, typ in df.dtypes.items(): 884 if are_dtypes_equal(str(typ), 'object'): 885 dtype[col] = sqlalchemy.types.NVARCHAR(2000) 886 elif are_dtypes_equal(str(typ), 'int'): 887 dtype[col] = sqlalchemy.types.INTEGER 888 to_sql_kw['dtype'] = dtype 889 elif self.flavor == 'duckdb': 890 dtype = to_sql_kw.get('dtype', {}) 891 dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')] 892 for col in dt_cols: 893 df[col] = coerce_timezone(df[col], strip_utc=False) 894 elif self.flavor == 'mssql': 895 dtype = to_sql_kw.get('dtype', {}) 896 dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')] 897 new_dtype = {} 898 for col in dt_cols: 899 if col in dtype: 900 continue 901 dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True) 902 if col not in dtype: 903 new_dtype[col] = dt_typ 904 905 dtype.update(new_dtype) 906 to_sql_kw['dtype'] = dtype 907 908 ### Check for JSON columns. 
909 if self.flavor not in json_flavors: 910 json_cols = get_json_cols(df) 911 if json_cols: 912 for col in json_cols: 913 df[col] = df[col].apply( 914 ( 915 lambda x: json.dumps(x, default=str, sort_keys=True) 916 if not isinstance(x, Hashable) 917 else x 918 ) 919 ) 920 921 ### Check for numeric columns. 922 numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None)) 923 if numeric_precision is not None and numeric_scale is not None: 924 numeric_cols = get_numeric_cols(df) 925 for col in numeric_cols: 926 df[col] = df[col].apply( 927 lambda x: ( 928 quantize_decimal(x, numeric_scale, numeric_precision) 929 if isinstance(x, Decimal) 930 else x 931 ) 932 ) 933 934 if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid': 935 uuid_cols = get_uuid_cols(df) 936 for col in uuid_cols: 937 df[col] = df[col].astype(str) 938 939 try: 940 with warnings.catch_warnings(): 941 warnings.filterwarnings('ignore') 942 df.to_sql(**to_sql_kw) 943 success = True 944 except Exception as e: 945 if not silent: 946 warn(str(e)) 947 success, msg = False, str(e) 948 949 end = time.perf_counter() 950 if success: 951 msg = f"It took {round(end - start, 2)} seconds to sync {len(df)} rows to {name}." 952 stats['start'] = start 953 stats['end'] = end 954 stats['duration'] = end - start 955 956 if debug: 957 print(f" done.", flush=True) 958 dprint(msg) 959 960 stats['success'] = success 961 stats['msg'] = msg 962 if as_tuple: 963 return success, msg 964 if as_dict: 965 return stats 966 return success
Upload a DataFrame's contents to the SQL server.
Parameters
- df (pd.DataFrame): The DataFrame to be uploaded.
- name (str): The name of the table to be created.
- index (bool, default False): If True, creates the DataFrame's indices as columns.
- if_exists (str, default 'replace'): Drop and create the table ('replace') or append if it exists ('append') or raise Exception ('fail'). Options are ['replace', 'append', 'fail'].
- method (str, default ''): `None` or `'multi'`. See the `method` argument of `pandas.DataFrame.to_sql()` for details.
- chunksize (Optional[int], default -1): How many rows to insert at a time.
- schema (Optional[str], default None): Optionally override the schema for the table. Defaults to `SQLConnector.schema`.
- as_tuple (bool, default False): If `True`, return a (success_bool, message) tuple instead of a `bool`. Defaults to `False`.
- as_dict (bool, default False): If `True`, return a dictionary of transaction information. The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`, `method`, and `target`.
- kw (Any): Additional arguments will be passed to the DataFrame's `to_sql` function.
Returns
- Either a `bool` or a `SuccessTuple` (depends on `as_tuple`).
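A minimal sketch of uploading a DataFrame (the connector and table name `'events'` are hypothetical):

```
import meerschaum as mrsm
import pandas as pd

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

df = pd.DataFrame({'id': [1, 2], 'msg': ['a', 'b']})

### With as_tuple=True, a (success, message) tuple is returned.
success, msg = conn.to_sql(df, 'events', if_exists='replace', as_tuple=True)
print(success, msg)
```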
616def exec_queries( 617 self, 618 queries: List[ 619 Union[ 620 str, 621 Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]] 622 ] 623 ], 624 break_on_error: bool = False, 625 rollback: bool = True, 626 silent: bool = False, 627 debug: bool = False, 628) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]: 629 """ 630 Execute a list of queries in a single transaction. 631 632 Parameters 633 ---------- 634 queries: List[ 635 Union[ 636 str, 637 Tuple[str, Callable[[], List[str]]] 638 ] 639 ] 640 The queries in the transaction to be executed. 641 If a query is a tuple, the second item of the tuple 642 will be considered a callable hook that returns a list of queries to be executed 643 before the next item in the list. 644 645 break_on_error: bool, default False 646 If `True`, stop executing when a query fails. 647 648 rollback: bool, default True 649 If `break_on_error` is `True`, rollback the transaction if a query fails. 650 651 silent: bool, default False 652 If `True`, suppress warnings. 653 654 Returns 655 ------- 656 A list of SQLAlchemy results. 657 """ 658 from meerschaum.utils.warnings import warn 659 from meerschaum.utils.debug import dprint 660 from meerschaum.utils.packages import attempt_import 661 sqlalchemy, sqlalchemy_orm = attempt_import('sqlalchemy', 'sqlalchemy.orm') 662 session = sqlalchemy_orm.Session(self.engine) 663 664 result = None 665 results = [] 666 with session.begin(): 667 for query in queries: 668 hook = None 669 result = None 670 671 if isinstance(query, tuple): 672 query, hook = query 673 if isinstance(query, str): 674 query = sqlalchemy.text(query) 675 676 if debug: 677 dprint(f"[{self}]\n" + str(query)) 678 679 try: 680 result = session.execute(query) 681 session.flush() 682 except Exception as e: 683 msg = (f"Encountered error while executing:\n{e}") 684 if not silent: 685 warn(msg) 686 elif debug: 687 dprint(f"[{self}]\n" + str(msg)) 688 result = None 689 if result is None and break_on_error: 690 if rollback: 691 session.rollback() 692 break 693 elif result is not None and hook is not None: 694 hook_queries = hook(session) 695 if hook_queries: 696 hook_results = self.exec_queries( 697 hook_queries, 698 break_on_error = break_on_error, 699 rollback=rollback, 700 silent=silent, 701 debug=debug, 702 ) 703 result = (result, hook_results) 704 705 results.append(result) 706 707 return results
Execute a list of queries in a single transaction.
Parameters
- queries (List[Union[str, Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]]]): The queries in the transaction to be executed. If a query is a tuple, the second item of the tuple will be considered a callable hook that returns a list of queries to be executed before the next item in the list.
- break_on_error (bool, default False): If `True`, stop executing when a query fails.
- rollback (bool, default True): If `break_on_error` is `True`, rollback the transaction if a query fails.
- silent (bool, default False): If `True`, suppress warnings.
Returns
- A list of SQLAlchemy results.
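A sketch of a transaction with a hook (the `tmp_events` table is hypothetical); the hook receives the session and returns follow-up queries to run before the next item:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

queries = [
    "CREATE TABLE IF NOT EXISTS tmp_events (id INTEGER)",
    (
        "INSERT INTO tmp_events (id) VALUES (1)",
        ### The hook runs only after the paired query succeeds.
        lambda session: ["INSERT INTO tmp_events (id) VALUES (2)"],
    ),
]
results = conn.exec_queries(queries, break_on_error=True)
```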
1065def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection': 1066 """ 1067 Return the current alive connection. 1068 1069 Parameters 1070 ---------- 1071 rebuild: bool, default False 1072 If `True`, close the previous connection and open a new one. 1073 1074 Returns 1075 ------- 1076 A `sqlalchemy.engine.base.Connection` object. 1077 """ 1078 import threading 1079 if '_thread_connections' not in self.__dict__: 1080 self.__dict__['_thread_connections'] = {} 1081 1082 self._cleanup_connections() 1083 1084 thread_id = threading.get_ident() 1085 1086 thread_connections = self.__dict__.get('_thread_connections', {}) 1087 connection = thread_connections.get(thread_id, None) 1088 1089 if rebuild and connection is not None: 1090 try: 1091 connection.close() 1092 except Exception: 1093 pass 1094 1095 _ = thread_connections.pop(thread_id, None) 1096 connection = None 1097 1098 if connection is None or connection.closed: 1099 connection = self.engine.connect() 1100 thread_connections[thread_id] = connection 1101 1102 return connection
Return the current alive connection.
Parameters
- rebuild (bool, default False): If `True`, close the previous connection and open a new one.
Returns
- A `sqlalchemy.engine.base.Connection` object.
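For example, to grab the current thread's connection (or force a fresh one):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

connection = conn.get_connection()
### Close the old connection and open a new one.
fresh_connection = conn.get_connection(rebuild=True)
```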
642def test_connection( 643 self, 644 **kw: Any 645) -> Union[bool, None]: 646 """ 647 Test if a successful connection to the database may be made. 648 649 Parameters 650 ---------- 651 **kw: 652 The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`. 653 654 Returns 655 ------- 656 `True` if a connection is made, otherwise `False` or `None` in case of failure. 657 658 """ 659 import warnings 660 from meerschaum.connectors.poll import retry_connect 661 _default_kw = {'max_retries': 1, 'retry_wait': 0, 'warn': False, 'connector': self} 662 _default_kw.update(kw) 663 with warnings.catch_warnings(): 664 warnings.filterwarnings('ignore', 'Could not') 665 try: 666 return retry_connect(**_default_kw) 667 except Exception as e: 668 return False
Test if a successful connection to the database may be made.
Parameters
- **kw (Any): The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
Returns
- `True` if a connection is made, otherwise `False` or `None` in case of failure.
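For example (keyword arguments like `max_retries` and `retry_wait` are forwarded to `retry_connect`):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

if not conn.test_connection(max_retries=2):
    print(f"Unable to connect to {conn}.")
```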
17def fetch( 18 self, 19 pipe: mrsm.Pipe, 20 begin: Union[datetime, int, str, None] = '', 21 end: Union[datetime, int, str, None] = None, 22 check_existing: bool = True, 23 chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None, 24 chunksize: Optional[int] = -1, 25 workers: Optional[int] = None, 26 debug: bool = False, 27 **kw: Any 28) -> Union['pd.DataFrame', List[Any], None]: 29 """Execute the SQL definition and return a Pandas DataFrame. 30 31 Parameters 32 ---------- 33 pipe: mrsm.Pipe 34 The pipe object which contains the `fetch` metadata. 35 36 - pipe.columns['datetime']: str 37 - Name of the datetime column for the remote table. 38 - pipe.parameters['fetch']: Dict[str, Any] 39 - Parameters necessary to execute a query. 40 - pipe.parameters['fetch']['definition']: str 41 - Raw SQL query to execute to generate the pandas DataFrame. 42 - pipe.parameters['fetch']['backtrack_minutes']: Union[int, float] 43 - How many minutes before `begin` to search for data (*optional*). 44 45 begin: Union[datetime, int, str, None], default None 46 Most recent datatime to search for data. 47 If `backtrack_minutes` is provided, subtract `backtrack_minutes`. 48 49 end: Union[datetime, int, str, None], default None 50 The latest datetime to search for data. 51 If `end` is `None`, do not bound 52 53 check_existing: bool, defult True 54 If `False`, use a backtrack interval of 0 minutes. 55 56 chunk_hook: Callable[[pd.DataFrame], Any], default None 57 A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame. 58 59 chunksize: Optional[int], default -1 60 How many rows to load into memory at once (when `chunk_hook` is provided). 61 Otherwise the entire result set is loaded into memory. 62 63 workers: Optional[int], default None 64 How many threads to use when consuming the generator (when `chunk_hook is provided). 65 Defaults to the number of cores. 66 67 debug: bool, default False 68 Verbosity toggle. 69 70 Returns 71 ------- 72 A pandas DataFrame or `None`. 73 If `chunk_hook` is not None, return a list of the hook function's results. 74 """ 75 meta_def = self.get_pipe_metadef( 76 pipe, 77 begin=begin, 78 end=end, 79 check_existing=check_existing, 80 debug=debug, 81 **kw 82 ) 83 as_hook_results = chunk_hook is not None 84 chunks = self.read( 85 meta_def, 86 chunk_hook=chunk_hook, 87 as_hook_results=as_hook_results, 88 chunksize=chunksize, 89 workers=workers, 90 debug=debug, 91 ) 92 ### if sqlite, parse for datetimes 93 if not as_hook_results and self.flavor == 'sqlite': 94 from meerschaum.utils.dataframe import parse_df_datetimes 95 from meerschaum.utils.dtypes import are_dtypes_equal 96 ignore_cols = [ 97 col 98 for col, dtype in pipe.dtypes.items() 99 if not are_dtypes_equal(str(dtype), 'datetime') 100 ] 101 return ( 102 parse_df_datetimes( 103 chunk, 104 ignore_cols=ignore_cols, 105 strip_timezone=(pipe.tzinfo is None), 106 debug=debug, 107 ) 108 for chunk in chunks 109 ) 110 return chunks
Execute the SQL definition and return a Pandas DataFrame.
Parameters
- pipe (mrsm.Pipe): The pipe object which contains the `fetch` metadata.
  - pipe.columns['datetime']: the name of the datetime column for the remote table.
  - pipe.parameters['fetch']: parameters necessary to execute a query.
  - pipe.parameters['fetch']['definition']: the raw SQL query to execute to generate the pandas DataFrame.
  - pipe.parameters['fetch']['backtrack_minutes']: how many minutes before `begin` to search for data (optional).
- begin (Union[datetime, int, str, None], default None): The most recent datetime to search for data. If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
- end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If `end` is `None`, do not bound the search.
- check_existing (bool, default True): If `False`, use a backtrack interval of 0 minutes.
- chunk_hook (Callable[[pd.DataFrame], Any], default None): A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.
- chunksize (Optional[int], default -1): How many rows to load into memory at once (when `chunk_hook` is provided). Otherwise the entire result set is loaded into memory.
- workers (Optional[int], default None): How many threads to use when consuming the generator (when `chunk_hook` is provided). Defaults to the number of cores.
- debug (bool, default False): Verbosity toggle.
Returns
- A pandas DataFrame or `None`. If `chunk_hook` is not None, return a list of the hook function's results.
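A sketch of fetching with a pipe (the pipe's keys, `dt` column, and `events` table are hypothetical, and the standard `mrsm.Pipe` constructor keywords are assumed):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

### The definition is the raw SQL to execute; `dt` is used for datetime bounds.
pipe = mrsm.Pipe(
    'sql:local', 'events',
    columns={'datetime': 'dt'},
    parameters={'fetch': {'definition': "SELECT * FROM events"}},
)
data = conn.fetch(pipe, begin='2024-01-01', end='2024-02-01')
```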
113def get_pipe_metadef( 114 self, 115 pipe: mrsm.Pipe, 116 params: Optional[Dict[str, Any]] = None, 117 begin: Union[datetime, int, str, None] = '', 118 end: Union[datetime, int, str, None] = None, 119 check_existing: bool = True, 120 debug: bool = False, 121 **kw: Any 122) -> Union[str, None]: 123 """ 124 Return a pipe's meta definition fetch query. 125 126 params: Optional[Dict[str, Any]], default None 127 Optional params dictionary to build the `WHERE` clause. 128 See `meerschaum.utils.sql.build_where`. 129 130 begin: Union[datetime, int, str, None], default None 131 Most recent datatime to search for data. 132 If `backtrack_minutes` is provided, subtract `backtrack_minutes`. 133 134 end: Union[datetime, int, str, None], default None 135 The latest datetime to search for data. 136 If `end` is `None`, do not bound 137 138 check_existing: bool, default True 139 If `True`, apply the backtrack interval. 140 141 debug: bool, default False 142 Verbosity toggle. 143 144 Returns 145 ------- 146 A pipe's meta definition fetch query string. 147 """ 148 from meerschaum.utils.debug import dprint 149 from meerschaum.utils.warnings import warn, error 150 from meerschaum.utils.sql import sql_item_name, dateadd_str, build_where 151 from meerschaum.utils.misc import is_int 152 from meerschaum.config import get_config 153 154 definition = get_pipe_query(pipe) 155 156 if not pipe.columns.get('datetime', None): 157 _dt = pipe.guess_datetime() 158 dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None 159 is_guess = True 160 else: 161 _dt = pipe.get_columns('datetime') 162 dt_name = sql_item_name(_dt, self.flavor, None) 163 is_guess = False 164 165 if begin not in (None, '') or end is not None: 166 if is_guess: 167 if _dt is None: 168 warn( 169 f"Unable to determine a datetime column for {pipe}." 
170 + "\n Ignoring begin and end...", 171 stack = False, 172 ) 173 begin, end = '', None 174 else: 175 warn( 176 f"A datetime wasn't specified for {pipe}.\n" 177 + f" Using column \"{_dt}\" for datetime bounds...", 178 stack = False 179 ) 180 181 apply_backtrack = begin == '' and check_existing 182 backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug) 183 btm = ( 184 int(backtrack_interval.total_seconds() / 60) 185 if isinstance(backtrack_interval, timedelta) 186 else backtrack_interval 187 ) 188 begin = ( 189 pipe.get_sync_time(debug=debug) 190 if begin == '' 191 else begin 192 ) 193 194 if begin and end and begin >= end: 195 begin = None 196 197 if dt_name: 198 begin_da = ( 199 dateadd_str( 200 flavor=self.flavor, 201 datepart='minute', 202 number=((-1 * btm) if apply_backtrack else 0), 203 begin=begin, 204 ) 205 if begin 206 else None 207 ) 208 end_da = ( 209 dateadd_str( 210 flavor=self.flavor, 211 datepart='minute', 212 number=0, 213 begin=end, 214 ) 215 if end 216 else None 217 ) 218 219 meta_def = ( 220 _simple_fetch_query(pipe, self.flavor) if ( 221 (not (pipe.columns or {}).get('id', None)) 222 or (not get_config('system', 'experimental', 'join_fetch')) 223 ) else _join_fetch_query(pipe, self.flavor, debug=debug, **kw) 224 ) 225 226 has_where = 'where' in meta_def.lower()[meta_def.lower().rfind('definition'):] 227 if dt_name and (begin_da or end_da): 228 definition_dt_name = ( 229 dateadd_str(self.flavor, 'minute', 0, f"definition.{dt_name}") 230 if not is_int((begin_da or end_da)) 231 else f"definition.{dt_name}" 232 ) 233 meta_def += "\n" + ("AND" if has_where else "WHERE") + " " 234 has_where = True 235 if begin_da: 236 meta_def += f"{definition_dt_name} >= {begin_da}" 237 if begin_da and end_da: 238 meta_def += " AND " 239 if end_da: 240 meta_def += f"{definition_dt_name} < {end_da}" 241 242 if params is not None: 243 params_where = build_where(params, self, with_where=False) 244 meta_def += "\n" + ("AND" if has_where else "WHERE") + " " 245 has_where = True 246 meta_def += params_where 247 248 return meta_def
Return a pipe's meta definition fetch query.
Parameters
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the `WHERE` clause. See `meerschaum.utils.sql.build_where`.
- begin (Union[datetime, int, str, None], default None): The most recent datetime to search for data. If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
- end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If `end` is `None`, do not bound the search.
- check_existing (bool, default True): If `True`, apply the backtrack interval.
- debug (bool, default False): Verbosity toggle.
Returns
- A pipe's meta definition fetch query string.
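To preview the bounded query which `fetch()` would run (same hypothetical pipe as in the sketch above):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

pipe = mrsm.Pipe(
    'sql:local', 'events',
    columns={'datetime': 'dt'},
    parameters={'fetch': {'definition': "SELECT * FROM events"}},
)
### Returns the definition wrapped with WHERE clauses for the bounds and params.
meta_def = conn.get_pipe_metadef(
    pipe,
    begin='2024-01-01',
    end='2024-02-01',
    params={'status': 'active'},
)
print(meta_def)
```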
35def cli( 36 self, 37 debug: bool = False, 38 ) -> SuccessTuple: 39 """ 40 Launch a subprocess for an interactive CLI. 41 """ 42 from meerschaum.utils.venv import venv_exec 43 env = copy.deepcopy(dict(os.environ)) 44 env[f'MRSM_SQL_{self.label.upper()}'] = json.dumps(self.meta) 45 cli_code = ( 46 "import sys\n" 47 "import meerschaum as mrsm\n" 48 f"conn = mrsm.get_connector('sql:{self.label}')\n" 49 "success, msg = conn._cli_exit()\n" 50 "mrsm.pprint((success, msg))\n" 51 "if not success:\n" 52 " raise Exception(msg)" 53 ) 54 try: 55 _ = venv_exec(cli_code, venv=None, debug=debug, capture_output=False) 56 except Exception as e: 57 return False, f"[{self}] Failed to start CLI:\n{e}" 58 return True, "Success"
Launch a subprocess for an interactive CLI.
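For example, to open the flavor's interactive shell for this connector:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

### Blocks until the CLI subprocess exits; returns a (success, message) tuple.
success, msg = conn.cli()
```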
144def fetch_pipes_keys( 145 self, 146 connector_keys: Optional[List[str]] = None, 147 metric_keys: Optional[List[str]] = None, 148 location_keys: Optional[List[str]] = None, 149 tags: Optional[List[str]] = None, 150 params: Optional[Dict[str, Any]] = None, 151 debug: bool = False 152) -> Optional[List[Tuple[str, str, Optional[str]]]]: 153 """ 154 Return a list of tuples corresponding to the parameters provided. 155 156 Parameters 157 ---------- 158 connector_keys: Optional[List[str]], default None 159 List of connector_keys to search by. 160 161 metric_keys: Optional[List[str]], default None 162 List of metric_keys to search by. 163 164 location_keys: Optional[List[str]], default None 165 List of location_keys to search by. 166 167 params: Optional[Dict[str, Any]], default None 168 Dictionary of additional parameters to search by. 169 E.g. `--params pipe_id:1` 170 171 debug: bool, default False 172 Verbosity toggle. 173 """ 174 from meerschaum.utils.debug import dprint 175 from meerschaum.utils.packages import attempt_import 176 from meerschaum.utils.misc import separate_negation_values, flatten_list 177 from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists 178 from meerschaum.config.static import STATIC_CONFIG 179 import json 180 from copy import deepcopy 181 sqlalchemy, sqlalchemy_sql_functions = attempt_import('sqlalchemy', 'sqlalchemy.sql.functions') 182 coalesce = sqlalchemy_sql_functions.coalesce 183 184 if connector_keys is None: 185 connector_keys = [] 186 if metric_keys is None: 187 metric_keys = [] 188 if location_keys is None: 189 location_keys = [] 190 else: 191 location_keys = [ 192 ( 193 lk 194 if lk not in ('[None]', 'None', 'null') 195 else 'None' 196 ) 197 for lk in location_keys 198 ] 199 if tags is None: 200 tags = [] 201 202 if params is None: 203 params = {} 204 205 ### Add three primary keys to params dictionary 206 ### (separated for convenience of arguments). 207 cols = { 208 'connector_keys': [str(ck) for ck in connector_keys], 209 'metric_key': [str(mk) for mk in metric_keys], 210 'location_key': [str(lk) for lk in location_keys], 211 } 212 213 ### Make deep copy so we don't mutate this somewhere else. 214 parameters = deepcopy(params) 215 for col, vals in cols.items(): 216 if vals not in [[], ['*']]: 217 parameters[col] = vals 218 219 if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug): 220 return [] 221 222 from meerschaum.connectors.sql.tables import get_tables 223 pipes_tbl = get_tables(mrsm_instance=self, create=False, debug=debug)['pipes'] 224 225 _params = {} 226 for k, v in parameters.items(): 227 _v = json.dumps(v) if isinstance(v, dict) else v 228 _params[k] = _v 229 230 negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix'] 231 ### Parse regular params. 232 ### If a param begins with '_', negate it instead. 
233 _where = [ 234 ( 235 (coalesce(pipes_tbl.c[key], 'None') == val) 236 if not str(val).startswith(negation_prefix) 237 else (pipes_tbl.c[key] != key) 238 ) for key, val in _params.items() 239 if not isinstance(val, (list, tuple)) and key in pipes_tbl.c 240 ] 241 select_cols = ( 242 [ 243 pipes_tbl.c.connector_keys, 244 pipes_tbl.c.metric_key, 245 pipes_tbl.c.location_key, 246 ] 247 ) 248 249 q = sqlalchemy.select(*select_cols).where(sqlalchemy.and_(True, *_where)) 250 for c, vals in cols.items(): 251 if not isinstance(vals, (list, tuple)) or not vals or not c in pipes_tbl.c: 252 continue 253 _in_vals, _ex_vals = separate_negation_values(vals) 254 q = q.where(coalesce(pipes_tbl.c[c], 'None').in_(_in_vals)) if _in_vals else q 255 q = q.where(coalesce(pipes_tbl.c[c], 'None').not_in(_ex_vals)) if _ex_vals else q 256 257 ### Finally, parse tags. 258 tag_groups = [tag.split(',') for tag in tags] 259 in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups] 260 261 ors, nands = [], [] 262 for _in_tags, _ex_tags in in_ex_tag_groups: 263 sub_ands = [] 264 for nt in _in_tags: 265 sub_ands.append( 266 sqlalchemy.cast( 267 pipes_tbl.c['parameters'], 268 sqlalchemy.String, 269 ).like(f'%"tags":%"{nt}"%') 270 ) 271 if sub_ands: 272 ors.append(sqlalchemy.and_(*sub_ands)) 273 274 for xt in _ex_tags: 275 nands.append( 276 sqlalchemy.cast( 277 pipes_tbl.c['parameters'], 278 sqlalchemy.String, 279 ).not_like(f'%"tags":%"{xt}"%') 280 ) 281 282 q = q.where(sqlalchemy.and_(*nands)) if nands else q 283 q = q.where(sqlalchemy.or_(*ors)) if ors else q 284 loc_asc = sqlalchemy.asc(pipes_tbl.c['location_key']) 285 if self.flavor not in OMIT_NULLSFIRST_FLAVORS: 286 loc_asc = sqlalchemy.nullsfirst(loc_asc) 287 q = q.order_by( 288 sqlalchemy.asc(pipes_tbl.c['connector_keys']), 289 sqlalchemy.asc(pipes_tbl.c['metric_key']), 290 loc_asc, 291 ) 292 293 ### execute the query and return a list of tuples 294 if debug: 295 dprint(q.compile(compile_kwargs={'literal_binds': True})) 296 try: 297 rows = ( 298 self.execute(q).fetchall() 299 if self.flavor != 'duckdb' 300 else [ 301 (row['connector_keys'], row['metric_key'], row['location_key']) 302 for row in self.read(q).to_dict(orient='records') 303 ] 304 ) 305 except Exception as e: 306 error(str(e)) 307 308 return [(row[0], row[1], row[2]) for row in rows]
Return a list of tuples corresponding to the parameters provided.
Parameters
- connector_keys (Optional[List[str]], default None): List of connector_keys to search by.
- metric_keys (Optional[List[str]], default None): List of metric_keys to search by.
- location_keys (Optional[List[str]], default None): List of location_keys to search by.
- tags (Optional[List[str]], default None): List of tags to search by.
- params (Optional[Dict[str, Any]], default None): Dictionary of additional parameters to search by, e.g. `--params pipe_id:1`.
- debug (bool, default False): Verbosity toggle.
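A sketch of filtering registered pipes (assuming this connector is a Meerschaum instance; the keys and tag are hypothetical):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

keys = conn.fetch_pipes_keys(
    connector_keys=['sql:local'],
    tags=['production'],
)
for ck, mk, lk in (keys or []):
    print(ck, mk, lk)
```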
311def create_indices( 312 self, 313 pipe: mrsm.Pipe, 314 indices: Optional[List[str]] = None, 315 debug: bool = False 316) -> bool: 317 """ 318 Create a pipe's indices. 319 """ 320 from meerschaum.utils.sql import sql_item_name, update_queries 321 from meerschaum.utils.debug import dprint 322 if debug: 323 dprint(f"Creating indices for {pipe}...") 324 if not pipe.indices: 325 warn(f"{pipe} has no index columns; skipping index creation.", stack=False) 326 return True 327 328 _ = pipe.__dict__.pop('_columns_indices', None) 329 ix_queries = { 330 ix: queries 331 for ix, queries in self.get_create_index_queries(pipe, debug=debug).items() 332 if indices is None or ix in indices 333 } 334 success = True 335 for ix, queries in ix_queries.items(): 336 ix_success = all(self.exec_queries(queries, debug=debug, silent=False)) 337 success = success and ix_success 338 if not ix_success: 339 warn(f"Failed to create index on column: {ix}") 340 341 return success
Create a pipe's indices.
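For example (assuming the pipe's target table already exists on this instance and the standard `mrsm.Pipe` constructor keywords):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

pipe = mrsm.Pipe('sql:local', 'events', instance=conn, columns={'datetime': 'dt'})
if conn.create_indices(pipe):
    print(f"Created indices for {pipe}.")
```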
344def drop_indices( 345 self, 346 pipe: mrsm.Pipe, 347 indices: Optional[List[str]] = None, 348 debug: bool = False 349) -> bool: 350 """ 351 Drop a pipe's indices. 352 """ 353 from meerschaum.utils.debug import dprint 354 if debug: 355 dprint(f"Dropping indices for {pipe}...") 356 if not pipe.columns: 357 warn(f"Unable to drop indices for {pipe} without columns.", stack=False) 358 return False 359 ix_queries = { 360 ix: queries 361 for ix, queries in self.get_drop_index_queries(pipe, debug=debug).items() 362 if indices is None or ix in indices 363 } 364 success = True 365 for ix, queries in ix_queries.items(): 366 ix_success = all(self.exec_queries(queries, debug=debug, silent=True)) 367 if not ix_success: 368 success = False 369 if debug: 370 dprint(f"Failed to drop index on column: {ix}") 371 return success
Drop a pipe's indices.
374def get_create_index_queries( 375 self, 376 pipe: mrsm.Pipe, 377 debug: bool = False, 378) -> Dict[str, List[str]]: 379 """ 380 Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query. 381 382 Parameters 383 ---------- 384 pipe: mrsm.Pipe 385 The pipe to which the queries will correspond. 386 387 Returns 388 ------- 389 A dictionary of index names mapping to lists of queries. 390 """ 391 ### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly. 392 if self.flavor == 'duckdb': 393 return {} 394 from meerschaum.utils.sql import ( 395 sql_item_name, 396 get_distinct_col_count, 397 update_queries, 398 get_null_replacement, 399 get_create_table_queries, 400 get_rename_table_queries, 401 COALESCE_UNIQUE_INDEX_FLAVORS, 402 ) 403 from meerschaum.utils.dtypes.sql import ( 404 get_db_type_from_pd_type, 405 get_pd_type_from_db_type, 406 AUTO_INCREMENT_COLUMN_FLAVORS, 407 ) 408 from meerschaum.config import get_config 409 index_queries = {} 410 411 upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries 412 static = pipe.parameters.get('static', False) 413 index_names = pipe.get_indices() 414 indices = pipe.indices 415 existing_cols_types = pipe.get_columns_types(debug=debug) 416 existing_cols_pd_types = { 417 col: get_pd_type_from_db_type(typ) 418 for col, typ in existing_cols_types.items() 419 } 420 existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug) 421 existing_ix_names = set() 422 existing_primary_keys = [] 423 for col, col_indices in existing_cols_indices.items(): 424 for col_ix_doc in col_indices: 425 existing_ix_names.add(col_ix_doc.get('name', None)) 426 if col_ix_doc.get('type', None) == 'PRIMARY KEY': 427 existing_primary_keys.append(col) 428 429 _datetime = pipe.get_columns('datetime', error=False) 430 _datetime_name = ( 431 sql_item_name(_datetime, self.flavor, None) 432 if _datetime is not None else None 433 ) 434 _datetime_index_name = ( 435 sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None) 436 if index_names.get('datetime', None) 437 else None 438 ) 439 _id = pipe.get_columns('id', error=False) 440 _id_name = ( 441 sql_item_name(_id, self.flavor, None) 442 if _id is not None 443 else None 444 ) 445 primary_key = pipe.columns.get('primary', None) 446 primary_key_name = ( 447 sql_item_name(primary_key, flavor=self.flavor, schema=None) 448 if primary_key 449 else None 450 ) 451 autoincrement = ( 452 pipe.parameters.get('autoincrement', False) 453 or ( 454 primary_key is not None 455 and primary_key not in existing_cols_pd_types 456 ) 457 ) 458 primary_key_db_type = ( 459 get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int'), self.flavor) 460 if primary_key 461 else None 462 ) 463 primary_key_constraint_name = ( 464 sql_item_name(f'pk_{pipe.target}', self.flavor, None) 465 if primary_key is not None 466 else None 467 ) 468 469 _id_index_name = ( 470 sql_item_name(index_names['id'], self.flavor, None) 471 if index_names.get('id', None) 472 else None 473 ) 474 _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 475 _create_space_partition = get_config('system', 'experimental', 'space') 476 477 ### create datetime index 478 if _datetime is not None: 479 if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True): 480 _id_count = ( 481 get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self) 482 if (_id is not None and _create_space_partition) else None 483 ) 484 485 chunk_interval = 
pipe.get_chunk_interval(debug=debug) 486 chunk_interval_minutes = ( 487 chunk_interval 488 if isinstance(chunk_interval, int) 489 else int(chunk_interval.total_seconds() / 60) 490 ) 491 chunk_time_interval = ( 492 f"INTERVAL '{chunk_interval_minutes} MINUTES'" 493 if isinstance(chunk_interval, timedelta) 494 else f'{chunk_interval_minutes}' 495 ) 496 497 dt_query = ( 498 f"SELECT public.create_hypertable('{_pipe_name}', " + 499 f"'{_datetime}', " 500 + ( 501 f"'{_id}', {_id_count}, " if (_id is not None and _create_space_partition) 502 else '' 503 ) 504 + f'chunk_time_interval => {chunk_time_interval}, ' 505 + 'if_not_exists => true, ' 506 + "migrate_data => true);" 507 ) 508 elif self.flavor == 'mssql': 509 dt_query = ( 510 "CREATE " 511 + ("CLUSTERED " if not primary_key else '') 512 + f"INDEX {_datetime_index_name} " 513 + f"ON {_pipe_name} ({_datetime_name})" 514 ) 515 else: ### mssql, sqlite, etc. 516 dt_query = ( 517 f"CREATE INDEX {_datetime_index_name} " 518 + f"ON {_pipe_name} ({_datetime_name})" 519 ) 520 521 index_queries[_datetime] = [dt_query] 522 523 primary_queries = [] 524 if ( 525 primary_key is not None 526 and primary_key not in existing_primary_keys 527 and not static 528 ): 529 if autoincrement and primary_key not in existing_cols_pd_types: 530 autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get( 531 self.flavor, 532 AUTO_INCREMENT_COLUMN_FLAVORS['default'] 533 ) 534 primary_queries.extend([ 535 ( 536 f"ALTER TABLE {_pipe_name}\n" 537 f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}" 538 ), 539 ]) 540 elif not autoincrement and primary_key in existing_cols_pd_types: 541 if self.flavor == 'sqlite': 542 new_table_name = sql_item_name( 543 f'_new_{pipe.target}', 544 self.flavor, 545 self.get_pipe_schema(pipe) 546 ) 547 select_cols_str = ', '.join( 548 [ 549 sql_item_name(col, self.flavor, None) 550 for col in existing_cols_types 551 ] 552 ) 553 primary_queries.extend( 554 get_create_table_queries( 555 existing_cols_pd_types, 556 f'_new_{pipe.target}', 557 self.flavor, 558 schema=self.get_pipe_schema(pipe), 559 primary_key=primary_key, 560 ) + [ 561 ( 562 f"INSERT INTO {new_table_name} ({select_cols_str})\n" 563 f"SELECT {select_cols_str}\nFROM {_pipe_name}" 564 ), 565 f"DROP TABLE {_pipe_name}", 566 ] + get_rename_table_queries( 567 f'_new_{pipe.target}', 568 pipe.target, 569 self.flavor, 570 schema=self.get_pipe_schema(pipe), 571 ) 572 ) 573 elif self.flavor == 'oracle': 574 primary_queries.extend([ 575 ( 576 f"ALTER TABLE {_pipe_name}\n" 577 f"MODIFY {primary_key_name} NOT NULL" 578 ), 579 ( 580 f"ALTER TABLE {_pipe_name}\n" 581 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 582 ) 583 ]) 584 elif self.flavor in ('mysql', 'mariadb'): 585 primary_queries.extend([ 586 ( 587 f"ALTER TABLE {_pipe_name}\n" 588 f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL" 589 ), 590 ( 591 f"ALTER TABLE {_pipe_name}\n" 592 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 593 ) 594 ]) 595 elif self.flavor == 'timescaledb': 596 primary_queries.extend([ 597 ( 598 f"ALTER TABLE {_pipe_name}\n" 599 f"ALTER COLUMN {primary_key_name} SET NOT NULL" 600 ), 601 ( 602 f"ALTER TABLE {_pipe_name}\n" 603 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + ( 604 f"{_datetime_name}, " if _datetime_name else "" 605 ) + f"{primary_key_name})" 606 ), 607 ]) 608 elif self.flavor in ('citus', 'postgresql', 'duckdb'): 609 primary_queries.extend([ 610 ( 611 f"ALTER TABLE {_pipe_name}\n" 612 f"ALTER COLUMN 
{primary_key_name} SET NOT NULL" 613 ), 614 ( 615 f"ALTER TABLE {_pipe_name}\n" 616 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 617 ), 618 ]) 619 else: 620 primary_queries.extend([ 621 ( 622 f"ALTER TABLE {_pipe_name}\n" 623 f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL" 624 ), 625 ( 626 f"ALTER TABLE {_pipe_name}\n" 627 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 628 ), 629 ]) 630 index_queries[primary_key] = primary_queries 631 632 ### create id index 633 if _id_name is not None: 634 if self.flavor == 'timescaledb': 635 ### Already created indices via create_hypertable. 636 id_query = ( 637 None if (_id is not None and _create_space_partition) 638 else ( 639 f"CREATE INDEX IF NOT EXISTS {_id_index_name} ON {_pipe_name} ({_id_name})" 640 if _id is not None 641 else None 642 ) 643 ) 644 pass 645 else: ### mssql, sqlite, etc. 646 id_query = f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})" 647 648 if id_query is not None: 649 index_queries[_id] = id_query if isinstance(id_query, list) else [id_query] 650 651 ### Create indices for other labels in `pipe.columns`. 652 other_index_names = { 653 ix_key: ix_unquoted 654 for ix_key, ix_unquoted in index_names.items() 655 if ix_key not in ('datetime', 'id', 'primary') and ix_unquoted not in existing_ix_names 656 } 657 for ix_key, ix_unquoted in other_index_names.items(): 658 ix_name = sql_item_name(ix_unquoted, self.flavor, None) 659 cols = indices[ix_key] 660 if not isinstance(cols, (list, tuple)): 661 cols = [cols] 662 cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col] 663 if not cols_names: 664 continue 665 cols_names_str = ", ".join(cols_names) 666 index_queries[ix_key] = [f"CREATE INDEX {ix_name} ON {_pipe_name} ({cols_names_str})"] 667 668 indices_cols_str = ', '.join( 669 list({ 670 sql_item_name(ix, self.flavor) 671 for ix_key, ix in pipe.columns.items() 672 if ix and ix in existing_cols_types 673 }) 674 ) 675 coalesce_indices_cols_str = ', '.join( 676 [ 677 ( 678 "COALESCE(" 679 + sql_item_name(ix, self.flavor) 680 + ", " 681 + get_null_replacement(existing_cols_types[ix], self.flavor) 682 + ") " 683 ) if ix_key != 'datetime' else (sql_item_name(ix, self.flavor)) 684 for ix_key, ix in pipe.columns.items() 685 if ix and ix in existing_cols_types 686 ] 687 ) 688 unique_index_name = sql_item_name(pipe.target + '_unique_index', self.flavor) 689 constraint_name = sql_item_name(pipe.target + '_constraint', self.flavor) 690 add_constraint_query = ( 691 f"ALTER TABLE {_pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})" 692 ) 693 unique_index_cols_str = ( 694 indices_cols_str 695 if self.flavor not in COALESCE_UNIQUE_INDEX_FLAVORS 696 else coalesce_indices_cols_str 697 ) 698 create_unique_index_query = ( 699 f"CREATE UNIQUE INDEX {unique_index_name} ON {_pipe_name} ({unique_index_cols_str})" 700 ) 701 constraint_queries = [create_unique_index_query] 702 if self.flavor != 'sqlite': 703 constraint_queries.append(add_constraint_query) 704 if upsert and indices_cols_str: 705 index_queries[unique_index_name] = constraint_queries 706 return index_queries
Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.
Parameters
- pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
- A dictionary of index names mapping to lists of queries.
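To inspect the generated queries without executing them (same hypothetical pipe as above):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')

pipe = mrsm.Pipe('sql:local', 'events', instance=conn, columns={'datetime': 'dt'})
### Each index name maps to the list of queries which would create it.
for ix, queries in conn.get_create_index_queries(pipe).items():
    for query in queries:
        print(ix, '->', query)
```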
709def get_drop_index_queries( 710 self, 711 pipe: mrsm.Pipe, 712 debug: bool = False, 713) -> Dict[str, List[str]]: 714 """ 715 Return a dictionary mapping columns to a `DROP INDEX` or equivalent query. 716 717 Parameters 718 ---------- 719 pipe: mrsm.Pipe 720 The pipe to which the queries will correspond. 721 722 Returns 723 ------- 724 A dictionary of column names mapping to lists of queries. 725 """ 726 ### NOTE: Due to breaking changes within DuckDB, indices must be skipped. 727 if self.flavor == 'duckdb': 728 return {} 729 if not pipe.exists(debug=debug): 730 return {} 731 from meerschaum.utils.sql import ( 732 sql_item_name, 733 table_exists, 734 hypertable_queries, 735 DROP_IF_EXISTS_FLAVORS, 736 ) 737 drop_queries = {} 738 schema = self.get_pipe_schema(pipe) 739 schema_prefix = (schema + '_') if schema else '' 740 indices = { 741 col: schema_prefix + ix 742 for col, ix in pipe.get_indices().items() 743 } 744 pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 745 pipe_name_no_schema = sql_item_name(pipe.target, self.flavor, None) 746 747 if self.flavor not in hypertable_queries: 748 is_hypertable = False 749 else: 750 is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name) 751 is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None 752 753 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 754 if is_hypertable: 755 nuke_queries = [] 756 temp_table = '_' + pipe.target + '_temp_migration' 757 temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe)) 758 759 if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug): 760 nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}") 761 nuke_queries += [ 762 f"SELECT * INTO {temp_table_name} FROM {pipe_name}", 763 f"DROP TABLE {if_exists_str} {pipe_name}", 764 f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}", 765 ] 766 nuke_ix_keys = ('datetime', 'id') 767 nuked = False 768 for ix_key in nuke_ix_keys: 769 if ix_key in indices and not nuked: 770 drop_queries[ix_key] = nuke_queries 771 nuked = True 772 773 drop_queries.update({ 774 ix_key: ["DROP INDEX " + sql_item_name(ix_unquoted, self.flavor, None)] 775 for ix_key, ix_unquoted in indices.items() 776 if ix_key not in drop_queries 777 }) 778 return drop_queries
Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.
Parameters
- pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
- A dictionary of column names mapping to lists of queries.
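A corresponding sketch for tearing indices down, reusing the hypothetical `conn` and `pipe` from the sketch above. An empty dict comes back if the pipe doesn't exist (or on DuckDB):
```
>>> drop_queries = conn.get_drop_index_queries(pipe)
>>> ### Flatten the per-column lists before executing.
>>> flat = [q for queries in drop_queries.values() for q in queries]
>>> results = conn.exec_queries(flat, debug=True)
```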
2827def get_add_columns_queries( 2828 self, 2829 pipe: mrsm.Pipe, 2830 df: Union[pd.DataFrame, Dict[str, str]], 2831 _is_db_types: bool = False, 2832 debug: bool = False, 2833) -> List[str]: 2834 """ 2835 Add new null columns of the correct type to a table from a dataframe. 2836 2837 Parameters 2838 ---------- 2839 pipe: mrsm.Pipe 2840 The pipe to be altered. 2841 2842 df: Union[pd.DataFrame, Dict[str, str]] 2843 The pandas DataFrame which contains new columns. 2844 If a dictionary is provided, assume it maps columns to Pandas data types. 2845 2846 _is_db_types: bool, default False 2847 If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes. 2848 2849 Returns 2850 ------- 2851 A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector. 2852 """ 2853 if not pipe.exists(debug=debug): 2854 return [] 2855 2856 if pipe.parameters.get('static', False): 2857 return [] 2858 2859 from decimal import Decimal 2860 import copy 2861 from meerschaum.utils.sql import ( 2862 sql_item_name, 2863 SINGLE_ALTER_TABLE_FLAVORS, 2864 get_table_cols_types, 2865 ) 2866 from meerschaum.utils.dtypes.sql import ( 2867 get_pd_type_from_db_type, 2868 get_db_type_from_pd_type, 2869 ) 2870 from meerschaum.utils.misc import flatten_list 2871 table_obj = self.get_pipe_table(pipe, debug=debug) 2872 is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False 2873 if is_dask: 2874 df = df.partitions[0].compute() 2875 df_cols_types = ( 2876 { 2877 col: str(typ) 2878 for col, typ in df.dtypes.items() 2879 } 2880 if not isinstance(df, dict) 2881 else copy.deepcopy(df) 2882 ) 2883 if not isinstance(df, dict) and len(df.index) > 0: 2884 for col, typ in list(df_cols_types.items()): 2885 if typ != 'object': 2886 continue 2887 val = df.iloc[0][col] 2888 if isinstance(val, (dict, list)): 2889 df_cols_types[col] = 'json' 2890 elif isinstance(val, Decimal): 2891 df_cols_types[col] = 'numeric' 2892 elif isinstance(val, str): 2893 df_cols_types[col] = 'str' 2894 db_cols_types = { 2895 col: get_pd_type_from_db_type(str(typ.type)) 2896 for col, typ in table_obj.columns.items() 2897 } if table_obj is not None else { 2898 col: get_pd_type_from_db_type(typ) 2899 for col, typ in get_table_cols_types( 2900 pipe.target, 2901 self, 2902 schema=self.get_pipe_schema(pipe), 2903 debug=debug, 2904 ).items() 2905 } 2906 new_cols = set(df_cols_types) - set(db_cols_types) 2907 if not new_cols: 2908 return [] 2909 2910 new_cols_types = { 2911 col: get_db_type_from_pd_type( 2912 df_cols_types[col], 2913 self.flavor 2914 ) for col in new_cols 2915 } 2916 2917 alter_table_query = "ALTER TABLE " + sql_item_name( 2918 pipe.target, self.flavor, self.get_pipe_schema(pipe) 2919 ) 2920 queries = [] 2921 for col, typ in new_cols_types.items(): 2922 add_col_query = ( 2923 "\nADD " 2924 + sql_item_name(col, self.flavor, None) 2925 + " " + typ + "," 2926 ) 2927 2928 if self.flavor in SINGLE_ALTER_TABLE_FLAVORS: 2929 queries.append(alter_table_query + add_col_query[:-1]) 2930 else: 2931 alter_table_query += add_col_query 2932 2933 ### For most flavors, only one query is required. 2934 ### This covers SQLite which requires one query per column. 2935 if not queries: 2936 queries.append(alter_table_query[:-1]) 2937 2938 if self.flavor != 'duckdb': 2939 return queries 2940 2941 ### NOTE: For DuckDB, we must drop and rebuild the indices. 
2942 drop_index_queries = list(flatten_list( 2943 [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()] 2944 )) 2945 create_index_queries = list(flatten_list( 2946 [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()] 2947 )) 2948 2949 return drop_index_queries + queries + create_index_queries
Add new null columns of the correct type to a table from a dataframe.
Parameters
- pipe (mrsm.Pipe): The pipe to be altered.
- df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which contains new columns. If a dictionary is provided, assume it maps columns to Pandas data types.
- _is_db_types (bool, default False): If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.
Returns
- A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
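As a sketch (same hypothetical `conn` and `pipe` as above), the `df` argument may be a plain mapping of column names to Pandas dtypes, so no DataFrame is needed just to add a column. The `humidity` column is an assumption for illustration:
```
>>> ### A dict of columns to Pandas dtypes works in place of a DataFrame.
>>> add_queries = conn.get_add_columns_queries(pipe, {'humidity': 'float64'})
>>> conn.exec_queries(add_queries, debug=True)
```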
2952def get_alter_columns_queries( 2953 self, 2954 pipe: mrsm.Pipe, 2955 df: Union[pd.DataFrame, Dict[str, str]], 2956 debug: bool = False, 2957) -> List[str]: 2958 """ 2959 If we encounter a column of a different type, set the entire column to text. 2960 If the altered columns are numeric, alter to numeric instead. 2961 2962 Parameters 2963 ---------- 2964 pipe: mrsm.Pipe 2965 The pipe to be altered. 2966 2967 df: Union[pd.DataFrame, Dict[str, str]] 2968 The pandas DataFrame which may contain altered columns. 2969 If a dict is provided, assume it maps columns to Pandas data types. 2970 2971 Returns 2972 ------- 2973 A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector. 2974 """ 2975 if not pipe.exists(debug=debug): 2976 return [] 2977 if pipe.static: 2978 return 2979 from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS, get_table_cols_types 2980 from meerschaum.utils.dataframe import get_numeric_cols 2981 from meerschaum.utils.dtypes import are_dtypes_equal 2982 from meerschaum.utils.dtypes.sql import ( 2983 get_pd_type_from_db_type, 2984 get_db_type_from_pd_type, 2985 ) 2986 from meerschaum.utils.misc import flatten_list, generate_password, items_str 2987 table_obj = self.get_pipe_table(pipe, debug=debug) 2988 target = pipe.target 2989 session_id = generate_password(3) 2990 numeric_cols = ( 2991 get_numeric_cols(df) 2992 if not isinstance(df, dict) 2993 else [ 2994 col 2995 for col, typ in df.items() 2996 if typ == 'numeric' 2997 ] 2998 ) 2999 df_cols_types = ( 3000 { 3001 col: str(typ) 3002 for col, typ in df.dtypes.items() 3003 } 3004 if not isinstance(df, dict) 3005 else df 3006 ) 3007 db_cols_types = { 3008 col: get_pd_type_from_db_type(str(typ.type)) 3009 for col, typ in table_obj.columns.items() 3010 } if table_obj is not None else { 3011 col: get_pd_type_from_db_type(typ) 3012 for col, typ in get_table_cols_types( 3013 pipe.target, 3014 self, 3015 schema=self.get_pipe_schema(pipe), 3016 debug=debug, 3017 ).items() 3018 } 3019 pipe_bool_cols = [col for col, typ in pipe.dtypes.items() if are_dtypes_equal(str(typ), 'bool')] 3020 pd_db_df_aliases = { 3021 'int': 'bool', 3022 'float': 'bool', 3023 'numeric': 'bool', 3024 'guid': 'object', 3025 } 3026 if self.flavor == 'oracle': 3027 pd_db_df_aliases['int'] = 'numeric' 3028 3029 altered_cols = { 3030 col: (db_cols_types.get(col, 'object'), typ) 3031 for col, typ in df_cols_types.items() 3032 if not are_dtypes_equal(typ, db_cols_types.get(col, 'object').lower()) 3033 and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string') 3034 } 3035 3036 ### NOTE: Sometimes bools are coerced into ints or floats. 3037 altered_cols_to_ignore = set() 3038 for col, (db_typ, df_typ) in altered_cols.items(): 3039 for db_alias, df_alias in pd_db_df_aliases.items(): 3040 if db_alias in db_typ.lower() and df_alias in df_typ.lower(): 3041 altered_cols_to_ignore.add(col) 3042 3043 ### Oracle's bool handling sometimes mixes NUMBER and INT. 
3044 for bool_col in pipe_bool_cols: 3045 if bool_col not in altered_cols: 3046 continue 3047 db_is_bool_compatible = ( 3048 are_dtypes_equal('int', altered_cols[bool_col][0]) 3049 or are_dtypes_equal('float', altered_cols[bool_col][0]) 3050 or are_dtypes_equal('numeric', altered_cols[bool_col][0]) 3051 or are_dtypes_equal('bool', altered_cols[bool_col][0]) 3052 ) 3053 df_is_bool_compatible = ( 3054 are_dtypes_equal('int', altered_cols[bool_col][1]) 3055 or are_dtypes_equal('float', altered_cols[bool_col][1]) 3056 or are_dtypes_equal('numeric', altered_cols[bool_col][1]) 3057 or are_dtypes_equal('bool', altered_cols[bool_col][1]) 3058 ) 3059 if db_is_bool_compatible and df_is_bool_compatible: 3060 altered_cols_to_ignore.add(bool_col) 3061 3062 for col in altered_cols_to_ignore: 3063 _ = altered_cols.pop(col, None) 3064 if not altered_cols: 3065 return [] 3066 3067 if numeric_cols: 3068 pipe.dtypes.update({col: 'numeric' for col in numeric_cols}) 3069 edit_success, edit_msg = pipe.edit(debug=debug) 3070 if not edit_success: 3071 warn( 3072 f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n" 3073 + f"{edit_msg}" 3074 ) 3075 else: 3076 numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ == 'numeric']) 3077 3078 numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False) 3079 text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False) 3080 altered_cols_types = { 3081 col: ( 3082 numeric_type 3083 if col in numeric_cols 3084 else text_type 3085 ) 3086 for col, (db_typ, typ) in altered_cols.items() 3087 } 3088 3089 if self.flavor == 'sqlite': 3090 temp_table_name = '-' + session_id + '_' + target 3091 rename_query = ( 3092 "ALTER TABLE " 3093 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3094 + " RENAME TO " 3095 + sql_item_name(temp_table_name, self.flavor, None) 3096 ) 3097 create_query = ( 3098 "CREATE TABLE " 3099 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3100 + " (\n" 3101 ) 3102 for col_name, col_obj in table_obj.columns.items(): 3103 create_query += ( 3104 sql_item_name(col_name, self.flavor, None) 3105 + " " 3106 + ( 3107 str(col_obj.type) 3108 if col_name not in altered_cols 3109 else altered_cols_types[col_name] 3110 ) 3111 + ",\n" 3112 ) 3113 create_query = create_query[:-2] + "\n)" 3114 3115 insert_query = ( 3116 "INSERT INTO " 3117 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3118 + ' (' 3119 + ', '.join([ 3120 sql_item_name(col_name, self.flavor, None) 3121 for col_name, _ in table_obj.columns.items() 3122 ]) 3123 + ')' 3124 + "\nSELECT\n" 3125 ) 3126 for col_name, col_obj in table_obj.columns.items(): 3127 new_col_str = ( 3128 sql_item_name(col_name, self.flavor, None) 3129 if col_name not in altered_cols 3130 else ( 3131 "CAST(" 3132 + sql_item_name(col_name, self.flavor, None) 3133 + " AS " 3134 + altered_cols_types[col_name] 3135 + ")" 3136 ) 3137 ) 3138 insert_query += new_col_str + ",\n" 3139 insert_query = insert_query[:-2] + ( 3140 f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}" 3141 ) 3142 3143 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 3144 3145 drop_query = f"DROP TABLE {if_exists_str}" + sql_item_name( 3146 temp_table_name, self.flavor, self.get_pipe_schema(pipe) 3147 ) 3148 return [ 3149 rename_query, 3150 create_query, 3151 insert_query, 3152 drop_query, 3153 ] 3154 3155 queries = [] 3156 if self.flavor == 'oracle': 3157 for col, typ in 
altered_cols_types.items(): 3158 add_query = ( 3159 "ALTER TABLE " 3160 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3161 + "\nADD " + sql_item_name(col + '_temp', self.flavor, None) 3162 + " " + typ 3163 ) 3164 queries.append(add_query) 3165 3166 for col, typ in altered_cols_types.items(): 3167 populate_temp_query = ( 3168 "UPDATE " 3169 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3170 + "\nSET " + sql_item_name(col + '_temp', self.flavor, None) 3171 + ' = ' + sql_item_name(col, self.flavor, None) 3172 ) 3173 queries.append(populate_temp_query) 3174 3175 for col, typ in altered_cols_types.items(): 3176 set_old_cols_to_null_query = ( 3177 "UPDATE " 3178 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3179 + "\nSET " + sql_item_name(col, self.flavor, None) 3180 + ' = NULL' 3181 ) 3182 queries.append(set_old_cols_to_null_query) 3183 3184 for col, typ in altered_cols_types.items(): 3185 alter_type_query = ( 3186 "ALTER TABLE " 3187 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3188 + "\nMODIFY " + sql_item_name(col, self.flavor, None) + ' ' 3189 + typ 3190 ) 3191 queries.append(alter_type_query) 3192 3193 for col, typ in altered_cols_types.items(): 3194 set_old_to_temp_query = ( 3195 "UPDATE " 3196 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3197 + "\nSET " + sql_item_name(col, self.flavor, None) 3198 + ' = ' + sql_item_name(col + '_temp', self.flavor, None) 3199 ) 3200 queries.append(set_old_to_temp_query) 3201 3202 for col, typ in altered_cols_types.items(): 3203 drop_temp_query = ( 3204 "ALTER TABLE " 3205 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3206 + "\nDROP COLUMN " + sql_item_name(col + '_temp', self.flavor, None) 3207 ) 3208 queries.append(drop_temp_query) 3209 3210 return queries 3211 3212 query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3213 for col, typ in altered_cols_types.items(): 3214 alter_col_prefix = ( 3215 'ALTER' if self.flavor not in ('mysql', 'mariadb', 'oracle') 3216 else 'MODIFY' 3217 ) 3218 type_prefix = ( 3219 '' if self.flavor in ('mssql', 'mariadb', 'mysql') 3220 else 'TYPE ' 3221 ) 3222 column_str = 'COLUMN' if self.flavor != 'oracle' else '' 3223 query += ( 3224 f"\n{alter_col_prefix} {column_str} " 3225 + sql_item_name(col, self.flavor, None) 3226 + " " + type_prefix + typ + "," 3227 ) 3228 3229 query = query[:-1] 3230 queries.append(query) 3231 if self.flavor != 'duckdb': 3232 return queries 3233 3234 drop_index_queries = list(flatten_list( 3235 [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()] 3236 )) 3237 create_index_queries = list(flatten_list( 3238 [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()] 3239 )) 3240 3241 return drop_index_queries + queries + create_index_queries
If we encounter a column of a different type, set the entire column to text. If the altered columns are numeric, alter to numeric instead.
Parameters
- pipe (mrsm.Pipe): The pipe to be altered.
- df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which may contain altered columns. If a dict is provided, assume it maps columns to Pandas data types.
Returns
- A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
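A sketch of the same pattern for type drift (hypothetical scenario: the `station` column was stored as an integer but now arrives as text):
```
>>> alter_queries = conn.get_alter_columns_queries(pipe, {'station': 'string'})
>>> conn.exec_queries(alter_queries, debug=True)
```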
781def delete_pipe( 782 self, 783 pipe: mrsm.Pipe, 784 debug: bool = False, 785) -> SuccessTuple: 786 """ 787 Delete a Pipe's registration. 788 """ 789 from meerschaum.utils.sql import sql_item_name 790 from meerschaum.utils.debug import dprint 791 from meerschaum.utils.packages import attempt_import 792 sqlalchemy = attempt_import('sqlalchemy') 793 794 if not pipe.id: 795 return False, f"{pipe} is not registered." 796 797 ### ensure pipes table exists 798 from meerschaum.connectors.sql.tables import get_tables 799 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 800 801 q = sqlalchemy.delete(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id) 802 if not self.exec(q, debug=debug): 803 return False, f"Failed to delete registration for {pipe}." 804 805 return True, "Success"
Delete a Pipe's registration.
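A minimal sketch; in practice this is usually reached through `pipe.delete()` rather than called directly. Only the registration row is removed here:
```
>>> success, msg = conn.delete_pipe(pipe)
>>> print(success, msg)
```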
808def get_pipe_data( 809 self, 810 pipe: mrsm.Pipe, 811 select_columns: Optional[List[str]] = None, 812 omit_columns: Optional[List[str]] = None, 813 begin: Union[datetime, str, None] = None, 814 end: Union[datetime, str, None] = None, 815 params: Optional[Dict[str, Any]] = None, 816 order: str = 'asc', 817 limit: Optional[int] = None, 818 begin_add_minutes: int = 0, 819 end_add_minutes: int = 0, 820 debug: bool = False, 821 **kw: Any 822) -> Union[pd.DataFrame, None]: 823 """ 824 Access a pipe's data from the SQL instance. 825 826 Parameters 827 ---------- 828 pipe: mrsm.Pipe: 829 The pipe to get data from. 830 831 select_columns: Optional[List[str]], default None 832 If provided, only select these given columns. 833 Otherwise select all available columns (i.e. `SELECT *`). 834 835 omit_columns: Optional[List[str]], default None 836 If provided, remove these columns from the selection. 837 838 begin: Union[datetime, str, None], default None 839 If provided, get rows newer than or equal to this value. 840 841 end: Union[datetime, str, None], default None 842 If provided, get rows older than or equal to this value. 843 844 params: Optional[Dict[str, Any]], default None 845 Additional parameters to filter by. 846 See `meerschaum.connectors.sql.build_where`. 847 848 order: Optional[str], default 'asc' 849 The selection order for all of the indices in the query. 850 If `None`, omit the `ORDER BY` clause. 851 852 limit: Optional[int], default None 853 If specified, limit the number of rows retrieved to this value. 854 855 begin_add_minutes: int, default 0 856 The number of minutes to add to the `begin` datetime (i.e. `DATEADD`. 857 858 end_add_minutes: int, default 0 859 The number of minutes to add to the `end` datetime (i.e. `DATEADD`. 860 861 chunksize: Optional[int], default -1 862 The size of dataframe chunks to load into memory. 863 864 debug: bool, default False 865 Verbosity toggle. 866 867 Returns 868 ------- 869 A `pd.DataFrame` of the pipe's data. 
870 871 """ 872 import json 873 from meerschaum.utils.sql import sql_item_name 874 from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype 875 from meerschaum.utils.packages import import_pandas 876 from meerschaum.utils.dtypes import ( 877 attempt_cast_to_numeric, 878 attempt_cast_to_uuid, 879 are_dtypes_equal, 880 ) 881 from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type 882 pd = import_pandas() 883 is_dask = 'dask' in pd.__name__ 884 885 cols_types = pipe.get_columns_types(debug=debug) 886 dtypes = { 887 **{ 888 p_col: to_pandas_dtype(p_typ) 889 for p_col, p_typ in pipe.dtypes.items() 890 }, 891 **{ 892 col: get_pd_type_from_db_type(typ) 893 for col, typ in cols_types.items() 894 } 895 } 896 if dtypes: 897 if self.flavor == 'sqlite': 898 if not pipe.columns.get('datetime', None): 899 _dt = pipe.guess_datetime() 900 dt = sql_item_name(_dt, self.flavor, None) if _dt else None 901 is_guess = True 902 else: 903 _dt = pipe.get_columns('datetime') 904 dt = sql_item_name(_dt, self.flavor, None) 905 is_guess = False 906 907 if _dt: 908 dt_type = dtypes.get(_dt, 'object').lower() 909 if 'datetime' not in dt_type: 910 if 'int' not in dt_type: 911 dtypes[_dt] = 'datetime64[ns, UTC]' 912 existing_cols = pipe.get_columns_types(debug=debug) 913 select_columns = ( 914 [ 915 col 916 for col in existing_cols 917 if col not in (omit_columns or []) 918 ] 919 if not select_columns 920 else [ 921 col 922 for col in select_columns 923 if col in existing_cols 924 and col not in (omit_columns or []) 925 ] 926 ) 927 if select_columns: 928 dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns} 929 dtypes = { 930 col: to_pandas_dtype(typ) 931 for col, typ in dtypes.items() 932 if col in select_columns and col not in (omit_columns or []) 933 } 934 query = self.get_pipe_data_query( 935 pipe, 936 select_columns=select_columns, 937 omit_columns=omit_columns, 938 begin=begin, 939 end=end, 940 params=params, 941 order=order, 942 limit=limit, 943 begin_add_minutes=begin_add_minutes, 944 end_add_minutes=end_add_minutes, 945 debug=debug, 946 **kw 947 ) 948 949 if is_dask: 950 index_col = pipe.columns.get('datetime', None) 951 kw['index_col'] = index_col 952 953 numeric_columns = [ 954 col 955 for col, typ in pipe.dtypes.items() 956 if typ == 'numeric' and col in dtypes 957 ] 958 uuid_columns = [ 959 col 960 for col, typ in pipe.dtypes.items() 961 if typ == 'uuid' and col in dtypes 962 ] 963 964 kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0)) 965 966 df = self.read( 967 query, 968 dtype=dtypes, 969 debug=debug, 970 **kw 971 ) 972 for col in numeric_columns: 973 if col not in df.columns: 974 continue 975 df[col] = df[col].apply(attempt_cast_to_numeric) 976 977 for col in uuid_columns: 978 if col not in df.columns: 979 continue 980 df[col] = df[col].apply(attempt_cast_to_uuid) 981 982 if self.flavor == 'sqlite': 983 ignore_dt_cols = [ 984 col 985 for col, dtype in pipe.dtypes.items() 986 if not are_dtypes_equal(str(dtype), 'datetime') 987 ] 988 ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly 989 df = ( 990 parse_df_datetimes( 991 df, 992 ignore_cols=ignore_dt_cols, 993 chunksize=kw.get('chunksize', None), 994 strip_timezone=(pipe.tzinfo is None), 995 debug=debug, 996 ) if isinstance(df, pd.DataFrame) else ( 997 [ 998 parse_df_datetimes( 999 c, 1000 ignore_cols=ignore_dt_cols, 1001 chunksize=kw.get('chunksize', None), 1002 strip_timezone=(pipe.tzinfo is None), 1003 debug=debug, 1004 ) 1005 for c in df 1006 ] 
1007 ) 1008 ) 1009 for col, typ in dtypes.items(): 1010 if typ != 'json': 1011 continue 1012 df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x) 1013 return df
Access a pipe's data from the SQL instance.
Parameters
- pipe (mrsm.Pipe): The pipe to get data from.
- select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. `SELECT *`).
- omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, str, None], default None): If provided, get rows newer than or equal to this value.
- end (Union[datetime, str, None], default None): If provided, get rows older than or equal to this value.
- params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See `meerschaum.connectors.sql.build_where`.
- order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If `None`, omit the `ORDER BY` clause.
- limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
- begin_add_minutes (int, default 0): The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
- end_add_minutes (int, default 0): The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
- chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
- debug (bool, default False): Verbosity toggle.
Returns
- A `pd.DataFrame` of the pipe's data.
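A sketch of a bounded, filtered read, reusing the hypothetical pipe from earlier (the `temperature` column and `KATL` value are assumptions for illustration). This method is typically reached via `pipe.get_data()` when the pipe's instance is a SQL connector:
```
>>> from datetime import datetime
>>> df = conn.get_pipe_data(
...     pipe,
...     select_columns=['dt', 'station', 'temperature'],
...     begin=datetime(2024, 1, 1),
...     end=datetime(2024, 2, 1),
...     params={'station': 'KATL'},
...     limit=100,
... )
```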
1016def get_pipe_data_query( 1017 self, 1018 pipe: mrsm.Pipe, 1019 select_columns: Optional[List[str]] = None, 1020 omit_columns: Optional[List[str]] = None, 1021 begin: Union[datetime, int, str, None] = None, 1022 end: Union[datetime, int, str, None] = None, 1023 params: Optional[Dict[str, Any]] = None, 1024 order: Optional[str] = 'asc', 1025 sort_datetimes: bool = False, 1026 limit: Optional[int] = None, 1027 begin_add_minutes: int = 0, 1028 end_add_minutes: int = 0, 1029 replace_nulls: Optional[str] = None, 1030 skip_existing_cols_check: bool = False, 1031 debug: bool = False, 1032 **kw: Any 1033) -> Union[str, None]: 1034 """ 1035 Return the `SELECT` query for retrieving a pipe's data from its instance. 1036 1037 Parameters 1038 ---------- 1039 pipe: mrsm.Pipe: 1040 The pipe to get data from. 1041 1042 select_columns: Optional[List[str]], default None 1043 If provided, only select these given columns. 1044 Otherwise select all available columns (i.e. `SELECT *`). 1045 1046 omit_columns: Optional[List[str]], default None 1047 If provided, remove these columns from the selection. 1048 1049 begin: Union[datetime, int, str, None], default None 1050 If provided, get rows newer than or equal to this value. 1051 1052 end: Union[datetime, str, None], default None 1053 If provided, get rows older than or equal to this value. 1054 1055 params: Optional[Dict[str, Any]], default None 1056 Additional parameters to filter by. 1057 See `meerschaum.connectors.sql.build_where`. 1058 1059 order: Optional[str], default None 1060 The selection order for all of the indices in the query. 1061 If `None`, omit the `ORDER BY` clause. 1062 1063 sort_datetimes: bool, default False 1064 Alias for `order='desc'`. 1065 1066 limit: Optional[int], default None 1067 If specified, limit the number of rows retrieved to this value. 1068 1069 begin_add_minutes: int, default 0 1070 The number of minutes to add to the `begin` datetime (i.e. `DATEADD`). 1071 1072 end_add_minutes: int, default 0 1073 The number of minutes to add to the `end` datetime (i.e. `DATEADD`). 1074 1075 chunksize: Optional[int], default -1 1076 The size of dataframe chunks to load into memory. 1077 1078 replace_nulls: Optional[str], default None 1079 If provided, replace null values with this value. 1080 1081 skip_existing_cols_check: bool, default False 1082 If `True`, do not verify that querying columns are actually on the table. 1083 1084 debug: bool, default False 1085 Verbosity toggle. 1086 1087 Returns 1088 ------- 1089 A `SELECT` query to retrieve a pipe's data. 
1090 """ 1091 from meerschaum.utils.misc import items_str 1092 from meerschaum.utils.sql import sql_item_name, dateadd_str 1093 from meerschaum.utils.dtypes import coerce_timezone 1094 from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type 1095 1096 dt_col = pipe.columns.get('datetime', None) 1097 existing_cols = pipe.get_columns_types(debug=debug) 1098 dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None 1099 select_columns = ( 1100 [col for col in existing_cols] 1101 if not select_columns 1102 else [col for col in select_columns if col in existing_cols or skip_existing_cols_check] 1103 ) 1104 if omit_columns: 1105 select_columns = [col for col in select_columns if col not in omit_columns] 1106 1107 if order is None and sort_datetimes: 1108 order = 'desc' 1109 1110 if begin == '': 1111 begin = pipe.get_sync_time(debug=debug) 1112 backtrack_interval = pipe.get_backtrack_interval(debug=debug) 1113 if begin is not None: 1114 begin -= backtrack_interval 1115 1116 begin, end = pipe.parse_date_bounds(begin, end) 1117 if isinstance(begin, datetime) and dt_typ: 1118 begin = coerce_timezone(begin, strip_utc=('utc' not in dt_typ.lower())) 1119 if isinstance(end, datetime) and dt_typ: 1120 end = coerce_timezone(end, strip_utc=('utc' not in dt_typ.lower())) 1121 1122 cols_names = [ 1123 sql_item_name(col, self.flavor, None) 1124 for col in select_columns 1125 ] 1126 select_cols_str = ( 1127 'SELECT\n ' 1128 + ',\n '.join( 1129 [ 1130 ( 1131 col_name 1132 if not replace_nulls 1133 else f"COALESCE(col_name, '{replace_nulls}') AS {col_name}" 1134 ) 1135 for col_name in cols_names 1136 ] 1137 ) 1138 ) if cols_names else 'SELECT *' 1139 pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 1140 query = f"{select_cols_str}\nFROM {pipe_table_name}" 1141 where = "" 1142 1143 if order is not None: 1144 default_order = 'asc' 1145 if order not in ('asc', 'desc'): 1146 warn(f"Ignoring unsupported order '{order}'. Falling back to '{default_order}'.") 1147 order = default_order 1148 order = order.upper() 1149 1150 if not pipe.columns.get('datetime', None): 1151 _dt = pipe.guess_datetime() 1152 dt = sql_item_name(_dt, self.flavor, None) if _dt else None 1153 is_guess = True 1154 else: 1155 _dt = pipe.get_columns('datetime') 1156 dt = sql_item_name(_dt, self.flavor, None) 1157 is_guess = False 1158 1159 quoted_indices = { 1160 key: sql_item_name(val, self.flavor, None) 1161 for key, val in pipe.columns.items() 1162 if val in existing_cols or skip_existing_cols_check 1163 } 1164 1165 if begin is not None or end is not None: 1166 if is_guess: 1167 if _dt is None: 1168 warn( 1169 f"No datetime could be determined for {pipe}." 
1170 + "\n Ignoring begin and end...", 1171 stack=False, 1172 ) 1173 begin, end = None, None 1174 else: 1175 warn( 1176 f"A datetime wasn't specified for {pipe}.\n" 1177 + f" Using column \"{_dt}\" for datetime bounds...", 1178 stack=False, 1179 ) 1180 1181 is_dt_bound = False 1182 if begin is not None and (_dt in existing_cols or skip_existing_cols_check): 1183 begin_da = dateadd_str( 1184 flavor=self.flavor, 1185 datepart='minute', 1186 number=begin_add_minutes, 1187 begin=begin, 1188 ) 1189 where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "") 1190 is_dt_bound = True 1191 1192 if end is not None and (_dt in existing_cols or skip_existing_cols_check): 1193 if 'int' in str(type(end)).lower() and end == begin: 1194 end += 1 1195 end_da = dateadd_str( 1196 flavor=self.flavor, 1197 datepart='minute', 1198 number=end_add_minutes, 1199 begin=end 1200 ) 1201 where += f"{dt} < {end_da}" 1202 is_dt_bound = True 1203 1204 if params is not None: 1205 from meerschaum.utils.sql import build_where 1206 valid_params = { 1207 k: v 1208 for k, v in params.items() 1209 if k in existing_cols or skip_existing_cols_check 1210 } 1211 if valid_params: 1212 where += build_where(valid_params, self).replace( 1213 'WHERE', ('AND' if is_dt_bound else "") 1214 ) 1215 1216 if len(where) > 0: 1217 query += "\nWHERE " + where 1218 1219 if order is not None: 1220 ### Sort by indices, starting with datetime. 1221 order_by = "" 1222 if quoted_indices: 1223 order_by += "\nORDER BY " 1224 if _dt and (_dt in existing_cols or skip_existing_cols_check): 1225 order_by += dt + ' ' + order + ',' 1226 for key, quoted_col_name in quoted_indices.items(): 1227 if dt == quoted_col_name: 1228 continue 1229 order_by += ' ' + quoted_col_name + ' ' + order + ',' 1230 order_by = order_by[:-1] 1231 1232 query += order_by 1233 1234 if isinstance(limit, int): 1235 if self.flavor == 'mssql': 1236 query = f'SELECT TOP {limit}\n' + query[len("SELECT "):] 1237 elif self.flavor == 'oracle': 1238 query = ( 1239 f"SELECT * FROM (\n {query}\n)\n" 1240 + f"WHERE ROWNUM IN ({', '.join([str(i) for i in range(1, limit+1)])})" 1241 ) 1242 else: 1243 query += f"\nLIMIT {limit}" 1244 1245 if debug: 1246 to_print = ( 1247 [] 1248 + ([f"begin='{begin}'"] if begin else []) 1249 + ([f"end='{end}'"] if end else []) 1250 + ([f"params={params}"] if params else []) 1251 ) 1252 dprint("Getting pipe data with constraints: " + items_str(to_print, quotes=False)) 1253 1254 return query
Return the `SELECT` query for retrieving a pipe's data from its instance.
Parameters
- pipe (mrsm.Pipe): The pipe to get data from.
- select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. `SELECT *`).
- omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, int, str, None], default None): If provided, get rows newer than or equal to this value.
- end (Union[datetime, int, str, None], default None): If provided, get rows older than or equal to this value.
- params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See `meerschaum.connectors.sql.build_where`.
- order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If `None`, omit the `ORDER BY` clause.
- sort_datetimes (bool, default False): Alias for `order='desc'`.
- limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
- begin_add_minutes (int, default 0): The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
- end_add_minutes (int, default 0): The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
- chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
- replace_nulls (Optional[str], default None): If provided, replace null values with this value.
- skip_existing_cols_check (bool, default False): If `True`, do not verify that querying columns are actually on the table.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SELECT` query to retrieve a pipe's data.
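Because this only builds the query string, it's handy for inspecting what a read will do before anything is executed (sketch, same hypothetical `conn` and `pipe`):
```
>>> from datetime import datetime
>>> query = conn.get_pipe_data_query(pipe, begin=datetime(2024, 1, 1), limit=10)
>>> print(query)  ### the SELECT statement, with ORDER BY and LIMIT applied
```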
19def register_pipe( 20 self, 21 pipe: mrsm.Pipe, 22 debug: bool = False, 23) -> SuccessTuple: 24 """ 25 Register a new pipe. 26 A pipe's attributes must be set before registering. 27 """ 28 from meerschaum.utils.debug import dprint 29 from meerschaum.utils.packages import attempt_import 30 from meerschaum.utils.sql import json_flavors 31 32 ### ensure pipes table exists 33 from meerschaum.connectors.sql.tables import get_tables 34 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 35 36 if pipe.get_id(debug=debug) is not None: 37 return False, f"{pipe} is already registered." 38 39 ### NOTE: if `parameters` is supplied in the Pipe constructor, 40 ### then `pipe.parameters` will exist and not be fetched from the database. 41 42 ### 1. Prioritize the Pipe object's `parameters` first. 43 ### E.g. if the user manually sets the `parameters` property 44 ### or if the Pipe already exists 45 ### (which shouldn't be able to be registered anyway but that's an issue for later). 46 parameters = None 47 try: 48 parameters = pipe.parameters 49 except Exception as e: 50 if debug: 51 dprint(str(e)) 52 parameters = None 53 54 ### ensure `parameters` is a dictionary 55 if parameters is None: 56 parameters = {} 57 58 import json 59 sqlalchemy = attempt_import('sqlalchemy') 60 values = { 61 'connector_keys' : pipe.connector_keys, 62 'metric_key' : pipe.metric_key, 63 'location_key' : pipe.location_key, 64 'parameters' : ( 65 json.dumps(parameters) 66 if self.flavor not in json_flavors 67 else parameters 68 ), 69 } 70 query = sqlalchemy.insert(pipes_tbl).values(**values) 71 result = self.exec(query, debug=debug) 72 if result is None: 73 return False, f"Failed to register {pipe}." 74 return True, f"Successfully registered {pipe}."
Register a new pipe. A pipe's attributes must be set before registering.
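A sketch of direct registration; normally this happens implicitly via `pipe.register()` or on a pipe's first sync. The pipe keys here are hypothetical:
```
>>> new_pipe = mrsm.Pipe('demo', 'pressure', instance=conn)
>>> success, msg = conn.register_pipe(new_pipe)
>>> print(msg)
```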
77def edit_pipe( 78 self, 79 pipe : mrsm.Pipe = None, 80 patch: bool = False, 81 debug: bool = False, 82 **kw : Any 83) -> SuccessTuple: 84 """ 85 Persist a Pipe's parameters to its database. 86 87 Parameters 88 ---------- 89 pipe: mrsm.Pipe, default None 90 The pipe to be edited. 91 patch: bool, default False 92 If patch is `True`, update the existing parameters by cascading. 93 Otherwise overwrite the parameters (default). 94 debug: bool, default False 95 Verbosity toggle. 96 """ 97 98 if pipe.id is None: 99 return False, f"{pipe} is not registered and cannot be edited." 100 101 from meerschaum.utils.debug import dprint 102 from meerschaum.utils.packages import attempt_import 103 from meerschaum.utils.sql import json_flavors 104 if not patch: 105 parameters = pipe.__dict__.get('_attributes', {}).get('parameters', {}) 106 else: 107 from meerschaum import Pipe 108 from meerschaum.config._patch import apply_patch_to_config 109 original_parameters = Pipe( 110 pipe.connector_keys, pipe.metric_key, pipe.location_key, 111 mrsm_instance=pipe.instance_keys 112 ).parameters 113 parameters = apply_patch_to_config( 114 original_parameters, 115 pipe.parameters 116 ) 117 118 ### ensure pipes table exists 119 from meerschaum.connectors.sql.tables import get_tables 120 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 121 122 import json 123 sqlalchemy = attempt_import('sqlalchemy') 124 125 values = { 126 'parameters': ( 127 json.dumps(parameters) 128 if self.flavor not in json_flavors 129 else parameters 130 ), 131 } 132 q = sqlalchemy.update(pipes_tbl).values(**values).where( 133 pipes_tbl.c.pipe_id == pipe.id 134 ) 135 136 result = self.exec(q, debug=debug) 137 message = ( 138 f"Successfully edited {pipe}." 139 if result is not None else f"Failed to edit {pipe}." 140 ) 141 return (result is not None), message
Persist a Pipe's parameters to its database.
Parameters
- pipe (mrsm.Pipe, default None): The pipe to be edited.
- patch (bool, default False): If patch is `True`, update the existing parameters by cascading. Otherwise overwrite the parameters (default).
- debug (bool, default False): Verbosity toggle.
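A sketch of persisting edited parameters (usually reached via `pipe.edit()`). With `patch=True`, the new keys cascade into the stored parameters instead of replacing them wholesale; the `tags` key is an assumption for illustration:
```
>>> pipe.parameters['tags'] = ['production']
>>> success, msg = conn.edit_pipe(pipe, patch=True)
>>> print(msg)
```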
1257def get_pipe_id( 1258 self, 1259 pipe: mrsm.Pipe, 1260 debug: bool = False, 1261) -> Any: 1262 """ 1263 Get a Pipe's ID from the pipes table. 1264 """ 1265 if pipe.temporary: 1266 return None 1267 from meerschaum.utils.packages import attempt_import 1268 import json 1269 sqlalchemy = attempt_import('sqlalchemy') 1270 from meerschaum.connectors.sql.tables import get_tables 1271 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1272 1273 query = sqlalchemy.select(pipes_tbl.c.pipe_id).where( 1274 pipes_tbl.c.connector_keys == pipe.connector_keys 1275 ).where( 1276 pipes_tbl.c.metric_key == pipe.metric_key 1277 ).where( 1278 (pipes_tbl.c.location_key == pipe.location_key) if pipe.location_key is not None 1279 else pipes_tbl.c.location_key.is_(None) 1280 ) 1281 _id = self.value(query, debug=debug, silent=pipe.temporary) 1282 if _id is not None: 1283 _id = int(_id) 1284 return _id
Get a Pipe's ID from the pipes table.
1287def get_pipe_attributes( 1288 self, 1289 pipe: mrsm.Pipe, 1290 debug: bool = False, 1291) -> Dict[str, Any]: 1292 """ 1293 Get a Pipe's attributes dictionary. 1294 """ 1295 from meerschaum.connectors.sql.tables import get_tables 1296 from meerschaum.utils.packages import attempt_import 1297 sqlalchemy = attempt_import('sqlalchemy') 1298 1299 if pipe.get_id(debug=debug) is None: 1300 return {} 1301 1302 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1303 1304 try: 1305 q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id) 1306 if debug: 1307 dprint(q) 1308 attributes = ( 1309 dict(self.exec(q, silent=True, debug=debug).first()._mapping) 1310 if self.flavor != 'duckdb' 1311 else self.read(q, debug=debug).to_dict(orient='records')[0] 1312 ) 1313 except Exception as e: 1314 import traceback 1315 traceback.print_exc() 1316 warn(e) 1317 print(pipe) 1318 return {} 1319 1320 ### handle non-PostgreSQL databases (text vs JSON) 1321 if not isinstance(attributes.get('parameters', None), dict): 1322 try: 1323 import json 1324 parameters = json.loads(attributes['parameters']) 1325 if isinstance(parameters, str) and parameters[0] == '{': 1326 parameters = json.loads(parameters) 1327 attributes['parameters'] = parameters 1328 except Exception as e: 1329 attributes['parameters'] = {} 1330 1331 return attributes
Get a Pipe's attributes dictionary.
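Both lookups key off the registration row in the pipes table (sketch, same hypothetical pipe):
```
>>> conn.get_pipe_id(pipe)          ### None for temporary pipes
>>> conn.get_pipe_attributes(pipe)  ### {} if the pipe isn't registered
```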
1398def sync_pipe( 1399 self, 1400 pipe: mrsm.Pipe, 1401 df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None, 1402 begin: Optional[datetime] = None, 1403 end: Optional[datetime] = None, 1404 chunksize: Optional[int] = -1, 1405 check_existing: bool = True, 1406 blocking: bool = True, 1407 debug: bool = False, 1408 _check_temporary_tables: bool = True, 1409 **kw: Any 1410) -> SuccessTuple: 1411 """ 1412 Sync a pipe using a database connection. 1413 1414 Parameters 1415 ---------- 1416 pipe: mrsm.Pipe 1417 The Meerschaum Pipe instance into which to sync the data. 1418 1419 df: Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]] 1420 An optional DataFrame or equivalent to sync into the pipe. 1421 Defaults to `None`. 1422 1423 begin: Optional[datetime], default None 1424 Optionally specify the earliest datetime to search for data. 1425 Defaults to `None`. 1426 1427 end: Optional[datetime], default None 1428 Optionally specify the latest datetime to search for data. 1429 Defaults to `None`. 1430 1431 chunksize: Optional[int], default -1 1432 Specify the number of rows to sync per chunk. 1433 If `-1`, resort to system configuration (default is `900`). 1434 A `chunksize` of `None` will sync all rows in one transaction. 1435 Defaults to `-1`. 1436 1437 check_existing: bool, default True 1438 If `True`, pull and diff with existing data from the pipe. Defaults to `True`. 1439 1440 blocking: bool, default True 1441 If `True`, wait for sync to finish and return its result, otherwise asyncronously sync. 1442 Defaults to `True`. 1443 1444 debug: bool, default False 1445 Verbosity toggle. Defaults to False. 1446 1447 kw: Any 1448 Catch-all for keyword arguments. 1449 1450 Returns 1451 ------- 1452 A `SuccessTuple` of success (`bool`) and message (`str`). 1453 """ 1454 from meerschaum.utils.packages import import_pandas 1455 from meerschaum.utils.sql import ( 1456 get_update_queries, 1457 sql_item_name, 1458 update_queries, 1459 get_create_table_queries, 1460 get_reset_autoincrement_queries, 1461 ) 1462 from meerschaum.utils.misc import generate_password 1463 from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols 1464 from meerschaum.utils.dtypes import are_dtypes_equal 1465 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 1466 from meerschaum import Pipe 1467 import time 1468 import copy 1469 pd = import_pandas() 1470 if df is None: 1471 msg = f"DataFrame is None. Cannot sync {pipe}." 1472 warn(msg) 1473 return False, msg 1474 1475 start = time.perf_counter() 1476 pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)) 1477 1478 if not pipe.temporary and not pipe.get_id(debug=debug): 1479 register_tuple = pipe.register(debug=debug) 1480 if not register_tuple[0]: 1481 return register_tuple 1482 1483 ### df is the dataframe returned from the remote source 1484 ### via the connector 1485 if debug: 1486 dprint("Fetched data:\n" + str(df)) 1487 1488 if not isinstance(df, pd.DataFrame): 1489 df = pipe.enforce_dtypes( 1490 df, 1491 chunksize=chunksize, 1492 safe_copy=kw.get('safe_copy', False), 1493 debug=debug, 1494 ) 1495 1496 ### if table does not exist, create it with indices 1497 is_new = False 1498 if not pipe.exists(debug=debug): 1499 check_existing = False 1500 is_new = True 1501 else: 1502 ### Check for new columns. 
1503 add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug) 1504 if add_cols_queries: 1505 _ = pipe.__dict__.pop('_columns_indices', None) 1506 _ = pipe.__dict__.pop('_columns_types', None) 1507 if not self.exec_queries(add_cols_queries, debug=debug): 1508 warn(f"Failed to add new columns to {pipe}.") 1509 1510 alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug) 1511 if alter_cols_queries: 1512 _ = pipe.__dict__.pop('_columns_indices', None) 1513 _ = pipe.__dict__.pop('_columns_types', None) 1514 if not self.exec_queries(alter_cols_queries, debug=debug): 1515 warn(f"Failed to alter columns for {pipe}.") 1516 else: 1517 _ = pipe.infer_dtypes(persist=True) 1518 1519 ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans, 1520 ### so infer bools and persist them to `dtypes`. 1521 if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'): 1522 pipe_dtypes = pipe.dtypes 1523 new_bool_cols = { 1524 col: 'bool[pyarrow]' 1525 for col, typ in df.dtypes.items() 1526 if col not in pipe_dtypes 1527 and are_dtypes_equal(str(typ), 'bool') 1528 } 1529 pipe_dtypes.update(new_bool_cols) 1530 pipe.dtypes = pipe_dtypes 1531 if new_bool_cols and not pipe.temporary: 1532 infer_bool_success, infer_bool_msg = pipe.edit(debug=debug) 1533 if not infer_bool_success: 1534 return infer_bool_success, infer_bool_msg 1535 1536 upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries 1537 if upsert: 1538 check_existing = False 1539 kw['safe_copy'] = kw.get('safe_copy', False) 1540 1541 unseen_df, update_df, delta_df = ( 1542 pipe.filter_existing( 1543 df, 1544 chunksize=chunksize, 1545 debug=debug, 1546 **kw 1547 ) if check_existing else (df, None, df) 1548 ) 1549 if upsert: 1550 unseen_df, update_df, delta_df = (df.head(0), df, df) 1551 1552 if debug: 1553 dprint("Delta data:\n" + str(delta_df)) 1554 dprint("Unseen data:\n" + str(unseen_df)) 1555 if update_df is not None: 1556 dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df)) 1557 1558 if_exists = kw.get('if_exists', 'append') 1559 if 'if_exists' in kw: 1560 kw.pop('if_exists') 1561 if 'name' in kw: 1562 kw.pop('name') 1563 1564 ### Insert new data into Pipe's table. 
1565 unseen_kw = copy.deepcopy(kw) 1566 unseen_kw.update({ 1567 'name': pipe.target, 1568 'if_exists': if_exists, 1569 'debug': debug, 1570 'as_dict': True, 1571 'chunksize': chunksize, 1572 'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True), 1573 'schema': self.get_pipe_schema(pipe), 1574 }) 1575 1576 primary_key = pipe.columns.get('primary', None) 1577 autoincrement = ( 1578 pipe.parameters.get('autoincrement', False) 1579 or ( 1580 is_new 1581 and primary_key 1582 and primary_key 1583 not in pipe.dtypes 1584 and primary_key not in unseen_df.columns 1585 ) 1586 ) 1587 if autoincrement and autoincrement not in pipe.parameters: 1588 pipe.parameters['autoincrement'] = autoincrement 1589 edit_success, edit_msg = pipe.edit(debug=debug) 1590 if not edit_success: 1591 return edit_success, edit_msg 1592 1593 autoincrement_needs_reset = False 1594 if autoincrement and primary_key: 1595 if primary_key not in df.columns: 1596 if unseen_df is not None and primary_key in unseen_df.columns: 1597 del unseen_df[primary_key] 1598 if update_df is not None and primary_key in update_df.columns: 1599 del update_df[primary_key] 1600 if delta_df is not None and primary_key in delta_df.columns: 1601 del delta_df[primary_key] 1602 elif unseen_df[primary_key].notnull().any(): 1603 autoincrement_needs_reset = True 1604 1605 if is_new: 1606 create_success, create_msg = self.create_pipe_table_from_df( 1607 pipe, 1608 unseen_df, 1609 debug=debug, 1610 ) 1611 if not create_success: 1612 return create_success, create_msg 1613 1614 do_identity_insert = bool( 1615 self.flavor in ('mssql',) 1616 and primary_key in unseen_df.columns 1617 and autoincrement 1618 ) 1619 with self.engine.connect() as connection: 1620 with connection.begin(): 1621 if do_identity_insert: 1622 identity_on_result = self.exec( 1623 f"SET IDENTITY_INSERT {pipe_name} ON", 1624 commit=False, 1625 _connection=connection, 1626 close=False, 1627 debug=debug, 1628 ) 1629 if identity_on_result is None: 1630 return False, f"Could not enable identity inserts on {pipe}." 1631 1632 stats = self.to_sql( 1633 unseen_df, 1634 _connection=connection, 1635 **unseen_kw 1636 ) 1637 1638 if do_identity_insert: 1639 identity_off_result = self.exec( 1640 f"SET IDENTITY_INSERT {pipe_name} OFF", 1641 commit=False, 1642 _connection=connection, 1643 close=False, 1644 debug=debug, 1645 ) 1646 if identity_off_result is None: 1647 return False, f"Could not disable identity inserts on {pipe}." 1648 1649 if is_new: 1650 if not self.create_indices(pipe, debug=debug): 1651 warn(f"Failed to create indices for {pipe}. 
Continuing...") 1652 1653 if autoincrement_needs_reset: 1654 reset_autoincrement_queries = get_reset_autoincrement_queries( 1655 pipe.target, 1656 primary_key, 1657 self, 1658 schema=self.get_pipe_schema(pipe), 1659 debug=debug, 1660 ) 1661 results = self.exec_queries(reset_autoincrement_queries, debug=debug) 1662 for result in results: 1663 if result is None: 1664 warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False) 1665 1666 if update_df is not None and len(update_df) > 0: 1667 transact_id = generate_password(3) 1668 temp_prefix = '##' if self.flavor != 'oracle' else '_' 1669 temp_target = temp_prefix + transact_id + '_' + pipe.target 1670 self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug) 1671 temp_pipe = Pipe( 1672 pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key, 1673 instance=pipe.instance_keys, 1674 columns={ 1675 (ix_key if ix_key != 'primary' else 'primary_'): ix 1676 for ix_key, ix in pipe.columns.items() 1677 if ix and ix in update_df.columns 1678 }, 1679 dtypes={ 1680 col: typ 1681 for col, typ in pipe.dtypes.items() 1682 if col in update_df.columns 1683 }, 1684 target=temp_target, 1685 temporary=True, 1686 parameters={ 1687 'static': True, 1688 'schema': self.internal_schema, 1689 'hypertable': False, 1690 'autoincrement': False, 1691 }, 1692 ) 1693 temp_pipe.__dict__['_columns_types'] = { 1694 col: get_db_type_from_pd_type( 1695 pipe.dtypes.get(col, str(typ)), 1696 self.flavor, 1697 ) 1698 for col, typ in update_df.dtypes.items() 1699 } 1700 now_ts = time.perf_counter() 1701 temp_pipe.__dict__['_columns_types_timestamp'] = now_ts 1702 temp_pipe.__dict__['_skip_check_indices'] = True 1703 temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug) 1704 if not temp_success: 1705 return temp_success, temp_msg 1706 existing_cols = pipe.get_columns_types(debug=debug) 1707 join_cols = [ 1708 col 1709 for col_key, col in pipe.columns.items() 1710 if col and col in existing_cols 1711 ] 1712 update_queries = get_update_queries( 1713 pipe.target, 1714 temp_target, 1715 self, 1716 join_cols, 1717 upsert=upsert, 1718 schema=self.get_pipe_schema(pipe), 1719 patch_schema=self.internal_schema, 1720 datetime_col=pipe.columns.get('datetime', None), 1721 debug=debug, 1722 ) 1723 update_success = all( 1724 self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug) 1725 ) 1726 self._log_temporary_tables_creation( 1727 temp_target, 1728 ready_to_drop=True, 1729 create=(not pipe.temporary), 1730 debug=debug, 1731 ) 1732 if not update_success: 1733 warn(f"Failed to apply update to {pipe}.") 1734 1735 stop = time.perf_counter() 1736 success = stats['success'] 1737 if not success: 1738 return success, stats['msg'] 1739 1740 unseen_count = len(unseen_df.index) if unseen_df is not None else 0 1741 update_count = len(update_df.index) if update_df is not None else 0 1742 msg = ( 1743 ( 1744 f"Inserted {unseen_count}, " 1745 + f"updated {update_count} rows." 1746 ) 1747 if not upsert 1748 else ( 1749 f"Upserted {update_count} row" 1750 + ('s' if update_count != 1 else '') 1751 + "." 1752 ) 1753 ) 1754 if debug: 1755 msg = msg[:-1] + ( 1756 f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n" 1757 + f"in {round(stop - start, 2)} seconds." 
1758 ) 1759 1760 if _check_temporary_tables: 1761 drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables( 1762 refresh=False, debug=debug 1763 ) 1764 if not drop_stale_success: 1765 warn(drop_stale_msg) 1766 1767 return success, msg
Sync a pipe using a database connection.
Parameters
- pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
- df (Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]): An optional DataFrame or equivalent to sync into the pipe. Defaults to `None`.
- begin (Optional[datetime], default None): Optionally specify the earliest datetime to search for data. Defaults to `None`.
- end (Optional[datetime], default None): Optionally specify the latest datetime to search for data. Defaults to `None`.
- chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If `-1`, resort to system configuration (default is `900`). A `chunksize` of `None` will sync all rows in one transaction. Defaults to `-1`.
- check_existing (bool, default True): If `True`, pull and diff with existing data from the pipe. Defaults to `True`.
- blocking (bool, default True): If `True`, wait for sync to finish and return its result, otherwise asynchronously sync. Defaults to `True`.
- debug (bool, default False): Verbosity toggle. Defaults to False.
- kw (Any): Catch-all for keyword arguments.
Returns
- A `SuccessTuple` of success (`bool`) and message (`str`).
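A sketch of a direct sync; the usual entry point is `pipe.sync(df)`, which routes here for SQL instances. Column names and values are the hypothetical ones from the earlier sketches:
```
>>> import pandas as pd
>>> df = pd.DataFrame({
...     'dt': ['2024-01-01 00:00:00'],
...     'station': ['KATL'],
...     'temperature': [11.2],
... })
>>> success, msg = conn.sync_pipe(pipe, df)
>>> print(msg)
Inserted 1, updated 0 rows.
```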
1770def sync_pipe_inplace( 1771 self, 1772 pipe: 'mrsm.Pipe', 1773 params: Optional[Dict[str, Any]] = None, 1774 begin: Union[datetime, int, None] = None, 1775 end: Union[datetime, int, None] = None, 1776 chunksize: Optional[int] = -1, 1777 check_existing: bool = True, 1778 debug: bool = False, 1779 **kw: Any 1780) -> SuccessTuple: 1781 """ 1782 If a pipe's connector is the same as its instance connector, 1783 it's more efficient to sync the pipe in-place rather than reading data into Pandas. 1784 1785 Parameters 1786 ---------- 1787 pipe: mrsm.Pipe 1788 The pipe whose connector is the same as its instance. 1789 1790 params: Optional[Dict[str, Any]], default None 1791 Optional params dictionary to build the `WHERE` clause. 1792 See `meerschaum.utils.sql.build_where`. 1793 1794 begin: Union[datetime, int, None], default None 1795 Optionally specify the earliest datetime to search for data. 1796 Defaults to `None`. 1797 1798 end: Union[datetime, int, None], default None 1799 Optionally specify the latest datetime to search for data. 1800 Defaults to `None`. 1801 1802 chunksize: Optional[int], default -1 1803 Specify the number of rows to sync per chunk. 1804 If `-1`, resort to system configuration (default is `900`). 1805 A `chunksize` of `None` will sync all rows in one transaction. 1806 Defaults to `-1`. 1807 1808 check_existing: bool, default True 1809 If `True`, pull and diff with existing data from the pipe. 1810 1811 debug: bool, default False 1812 Verbosity toggle. 1813 1814 Returns 1815 ------- 1816 A SuccessTuple. 1817 """ 1818 if self.flavor == 'duckdb': 1819 return pipe.sync( 1820 params=params, 1821 begin=begin, 1822 end=end, 1823 chunksize=chunksize, 1824 check_existing=check_existing, 1825 debug=debug, 1826 _inplace=False, 1827 **kw 1828 ) 1829 from meerschaum.utils.sql import ( 1830 sql_item_name, 1831 get_update_queries, 1832 get_null_replacement, 1833 get_create_table_queries, 1834 get_table_cols_types, 1835 session_execute, 1836 update_queries, 1837 ) 1838 from meerschaum.utils.dtypes import are_dtypes_equal 1839 from meerschaum.utils.dtypes.sql import ( 1840 get_pd_type_from_db_type, 1841 ) 1842 from meerschaum.utils.misc import generate_password 1843 1844 transact_id = generate_password(3) 1845 def get_temp_table_name(label: str) -> str: 1846 temp_prefix = '##' if self.flavor != 'oracle' else '_' 1847 return temp_prefix + transact_id + '_' + label + '_' + pipe.target 1848 1849 internal_schema = self.internal_schema 1850 temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update'] 1851 temp_tables = { 1852 table_root: get_temp_table_name(table_root) 1853 for table_root in temp_table_roots 1854 } 1855 temp_table_names = { 1856 table_root: sql_item_name( 1857 table_name_raw, 1858 self.flavor, 1859 internal_schema, 1860 ) 1861 for table_root, table_name_raw in temp_tables.items() 1862 } 1863 metadef = self.get_pipe_metadef( 1864 pipe, 1865 params=params, 1866 begin=begin, 1867 end=end, 1868 check_existing=check_existing, 1869 debug=debug, 1870 ) 1871 pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 1872 upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries 1873 static = pipe.parameters.get('static', False) 1874 database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None)) 1875 primary_key = pipe.columns.get('primary', None) 1876 autoincrement = pipe.parameters.get('autoincrement', False) 1877 dt_col = pipe.columns.get('datetime', None) 1878 dt_col_name = 
sql_item_name(dt_col, self.flavor, None) if dt_col else None 1879 1880 def clean_up_temp_tables(ready_to_drop: bool = False): 1881 log_success, log_msg = self._log_temporary_tables_creation( 1882 [ 1883 table 1884 for table in temp_tables.values() 1885 ] if not upsert else [temp_tables['update']], 1886 ready_to_drop=ready_to_drop, 1887 create=(not pipe.temporary), 1888 debug=debug, 1889 ) 1890 if not log_success: 1891 warn(log_msg) 1892 drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables( 1893 refresh=False, 1894 debug=debug, 1895 ) 1896 if not drop_stale_success: 1897 warn(drop_stale_msg) 1898 return drop_stale_success, drop_stale_msg 1899 1900 sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm') 1901 if not pipe.exists(debug=debug): 1902 create_pipe_queries = get_create_table_queries( 1903 metadef, 1904 pipe.target, 1905 self.flavor, 1906 schema=self.get_pipe_schema(pipe), 1907 primary_key=primary_key, 1908 autoincrement=autoincrement, 1909 datetime_column=dt_col, 1910 ) 1911 result = self.exec_queries(create_pipe_queries, debug=debug) 1912 if result is None: 1913 _ = clean_up_temp_tables() 1914 return False, f"Could not insert new data into {pipe} from its SQL query definition." 1915 1916 if not self.create_indices(pipe, debug=debug): 1917 warn(f"Failed to create indices for {pipe}. Continuing...") 1918 1919 rowcount = pipe.get_rowcount(debug=debug) 1920 _ = clean_up_temp_tables() 1921 return True, f"Inserted {rowcount}, updated 0 rows." 1922 1923 session = sqlalchemy_orm.Session(self.engine) 1924 connectable = session if self.flavor != 'duckdb' else self 1925 1926 create_new_query = get_create_table_queries( 1927 metadef, 1928 temp_tables[('new') if not upsert else 'update'], 1929 self.flavor, 1930 schema=internal_schema, 1931 )[0] 1932 (create_new_success, create_new_msg), create_new_results = session_execute( 1933 session, 1934 create_new_query, 1935 with_results=True, 1936 debug=debug, 1937 ) 1938 if not create_new_success: 1939 _ = clean_up_temp_tables() 1940 return create_new_success, create_new_msg 1941 new_count = create_new_results[0].rowcount if create_new_results else 0 1942 1943 new_cols_types = get_table_cols_types( 1944 temp_tables[('new' if not upsert else 'update')], 1945 connectable=connectable, 1946 flavor=self.flavor, 1947 schema=internal_schema, 1948 database=database, 1949 debug=debug, 1950 ) if not static else pipe.get_columns_types(debug=debug) 1951 if not new_cols_types: 1952 return False, f"Failed to get new columns for {pipe}." 
1953 1954 new_cols = { 1955 str(col_name): get_pd_type_from_db_type(str(col_type)) 1956 for col_name, col_type in new_cols_types.items() 1957 } 1958 new_cols_str = ', '.join([ 1959 sql_item_name(col, self.flavor) 1960 for col in new_cols 1961 ]) 1962 def get_col_typ(col: str, cols_types: Dict[str, str]) -> str: 1963 if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char': 1964 return new_cols_types[col] 1965 return cols_types[col] 1966 1967 add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug) 1968 if add_cols_queries: 1969 _ = pipe.__dict__.pop('_columns_types', None) 1970 _ = pipe.__dict__.pop('_columns_indices', None) 1971 self.exec_queries(add_cols_queries, debug=debug) 1972 1973 alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug) 1974 if alter_cols_queries: 1975 _ = pipe.__dict__.pop('_columns_types', None) 1976 self.exec_queries(alter_cols_queries, debug=debug) 1977 1978 insert_queries = [ 1979 ( 1980 f"INSERT INTO {pipe_name} ({new_cols_str})\n" 1981 + f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}" 1982 ) 1983 ] if not check_existing and not upsert else [] 1984 1985 new_queries = insert_queries 1986 new_success, new_msg = ( 1987 session_execute(session, new_queries, debug=debug) 1988 if new_queries 1989 else (True, "Success") 1990 ) 1991 if not new_success: 1992 _ = clean_up_temp_tables() 1993 return new_success, new_msg 1994 1995 if not check_existing: 1996 session.commit() 1997 _ = clean_up_temp_tables() 1998 return True, f"Inserted {new_count}, updated 0 rows." 1999 2000 (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute( 2001 session, 2002 [ 2003 "SELECT\n" 2004 f" MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n" 2005 f" MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n" 2006 f"FROM {temp_table_names['new' if not upsert else 'update']}\n" 2007 f"WHERE {dt_col_name} IS NOT NULL" 2008 ], 2009 with_results=True, 2010 debug=debug, 2011 ) if dt_col and not upsert else ((True, "Success"), None) 2012 if not new_dt_bounds_success: 2013 return ( 2014 new_dt_bounds_success, 2015 f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}" 2016 ) 2017 2018 if dt_col and not upsert: 2019 begin, end = new_dt_bounds_results[0].fetchone() 2020 2021 backtrack_def = self.get_pipe_data_query( 2022 pipe, 2023 begin=begin, 2024 end=end, 2025 begin_add_minutes=0, 2026 end_add_minutes=1, 2027 params=params, 2028 debug=debug, 2029 order=None, 2030 ) 2031 create_backtrack_query = get_create_table_queries( 2032 backtrack_def, 2033 temp_tables['backtrack'], 2034 self.flavor, 2035 schema=internal_schema, 2036 )[0] 2037 (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute( 2038 session, 2039 create_backtrack_query, 2040 with_results=True, 2041 debug=debug, 2042 ) if not upsert else ((True, "Success"), None) 2043 2044 if not create_backtrack_success: 2045 _ = clean_up_temp_tables() 2046 return create_backtrack_success, create_backtrack_msg 2047 2048 backtrack_cols_types = get_table_cols_types( 2049 temp_tables['backtrack'], 2050 connectable=connectable, 2051 flavor=self.flavor, 2052 schema=internal_schema, 2053 database=database, 2054 debug=debug, 2055 ) if not (upsert or static) else new_cols_types 2056 2057 common_cols = [col for col in new_cols if col in backtrack_cols_types] 2058 on_cols = { 2059 col: new_cols.get(col) 2060 for col_key, col in pipe.columns.items() 2061 if ( 2062 col 2063 and 2064 col_key != 
'value' 2065 and col in backtrack_cols_types 2066 and col in new_cols 2067 ) 2068 } 2069 2070 null_replace_new_cols_str = ( 2071 ', '.join([ 2072 f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, " 2073 + get_null_replacement(get_col_typ(col, new_cols), self.flavor) 2074 + ") AS " 2075 + sql_item_name(col, self.flavor, None) 2076 for col, typ in new_cols.items() 2077 ]) 2078 ) 2079 2080 select_delta_query = ( 2081 "SELECT\n" 2082 + null_replace_new_cols_str + "\n" 2083 + f"\nFROM {temp_table_names['new']}\n" 2084 + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n" 2085 + '\nAND\n'.join([ 2086 ( 2087 f"COALESCE({temp_table_names['new']}." 2088 + sql_item_name(c, self.flavor, None) 2089 + ", " 2090 + get_null_replacement(get_col_typ(c, new_cols), self.flavor) 2091 + ") " 2092 + ' = ' 2093 + f"COALESCE({temp_table_names['backtrack']}." 2094 + sql_item_name(c, self.flavor, None) 2095 + ", " 2096 + get_null_replacement(backtrack_cols_types[c], self.flavor) 2097 + ") " 2098 ) for c in common_cols 2099 ]) 2100 + "\nWHERE\n" 2101 + '\nAND\n'.join([ 2102 ( 2103 f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) + ' IS NULL' 2104 ) for c in common_cols 2105 ]) 2106 ) 2107 create_delta_query = get_create_table_queries( 2108 select_delta_query, 2109 temp_tables['delta'], 2110 self.flavor, 2111 schema=internal_schema, 2112 )[0] 2113 create_delta_success, create_delta_msg = session_execute( 2114 session, 2115 create_delta_query, 2116 debug=debug, 2117 ) if not upsert else (True, "Success") 2118 if not create_delta_success: 2119 _ = clean_up_temp_tables() 2120 return create_delta_success, create_delta_msg 2121 2122 delta_cols_types = get_table_cols_types( 2123 temp_tables['delta'], 2124 connectable=connectable, 2125 flavor=self.flavor, 2126 schema=internal_schema, 2127 database=database, 2128 debug=debug, 2129 ) if not (upsert or static) else new_cols_types 2130 2131 ### This is a weird bug on SQLite. 2132 ### Sometimes the backtrack dtypes are all empty strings. 2133 if not all(delta_cols_types.values()): 2134 delta_cols_types = new_cols_types 2135 2136 delta_cols = { 2137 col: get_pd_type_from_db_type(typ) 2138 for col, typ in delta_cols_types.items() 2139 } 2140 delta_cols_str = ', '.join([ 2141 sql_item_name(col, self.flavor) 2142 for col in delta_cols 2143 ]) 2144 2145 select_joined_query = ( 2146 "SELECT " 2147 + (', '.join([ 2148 ( 2149 f"{temp_table_names['delta']}." + sql_item_name(c, self.flavor, None) 2150 + " AS " + sql_item_name(c + '_delta', self.flavor, None) 2151 ) for c in delta_cols 2152 ])) 2153 + ", " 2154 + (', '.join([ 2155 ( 2156 f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) 2157 + " AS " + sql_item_name(c + '_backtrack', self.flavor, None) 2158 ) for c in backtrack_cols_types 2159 ])) 2160 + f"\nFROM {temp_table_names['delta']}\n" 2161 + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n" 2162 + '\nAND\n'.join([ 2163 ( 2164 f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None) 2165 + ", " 2166 + get_null_replacement( 2167 get_col_typ(c, on_cols), 2168 self.flavor 2169 ) + ")" 2170 + ' = ' 2171 + f"COALESCE({temp_table_names['backtrack']}." 
+ sql_item_name(c, self.flavor, None) 2172 + ", " 2173 + get_null_replacement( 2174 get_col_typ(c, on_cols), 2175 self.flavor 2176 ) + ")" 2177 ) for c, typ in on_cols.items() 2178 ]) 2179 ) 2180 2181 create_joined_query = get_create_table_queries( 2182 select_joined_query, 2183 temp_tables['joined'], 2184 self.flavor, 2185 schema=internal_schema, 2186 )[0] 2187 create_joined_success, create_joined_msg = session_execute( 2188 session, 2189 create_joined_query, 2190 debug=debug, 2191 ) if on_cols and not upsert else (True, "Success") 2192 if not create_joined_success: 2193 _ = clean_up_temp_tables() 2194 return create_joined_success, create_joined_msg 2195 2196 select_unseen_query = ( 2197 "SELECT " 2198 + (', '.join([ 2199 ( 2200 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None) 2201 + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor) 2202 + " THEN " + sql_item_name(c + '_delta', self.flavor, None) 2203 + "\n ELSE NULL\nEND " 2204 + " AS " + sql_item_name(c, self.flavor, None) 2205 ) for c, typ in delta_cols.items() 2206 ])) 2207 + f"\nFROM {temp_table_names['joined']}\n" 2208 + "WHERE " 2209 + '\nAND\n'.join([ 2210 ( 2211 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL' 2212 ) for c in delta_cols 2213 ]) 2214 ) 2215 create_unseen_query = get_create_table_queries( 2216 select_unseen_query, 2217 temp_tables['unseen'], 2218 self.flavor, 2219 internal_schema, 2220 )[0] 2221 (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute( 2222 session, 2223 create_unseen_query, 2224 with_results=True, 2225 debug=debug 2226 ) if not upsert else ((True, "Success"), None) 2227 if not create_unseen_success: 2228 _ = clean_up_temp_tables() 2229 return create_unseen_success, create_unseen_msg 2230 2231 select_update_query = ( 2232 "SELECT " 2233 + (', '.join([ 2234 ( 2235 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None) 2236 + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor) 2237 + " THEN " + sql_item_name(c + '_delta', self.flavor, None) 2238 + "\n ELSE NULL\nEND " 2239 + " AS " + sql_item_name(c, self.flavor, None) 2240 ) for c, typ in delta_cols.items() 2241 ])) 2242 + f"\nFROM {temp_table_names['joined']}\n" 2243 + "WHERE " 2244 + '\nOR\n'.join([ 2245 ( 2246 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL' 2247 ) for c in delta_cols 2248 ]) 2249 ) 2250 2251 create_update_query = get_create_table_queries( 2252 select_update_query, 2253 temp_tables['update'], 2254 self.flavor, 2255 internal_schema, 2256 )[0] 2257 (create_update_success, create_update_msg), create_update_results = session_execute( 2258 session, 2259 create_update_query, 2260 with_results=True, 2261 debug=debug, 2262 ) if on_cols and not upsert else ((True, "Success"), []) 2263 apply_update_queries = ( 2264 get_update_queries( 2265 pipe.target, 2266 temp_tables['update'], 2267 session, 2268 on_cols, 2269 upsert=upsert, 2270 schema=self.get_pipe_schema(pipe), 2271 patch_schema=internal_schema, 2272 datetime_col=pipe.columns.get('datetime', None), 2273 flavor=self.flavor, 2274 debug=debug, 2275 ) 2276 if on_cols else [] 2277 ) 2278 2279 apply_unseen_queries = [ 2280 ( 2281 f"INSERT INTO {pipe_name} ({delta_cols_str})\n" 2282 + f"SELECT {delta_cols_str}\nFROM " 2283 + ( 2284 temp_table_names['unseen'] 2285 if on_cols 2286 else temp_table_names['delta'] 2287 ) 2288 ), 2289 ] 2290 2291 (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute( 2292 session, 2293 apply_unseen_queries, 2294 
with_results=True, 2295 debug=debug, 2296 ) if not upsert else ((True, "Success"), None) 2297 if not apply_unseen_success: 2298 _ = clean_up_temp_tables() 2299 return apply_unseen_success, apply_unseen_msg 2300 unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0 2301 2302 (apply_update_success, apply_update_msg), apply_update_results = session_execute( 2303 session, 2304 apply_update_queries, 2305 with_results=True, 2306 debug=debug, 2307 ) 2308 if not apply_update_success: 2309 _ = clean_up_temp_tables() 2310 return apply_update_success, apply_update_msg 2311 update_count = apply_update_results[0].rowcount if apply_update_results else 0 2312 2313 session.commit() 2314 2315 msg = ( 2316 f"Inserted {unseen_count}, updated {update_count} rows." 2317 if not upsert 2318 else f"Upserted {update_count} row" + ('s' if update_count != 1 else '') + "." 2319 ) 2320 _ = clean_up_temp_tables(ready_to_drop=True) 2321 2322 return True, msg
If a pipe's connector is the same as its instance connector, it's more efficient to sync the pipe in-place rather than reading data into Pandas.
Parameters
- pipe (mrsm.Pipe): The pipe whose connector is the same as its instance.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the `WHERE` clause. See `meerschaum.utils.sql.build_where`.
- begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to `None`.
- end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to `None`.
- chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If `-1`, resort to the system configuration (default is `900`). A `chunksize` of `None` will sync all rows in one transaction. Defaults to `-1`.
- check_existing (bool, default True): If `True`, pull and diff with existing data from the pipe.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple`.
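A minimal usage sketch: `sync_pipe_inplace()` is normally invoked internally by `pipe.sync()` when a pipe's connector keys match its instance connector. The connector label, pipe keys, and fetch definition below are hypothetical.
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')   # assumes a configured 'sql:main' connector
>>> pipe = mrsm.Pipe(
...     'sql:main', 'demo',                    # connector keys match the instance,
...     instance='sql:main',                   # so the sync may happen in-place
...     columns={'datetime': 'dt'},
...     parameters={'fetch': {'definition': 'SELECT * FROM demo_src'}},
... )
>>> success, msg = conn.sync_pipe_inplace(pipe)
```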
2325def get_sync_time( 2326 self, 2327 pipe: 'mrsm.Pipe', 2328 params: Optional[Dict[str, Any]] = None, 2329 newest: bool = True, 2330 debug: bool = False, 2331) -> Union[datetime, int, None]: 2332 """Get a Pipe's most recent datetime value. 2333 2334 Parameters 2335 ---------- 2336 pipe: mrsm.Pipe 2337 The pipe to get the sync time for. 2338 2339 params: Optional[Dict[str, Any]], default None 2340 Optional params dictionary to build the `WHERE` clause. 2341 See `meerschaum.utils.sql.build_where`. 2342 2343 newest: bool, default True 2344 If `True`, get the most recent datetime (honoring `params`). 2345 If `False`, get the oldest datetime (ASC instead of DESC). 2346 2347 Returns 2348 ------- 2349 A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`. 2350 """ 2351 from meerschaum.utils.sql import sql_item_name, build_where 2352 table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 2353 2354 dt_col = pipe.columns.get('datetime', None) 2355 if dt_col is None: 2356 return None 2357 dt_col_name = sql_item_name(dt_col, self.flavor, None) 2358 2359 ASC_or_DESC = "DESC" if newest else "ASC" 2360 existing_cols = pipe.get_columns_types(debug=debug) 2361 valid_params = {} 2362 if params is not None: 2363 valid_params = {k: v for k, v in params.items() if k in existing_cols} 2364 2365 ### If no bounds are provided for the datetime column, 2366 ### add IS NOT NULL to the WHERE clause. 2367 if dt_col not in valid_params: 2368 valid_params[dt_col] = '_None' 2369 where = "" if not valid_params else build_where(valid_params, self) 2370 q = f"SELECT {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}\nLIMIT 1" 2371 if self.flavor == 'mssql': 2372 q = f"SELECT TOP 1 {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}" 2373 elif self.flavor == 'oracle': 2374 q = ( 2375 "SELECT * FROM (\n" 2376 + f" SELECT {dt_col_name}\nFROM {table}{where}\n " 2377 + f"ORDER BY {dt_col_name} {ASC_or_DESC}\n" 2378 + ") WHERE ROWNUM = 1" 2379 ) 2380 2381 try: 2382 db_time = self.value(q, silent=True, debug=debug) 2383 2384 ### No datetime could be found. 2385 if db_time is None: 2386 return None 2387 ### sqlite returns str. 2388 if isinstance(db_time, str): 2389 from meerschaum.utils.packages import attempt_import 2390 dateutil_parser = attempt_import('dateutil.parser') 2391 st = dateutil_parser.parse(db_time) 2392 ### Do nothing if a datetime object is returned. 2393 elif isinstance(db_time, datetime): 2394 if hasattr(db_time, 'to_pydatetime'): 2395 st = db_time.to_pydatetime() 2396 else: 2397 st = db_time 2398 ### Sometimes the datetime is actually a date. 2399 elif isinstance(db_time, date): 2400 st = datetime.combine(db_time, datetime.min.time()) 2401 ### Adding support for an integer datetime axis. 2402 elif 'int' in str(type(db_time)).lower(): 2403 st = int(db_time) 2404 ### Convert pandas timestamp to Python datetime. 2405 else: 2406 st = db_time.to_pydatetime() 2407 2408 sync_time = st 2409 2410 except Exception as e: 2411 sync_time = None 2412 warn(str(e)) 2413 2414 return sync_time
Get a Pipe's most recent datetime value.
Parameters
- pipe (mrsm.Pipe): The pipe to get the sync time for.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the `WHERE` clause. See `meerschaum.utils.sql.build_where`.
- newest (bool, default True): If `True`, get the most recent datetime (honoring `params`). If `False`, get the oldest datetime (ASC instead of DESC).
Returns
- A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
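A short sketch, assuming `pipe` is an existing pipe on this instance with a datetime axis (all keys hypothetical):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> newest_dt = conn.get_sync_time(pipe)                 # most recent datetime value
>>> oldest_dt = conn.get_sync_time(pipe, newest=False)   # oldest instead (ASC)
```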
2417def pipe_exists( 2418 self, 2419 pipe: mrsm.Pipe, 2420 debug: bool = False 2421) -> bool: 2422 """ 2423 Check that a Pipe's table exists. 2424 2425 Parameters 2426 ---------- 2427 pipe: mrsm.Pipe: 2428 The pipe to check. 2429 2430 debug: bool, default False 2431 Verbosity toggle. 2432 2433 Returns 2434 ------- 2435 A `bool` corresponding to whether a pipe's table exists. 2436 2437 """ 2438 from meerschaum.utils.sql import table_exists 2439 exists = table_exists( 2440 pipe.target, 2441 self, 2442 schema=self.get_pipe_schema(pipe), 2443 debug=debug, 2444 ) 2445 if debug: 2446 from meerschaum.utils.debug import dprint 2447 dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.')) 2448 return exists
Check that a Pipe's table exists.
Parameters
- pipe (mrsm.Pipe): The pipe to check.
- debug (bool, default False): Verbosity toggle.
Returns
- A `bool` corresponding to whether a pipe's table exists.
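For example (hypothetical keys), a pipe that has never been synced should not have a table yet:
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> conn.pipe_exists(pipe)   # True only once the target table exists
```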
2451def get_pipe_rowcount( 2452 self, 2453 pipe: mrsm.Pipe, 2454 begin: Union[datetime, int, None] = None, 2455 end: Union[datetime, int, None] = None, 2456 params: Optional[Dict[str, Any]] = None, 2457 remote: bool = False, 2458 debug: bool = False 2459) -> Union[int, None]: 2460 """ 2461 Get the rowcount for a pipe in accordance with given parameters. 2462 2463 Parameters 2464 ---------- 2465 pipe: mrsm.Pipe 2466 The pipe to query with. 2467 2468 begin: Union[datetime, int, None], default None 2469 The begin datetime value. 2470 2471 end: Union[datetime, int, None], default None 2472 The end datetime value. 2473 2474 params: Optional[Dict[str, Any]], default None 2475 See `meerschaum.utils.sql.build_where`. 2476 2477 remote: bool, default False 2478 If `True`, get the rowcount for the remote table. 2479 2480 debug: bool, default False 2481 Verbosity toggle. 2482 2483 Returns 2484 ------- 2485 An `int` for the number of rows if the `pipe` exists, otherwise `None`. 2486 2487 """ 2488 from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte 2489 from meerschaum.connectors.sql._fetch import get_pipe_query 2490 if remote: 2491 msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount." 2492 if 'fetch' not in pipe.parameters: 2493 error(msg) 2494 return None 2495 if 'definition' not in pipe.parameters['fetch']: 2496 error(msg) 2497 return None 2498 2499 _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 2500 2501 if not pipe.columns.get('datetime', None): 2502 _dt = pipe.guess_datetime() 2503 dt = sql_item_name(_dt, self.flavor, None) if _dt else None 2504 is_guess = True 2505 else: 2506 _dt = pipe.get_columns('datetime') 2507 dt = sql_item_name(_dt, self.flavor, None) 2508 is_guess = False 2509 2510 if begin is not None or end is not None: 2511 if is_guess: 2512 if _dt is None: 2513 warn( 2514 f"No datetime could be determined for {pipe}." 
2515 + "\n Ignoring begin and end...", 2516 stack=False, 2517 ) 2518 begin, end = None, None 2519 else: 2520 warn( 2521 f"A datetime wasn't specified for {pipe}.\n" 2522 + f" Using column \"{_dt}\" for datetime bounds...", 2523 stack=False, 2524 ) 2525 2526 2527 _datetime_name = sql_item_name( 2528 _dt, 2529 ( 2530 pipe.instance_connector.flavor 2531 if not remote 2532 else pipe.connector.flavor 2533 ), 2534 None, 2535 ) 2536 _cols_names = [ 2537 sql_item_name( 2538 col, 2539 ( 2540 pipe.instance_connector.flavor 2541 if not remote 2542 else pipe.connector.flavor 2543 ), 2544 None, 2545 ) 2546 for col in set( 2547 ( 2548 [_dt] 2549 if _dt 2550 else [] 2551 ) 2552 + ( 2553 [] 2554 if params is None 2555 else list(params.keys()) 2556 ) 2557 ) 2558 ] 2559 if not _cols_names: 2560 _cols_names = ['*'] 2561 2562 src = ( 2563 f"SELECT {', '.join(_cols_names)} FROM {_pipe_name}" 2564 if not remote 2565 else get_pipe_query(pipe) 2566 ) 2567 parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', self.flavor)}" 2568 query = wrap_query_with_cte(src, parent_query, self.flavor) 2569 if begin is not None or end is not None: 2570 query += "\nWHERE" 2571 if begin is not None: 2572 query += f""" 2573 {dt} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin)} 2574 """ 2575 if end is not None and begin is not None: 2576 query += "AND" 2577 if end is not None: 2578 query += f""" 2579 {dt} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end)} 2580 """ 2581 if params is not None: 2582 from meerschaum.utils.sql import build_where 2583 existing_cols = pipe.get_columns_types(debug=debug) 2584 valid_params = {k: v for k, v in params.items() if k in existing_cols} 2585 if valid_params: 2586 query += build_where(valid_params, self).replace('WHERE', ( 2587 'AND' if (begin is not None or end is not None) 2588 else 'WHERE' 2589 ) 2590 ) 2591 2592 result = self.value(query, debug=debug, silent=True) 2593 try: 2594 return int(result) 2595 except Exception as e: 2596 return None
Get the rowcount for a pipe in accordance with given parameters.
Parameters
- pipe (mrsm.Pipe): The pipe to query with.
- begin (Union[datetime, int, None], default None): The begin datetime value.
- end (Union[datetime, int, None], default None): The end datetime value.
- params (Optional[Dict[str, Any]], default None): See `meerschaum.utils.sql.build_where`.
- remote (bool, default False): If `True`, get the rowcount for the remote table.
- debug (bool, default False): Verbosity toggle.
Returns
- An `int` for the number of rows if the `pipe` exists, otherwise `None`.
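A bounded rowcount sketch (hypothetical keys). Per the query built above, `begin` is inclusive and `end` is exclusive:
```
>>> from datetime import datetime
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> conn.get_pipe_rowcount(
...     pipe,
...     begin=datetime(2024, 1, 1),   # dt >= begin
...     end=datetime(2024, 2, 1),     # dt < end
... )
```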
2599def drop_pipe( 2600 self, 2601 pipe: mrsm.Pipe, 2602 debug: bool = False, 2603 **kw 2604) -> SuccessTuple: 2605 """ 2606 Drop a pipe's tables but maintain its registration. 2607 2608 Parameters 2609 ---------- 2610 pipe: mrsm.Pipe 2611 The pipe to drop. 2612 2613 Returns 2614 ------- 2615 A `SuccessTuple` indicated success. 2616 """ 2617 from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS 2618 success = True 2619 target = pipe.target 2620 target_name = ( 2621 sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 2622 ) 2623 if table_exists(target, self, debug=debug): 2624 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 2625 success = self.exec( 2626 f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug 2627 ) is not None 2628 2629 msg = "Success" if success else f"Failed to drop {pipe}." 2630 return success, msg
Drop a pipe's tables but maintain its registration.
Parameters
- pipe (mrsm.Pipe): The pipe to drop.
Returns
- A `SuccessTuple` indicating success.
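For instance (hypothetical keys), dropping removes the table while the registration survives:
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> success, msg = conn.drop_pipe(pipe)   # drops the table, keeps the registration
```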
2633def clear_pipe( 2634 self, 2635 pipe: mrsm.Pipe, 2636 begin: Union[datetime, int, None] = None, 2637 end: Union[datetime, int, None] = None, 2638 params: Optional[Dict[str, Any]] = None, 2639 debug: bool = False, 2640 **kw 2641) -> SuccessTuple: 2642 """ 2643 Delete a pipe's data within a bounded or unbounded interval without dropping the table. 2644 2645 Parameters 2646 ---------- 2647 pipe: mrsm.Pipe 2648 The pipe to clear. 2649 2650 begin: Union[datetime, int, None], default None 2651 Beginning datetime. Inclusive. 2652 2653 end: Union[datetime, int, None], default None 2654 Ending datetime. Exclusive. 2655 2656 params: Optional[Dict[str, Any]], default None 2657 See `meerschaum.utils.sql.build_where`. 2658 2659 """ 2660 if not pipe.exists(debug=debug): 2661 return True, f"{pipe} does not exist, so nothing was cleared." 2662 2663 from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str 2664 pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 2665 2666 if not pipe.columns.get('datetime', None): 2667 _dt = pipe.guess_datetime() 2668 dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None 2669 is_guess = True 2670 else: 2671 _dt = pipe.get_columns('datetime') 2672 dt_name = sql_item_name(_dt, self.flavor, None) 2673 is_guess = False 2674 2675 if begin is not None or end is not None: 2676 if is_guess: 2677 if _dt is None: 2678 warn( 2679 f"No datetime could be determined for {pipe}." 2680 + "\n Ignoring datetime bounds...", 2681 stack = False, 2682 ) 2683 begin, end = None, None 2684 else: 2685 warn( 2686 f"A datetime wasn't specified for {pipe}.\n" 2687 + f" Using column \"{_dt}\" for datetime bounds...", 2688 stack = False, 2689 ) 2690 2691 valid_params = {} 2692 if params is not None: 2693 existing_cols = pipe.get_columns_types(debug=debug) 2694 valid_params = {k: v for k, v in params.items() if k in existing_cols} 2695 clear_query = ( 2696 f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n" 2697 + (' AND ' + build_where(valid_params, self, with_where=False) if valid_params else '') 2698 + ( 2699 f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin) 2700 if begin is not None else '' 2701 ) + ( 2702 f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end) 2703 if end is not None else '' 2704 ) 2705 ) 2706 success = self.exec(clear_query, silent=True, debug=debug) is not None 2707 msg = "Success" if success else f"Failed to clear {pipe}." 2708 return success, msg
Delete a pipe's data within a bounded or unbounded interval without dropping the table.
Parameters
- pipe (mrsm.Pipe): The pipe to clear.
- begin (Union[datetime, int, None], default None): Beginning datetime. Inclusive.
- end (Union[datetime, int, None], default None): Ending datetime. Exclusive.
- params (Optional[Dict[str, Any]], default None): See `meerschaum.utils.sql.build_where`.
Returns
- A `SuccessTuple` indicating success.
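To delete one month of rows without dropping the table, a sketch with hypothetical keys:
```
>>> from datetime import datetime
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> success, msg = conn.clear_pipe(
...     pipe,
...     begin=datetime(2024, 1, 1),   # inclusive
...     end=datetime(2024, 2, 1),     # exclusive
... )
```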
3299def deduplicate_pipe( 3300 self, 3301 pipe: mrsm.Pipe, 3302 begin: Union[datetime, int, None] = None, 3303 end: Union[datetime, int, None] = None, 3304 params: Optional[Dict[str, Any]] = None, 3305 debug: bool = False, 3306 **kwargs: Any 3307) -> SuccessTuple: 3308 """ 3309 Delete duplicate values within a pipe's table. 3310 3311 Parameters 3312 ---------- 3313 pipe: mrsm.Pipe 3314 The pipe whose table to deduplicate. 3315 3316 begin: Union[datetime, int, None], default None 3317 If provided, only deduplicate values greater than or equal to this value. 3318 3319 end: Union[datetime, int, None], default None 3320 If provided, only deduplicate values less than this value. 3321 3322 params: Optional[Dict[str, Any]], default None 3323 If provided, further limit deduplication to values which match this query dictionary. 3324 3325 debug: bool, default False 3326 Verbosity toggle. 3327 3328 Returns 3329 ------- 3330 A `SuccessTuple` indicating success. 3331 """ 3332 from meerschaum.utils.sql import ( 3333 sql_item_name, 3334 NO_CTE_FLAVORS, 3335 get_rename_table_queries, 3336 NO_SELECT_INTO_FLAVORS, 3337 DROP_IF_EXISTS_FLAVORS, 3338 get_create_table_query, 3339 format_cte_subquery, 3340 get_null_replacement, 3341 ) 3342 from meerschaum.utils.misc import generate_password, flatten_list 3343 3344 pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 3345 3346 if not pipe.exists(debug=debug): 3347 return False, f"Table {pipe_table_name} does not exist." 3348 3349 ### TODO: Handle deleting duplicates without a datetime axis. 3350 dt_col = pipe.columns.get('datetime', None) 3351 dt_col_name = sql_item_name(dt_col, self.flavor, None) 3352 cols_types = pipe.get_columns_types(debug=debug) 3353 existing_cols = pipe.get_columns_types(debug=debug) 3354 3355 get_rowcount_query = f"SELECT COUNT(*) FROM {pipe_table_name}" 3356 old_rowcount = self.value(get_rowcount_query, debug=debug) 3357 if old_rowcount is None: 3358 return False, f"Failed to get rowcount for table {pipe_table_name}." 3359 3360 ### Non-datetime indices that in fact exist. 3361 indices = [ 3362 col 3363 for key, col in pipe.columns.items() 3364 if col and col != dt_col and col in cols_types 3365 ] 3366 indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices] 3367 existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols] 3368 duplicates_cte_name = sql_item_name('dups', self.flavor, None) 3369 duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None) 3370 previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None) 3371 3372 index_list_str = ( 3373 sql_item_name(dt_col, self.flavor, None) 3374 if dt_col 3375 else '' 3376 ) 3377 index_list_str_ordered = ( 3378 ( 3379 sql_item_name(dt_col, self.flavor, None) + " DESC" 3380 ) 3381 if dt_col 3382 else '' 3383 ) 3384 if indices: 3385 index_list_str += ', ' + ', '.join(indices_names) 3386 index_list_str_ordered += ', ' + ', '.join(indices_names) 3387 if index_list_str.startswith(','): 3388 index_list_str = index_list_str.lstrip(',').lstrip() 3389 if index_list_str_ordered.startswith(','): 3390 index_list_str_ordered = index_list_str_ordered.lstrip(',').lstrip() 3391 3392 cols_list_str = ', '.join(existing_cols_names) 3393 3394 try: 3395 ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()). 
3396 is_old_mysql = ( 3397 self.flavor in ('mysql', 'mariadb') 3398 and 3399 int(self.db_version.split('.')[0]) < 8 3400 ) 3401 except Exception as e: 3402 is_old_mysql = False 3403 3404 src_query = f""" 3405 SELECT 3406 {cols_list_str}, 3407 ROW_NUMBER() OVER ( 3408 PARTITION BY 3409 {index_list_str} 3410 ORDER BY {index_list_str_ordered} 3411 ) AS {duplicate_row_number_name} 3412 FROM {pipe_table_name} 3413 """ 3414 duplicates_cte_subquery = format_cte_subquery( 3415 src_query, 3416 self.flavor, 3417 sub_name = 'src', 3418 cols_to_select = cols_list_str, 3419 ) + f""" 3420 WHERE {duplicate_row_number_name} = 1 3421 """ 3422 old_mysql_query = ( 3423 f""" 3424 SELECT 3425 {index_list_str} 3426 FROM ( 3427 SELECT 3428 {index_list_str}, 3429 IF( 3430 @{previous_row_number_name} <> {index_list_str.replace(', ', ' + ')}, 3431 @{duplicate_row_number_name} := 0, 3432 @{duplicate_row_number_name} 3433 ), 3434 @{previous_row_number_name} := {index_list_str.replace(', ', ' + ')}, 3435 @{duplicate_row_number_name} := @{duplicate_row_number_name} + 1 AS """ 3436 + f"""{duplicate_row_number_name} 3437 FROM 3438 {pipe_table_name}, 3439 ( 3440 SELECT @{duplicate_row_number_name} := 0 3441 ) AS {duplicate_row_number_name}, 3442 ( 3443 SELECT @{previous_row_number_name} := '{get_null_replacement('str', 'mysql')}' 3444 ) AS {previous_row_number_name} 3445 ORDER BY {index_list_str_ordered} 3446 ) AS t 3447 WHERE {duplicate_row_number_name} = 1 3448 """ 3449 ) 3450 if is_old_mysql: 3451 duplicates_cte_subquery = old_mysql_query 3452 3453 session_id = generate_password(3) 3454 3455 dedup_table = '-' + session_id + f'_dedup_{pipe.target}' 3456 temp_old_table = '-' + session_id + f"_old_{pipe.target}" 3457 3458 dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe)) 3459 temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe)) 3460 3461 create_temporary_table_query = get_create_table_query( 3462 duplicates_cte_subquery, 3463 dedup_table, 3464 self.flavor, 3465 ) + f""" 3466 ORDER BY {index_list_str_ordered} 3467 """ 3468 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 3469 alter_queries = flatten_list([ 3470 get_rename_table_queries( 3471 pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe) 3472 ), 3473 get_rename_table_queries( 3474 dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe) 3475 ), 3476 f""" 3477 DROP TABLE {if_exists_str} {temp_old_table_name} 3478 """, 3479 ]) 3480 3481 create_temporary_result = self.execute(create_temporary_table_query, debug=debug) 3482 if create_temporary_result is None: 3483 return False, f"Failed to deduplicate table {pipe_table_name}." 3484 3485 results = self.exec_queries( 3486 alter_queries, 3487 break_on_error=True, 3488 rollback=True, 3489 debug=debug, 3490 ) 3491 3492 fail_query = None 3493 for result, query in zip(results, alter_queries): 3494 if result is None: 3495 fail_query = query 3496 break 3497 success = fail_query is None 3498 3499 new_rowcount = ( 3500 self.value(get_rowcount_query, debug=debug) 3501 if success 3502 else None 3503 ) 3504 3505 msg = ( 3506 ( 3507 f"Successfully deduplicated table {pipe_table_name}" 3508 + ( 3509 f"\nfrom {old_rowcount} to {new_rowcount} rows" 3510 if old_rowcount != new_rowcount 3511 else '' 3512 ) 3513 + '.' 3514 ) 3515 if success 3516 else f"Failed to execute query:\n{fail_query}" 3517 ) 3518 return success, msg
Delete duplicate values within a pipe's table.
Parameters
- pipe (mrsm.Pipe): The pipe whose table to deduplicate.
- begin (Union[datetime, int, None], default None): If provided, only deduplicate values greater than or equal to this value.
- end (Union[datetime, int, None], default None): If provided, only deduplicate values less than this value.
- params (Optional[Dict[str, Any]], default None): If provided, further limit deduplication to values which match this query dictionary.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` indicating success.
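A sketch with hypothetical keys; duplicates are resolved by partitioning on the datetime and index columns, as in the `ROW_NUMBER()` query above:
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main',
...                  columns={'datetime': 'dt', 'id': 'id'})
>>> success, msg = conn.deduplicate_pipe(pipe)
```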
2711def get_pipe_table( 2712 self, 2713 pipe: mrsm.Pipe, 2714 debug: bool = False, 2715) -> Union['sqlalchemy.Table', None]: 2716 """ 2717 Return the `sqlalchemy.Table` object for a `mrsm.Pipe`. 2718 2719 Parameters 2720 ---------- 2721 pipe: mrsm.Pipe: 2722 The pipe in question. 2723 2724 Returns 2725 ------- 2726 A `sqlalchemy.Table` object. 2727 2728 """ 2729 from meerschaum.utils.sql import get_sqlalchemy_table 2730 if not pipe.exists(debug=debug): 2731 return None 2732 return get_sqlalchemy_table( 2733 pipe.target, 2734 connector=self, 2735 schema=self.get_pipe_schema(pipe), 2736 debug=debug, 2737 refresh=True, 2738 )
Return the `sqlalchemy.Table` object for a `mrsm.Pipe`.
Parameters
- pipe (mrsm.Pipe): The pipe in question.
Returns
- A `sqlalchemy.Table` object.
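A sketch (hypothetical keys); per the source above, `None` is returned when the pipe's table does not yet exist:
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> table = conn.get_pipe_table(pipe)   # a sqlalchemy.Table, or None if absent
```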
2741def get_pipe_columns_types( 2742 self, 2743 pipe: mrsm.Pipe, 2744 debug: bool = False, 2745) -> Dict[str, str]: 2746 """ 2747 Get the pipe's columns and types. 2748 2749 Parameters 2750 ---------- 2751 pipe: mrsm.Pipe: 2752 The pipe to get the columns for. 2753 2754 Returns 2755 ------- 2756 A dictionary of columns names (`str`) and types (`str`). 2757 2758 Examples 2759 -------- 2760 >>> conn.get_pipe_columns_types(pipe) 2761 { 2762 'dt': 'TIMESTAMP WITHOUT TIMEZONE', 2763 'id': 'BIGINT', 2764 'val': 'DOUBLE PRECISION', 2765 } 2766 >>> 2767 """ 2768 from meerschaum.utils.sql import get_table_cols_types 2769 if not pipe.exists(debug=debug): 2770 return {} 2771 2772 if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite'): 2773 return get_table_cols_types( 2774 pipe.target, 2775 self, 2776 flavor=self.flavor, 2777 schema=self.get_pipe_schema(pipe), 2778 debug=debug, 2779 ) 2780 2781 table_columns = {} 2782 try: 2783 pipe_table = self.get_pipe_table(pipe, debug=debug) 2784 if pipe_table is None: 2785 return {} 2786 for col in pipe_table.columns: 2787 table_columns[str(col.name)] = str(col.type) 2788 except Exception as e: 2789 import traceback 2790 traceback.print_exc() 2791 warn(e) 2792 table_columns = {} 2793 2794 return table_columns
Get the pipe's columns and types.
Parameters
- pipe (mrsm.Pipe): The pipe to get the columns for.
Returns
- A dictionary of column names (`str`) and types (`str`).
Examples
>>> conn.get_pipe_columns_types(pipe)
{
'dt': 'TIMESTAMP WITHOUT TIMEZONE',
'id': 'BIGINT',
'val': 'DOUBLE PRECISION',
}
>>>
3244def get_to_sql_dtype( 3245 self, 3246 pipe: 'mrsm.Pipe', 3247 df: 'pd.DataFrame', 3248 update_dtypes: bool = True, 3249) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']: 3250 """ 3251 Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`. 3252 3253 Parameters 3254 ---------- 3255 pipe: mrsm.Pipe 3256 The pipe which may contain a `dtypes` parameter. 3257 3258 df: pd.DataFrame 3259 The DataFrame to be pushed via `to_sql()`. 3260 3261 update_dtypes: bool, default True 3262 If `True`, patch the pipe's dtypes onto the DataFrame's dtypes. 3263 3264 Returns 3265 ------- 3266 A dictionary with `sqlalchemy` datatypes. 3267 3268 Examples 3269 -------- 3270 >>> import pandas as pd 3271 >>> import meerschaum as mrsm 3272 >>> 3273 >>> conn = mrsm.get_connector('sql:memory') 3274 >>> df = pd.DataFrame([{'a': {'b': 1}}]) 3275 >>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'}) 3276 >>> get_to_sql_dtype(pipe, df) 3277 {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>} 3278 """ 3279 from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols 3280 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 3281 df_dtypes = { 3282 col: str(typ) 3283 for col, typ in df.dtypes.items() 3284 } 3285 json_cols = get_json_cols(df) 3286 numeric_cols = get_numeric_cols(df) 3287 uuid_cols = get_uuid_cols(df) 3288 df_dtypes.update({col: 'json' for col in json_cols}) 3289 df_dtypes.update({col: 'numeric' for col in numeric_cols}) 3290 df_dtypes.update({col: 'uuid' for col in uuid_cols}) 3291 if update_dtypes: 3292 df_dtypes.update(pipe.dtypes) 3293 return { 3294 col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True) 3295 for col, typ in df_dtypes.items() 3296 }
Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
Parameters
- pipe (mrsm.Pipe): The pipe which may contain a `dtypes` parameter.
- df (pd.DataFrame): The DataFrame to be pushed via `to_sql()`.
- update_dtypes (bool, default True): If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.
Returns
- A dictionary with `sqlalchemy` datatypes.
Examples
>>> import pandas as pd
>>> import meerschaum as mrsm
>>>
>>> conn = mrsm.get_connector('sql:memory')
>>> df = pd.DataFrame([{'a': {'b': 1}}])
>>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
>>> get_to_sql_dtype(pipe, df)
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3521def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]: 3522 """ 3523 Return the schema to use for this pipe. 3524 First check `pipe.parameters['schema']`, then check `self.schema`. 3525 3526 Parameters 3527 ---------- 3528 pipe: mrsm.Pipe 3529 The pipe which may contain a configured schema. 3530 3531 Returns 3532 ------- 3533 A schema string or `None` if nothing is configured. 3534 """ 3535 return pipe.parameters.get('schema', self.schema)
Return the schema to use for this pipe.
First check `pipe.parameters['schema']`, then check `self.schema`.
Parameters
- pipe (mrsm.Pipe): The pipe which may contain a configured schema.
Returns
- A schema string or `None` if nothing is configured.
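For example (the schema name is hypothetical); without a `schema` parameter, the connector's own `schema` attribute is returned:
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main',
...                  parameters={'schema': 'analytics'})
>>> conn.get_pipe_schema(pipe)
'analytics'
```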
1334def create_pipe_table_from_df( 1335 self, 1336 pipe: mrsm.Pipe, 1337 df: 'pd.DataFrame', 1338 debug: bool = False, 1339) -> mrsm.SuccessTuple: 1340 """ 1341 Create a pipe's table from its configured dtypes and an incoming dataframe. 1342 """ 1343 from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols 1344 from meerschaum.utils.sql import get_create_table_queries, sql_item_name 1345 primary_key = pipe.columns.get('primary', None) 1346 dt_col = pipe.columns.get('datetime', None) 1347 new_dtypes = { 1348 **{ 1349 col: str(typ) 1350 for col, typ in df.dtypes.items() 1351 }, 1352 **{ 1353 col: str(df.dtypes.get(col, 'int')) 1354 for col_ix, col in pipe.columns.items() 1355 if col and col_ix != 'primary' 1356 }, 1357 **{ 1358 col: 'uuid' 1359 for col in get_uuid_cols(df) 1360 }, 1361 **{ 1362 col: 'json' 1363 for col in get_json_cols(df) 1364 }, 1365 **{ 1366 col: 'numeric' 1367 for col in get_numeric_cols(df) 1368 }, 1369 **pipe.dtypes 1370 } 1371 autoincrement = ( 1372 pipe.parameters.get('autoincrement', False) 1373 or (primary_key and primary_key not in new_dtypes) 1374 ) 1375 if autoincrement: 1376 _ = new_dtypes.pop(primary_key, None) 1377 1378 create_table_queries = get_create_table_queries( 1379 new_dtypes, 1380 pipe.target, 1381 self.flavor, 1382 schema=self.get_pipe_schema(pipe), 1383 primary_key=primary_key, 1384 datetime_column=dt_col, 1385 ) 1386 success = all( 1387 self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug) 1388 ) 1389 target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor) 1390 msg = ( 1391 "Success" 1392 if success 1393 else f"Failed to create {target_name}." 1394 ) 1395 return success, msg
Create a pipe's table from its configured dtypes and an incoming dataframe.
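No parameters are documented above, so here is a hedged sketch: the table's dtypes are inferred from the DataFrame (including JSON, numeric, and UUID columns) and patched with the pipe's configured `dtypes`. Keys and data are hypothetical.
```
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main',
...                  columns={'datetime': 'dt', 'id': 'id'})
>>> df = pd.DataFrame([{'dt': '2024-01-01', 'id': 1, 'val': 1.5}])
>>> success, msg = conn.create_pipe_table_from_df(pipe, df)
```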
2797def get_pipe_columns_indices( 2798 self, 2799 pipe: mrsm.Pipe, 2800 debug: bool = False, 2801) -> Dict[str, List[Dict[str, str]]]: 2802 """ 2803 Return a dictionary mapping columns to the indices created on those columns. 2804 2805 Parameters 2806 ---------- 2807 pipe: mrsm.Pipe 2808 The pipe to be queried against. 2809 2810 Returns 2811 ------- 2812 A dictionary mapping columns names to lists of dictionaries. 2813 The dictionaries in the lists contain the name and type of the indices. 2814 """ 2815 if pipe.__dict__.get('_skip_check_indices', False): 2816 return {} 2817 from meerschaum.utils.sql import get_table_cols_indices 2818 return get_table_cols_indices( 2819 pipe.target, 2820 self, 2821 flavor=self.flavor, 2822 schema=self.get_pipe_schema(pipe), 2823 debug=debug, 2824 )
Return a dictionary mapping columns to the indices created on those columns.
Parameters
- pipe (mrsm.Pipe): The pipe to be queried against.
Returns
- A dictionary mapping column names to lists of dictionaries. The dictionaries in the lists contain the name and type of the indices.
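An illustrative call (hypothetical keys); the exact index names and types vary by flavor, so the output shape below is only indicative:
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('sql:main', 'demo', instance='sql:main')
>>> conn.get_pipe_columns_indices(pipe)
{'id': [{'name': 'IX_demo_id', 'type': 'INDEX'}]}
```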
17def register_plugin( 18 self, 19 plugin: 'mrsm.core.Plugin', 20 force: bool = False, 21 debug: bool = False, 22 **kw: Any 23) -> SuccessTuple: 24 """Register a new plugin to the plugins table.""" 25 from meerschaum.utils.warnings import warn, error 26 from meerschaum.utils.packages import attempt_import 27 sqlalchemy = attempt_import('sqlalchemy') 28 from meerschaum.utils.sql import json_flavors 29 from meerschaum.connectors.sql.tables import get_tables 30 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 31 32 old_id = self.get_plugin_id(plugin, debug=debug) 33 34 ### Check for version conflict. May be overridden with `--force`. 35 if old_id is not None and not force: 36 old_version = self.get_plugin_version(plugin, debug=debug) 37 new_version = plugin.version 38 if old_version is None: 39 old_version = '' 40 if new_version is None: 41 new_version = '' 42 43 ### verify that the new version is greater than the old 44 packaging_version = attempt_import('packaging.version') 45 if ( 46 old_version and new_version 47 and packaging_version.parse(old_version) >= packaging_version.parse(new_version) 48 ): 49 return False, ( 50 f"Version '{new_version}' of plugin '{plugin}' " + 51 f"must be greater than existing version '{old_version}'." 52 ) 53 54 bind_variables = { 55 'plugin_name': plugin.name, 56 'version': plugin.version, 57 'attributes': ( 58 json.dumps(plugin.attributes) if self.flavor not in json_flavors else plugin.attributes 59 ), 60 'user_id': plugin.user_id, 61 } 62 63 if old_id is None: 64 query = sqlalchemy.insert(plugins_tbl).values(**bind_variables) 65 else: 66 query = ( 67 sqlalchemy.update(plugins_tbl) 68 .values(**bind_variables) 69 .where(plugins_tbl.c.plugin_id == old_id) 70 ) 71 72 result = self.exec(query, debug=debug) 73 if result is None: 74 return False, f"Failed to register plugin '{plugin}'." 75 return True, f"Successfully registered plugin '{plugin}'."
Register a new plugin to the plugins table.
243def delete_plugin( 244 self, 245 plugin: 'mrsm.core.Plugin', 246 debug: bool = False, 247 **kw: Any 248) -> SuccessTuple: 249 """Delete a plugin from the plugins table.""" 250 from meerschaum.utils.warnings import warn, error 251 from meerschaum.utils.packages import attempt_import 252 sqlalchemy = attempt_import('sqlalchemy') 253 from meerschaum.connectors.sql.tables import get_tables 254 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 255 256 plugin_id = self.get_plugin_id(plugin, debug=debug) 257 if plugin_id is None: 258 return True, f"Plugin '{plugin}' was not registered." 259 260 bind_variables = { 261 'plugin_id' : plugin_id, 262 } 263 264 query = sqlalchemy.delete(plugins_tbl).where(plugins_tbl.c.plugin_id == plugin_id) 265 result = self.exec(query, debug=debug) 266 if result is None: 267 return False, f"Failed to delete plugin '{plugin}'." 268 return True, f"Successfully deleted plugin '{plugin}'."
Delete a plugin from the plugins table.
77def get_plugin_id( 78 self, 79 plugin: 'mrsm.core.Plugin', 80 debug: bool = False 81) -> Optional[int]: 82 """ 83 Return a plugin's ID. 84 """ 85 ### ensure plugins table exists 86 from meerschaum.connectors.sql.tables import get_tables 87 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 88 from meerschaum.utils.packages import attempt_import 89 sqlalchemy = attempt_import('sqlalchemy') 90 91 query = ( 92 sqlalchemy 93 .select(plugins_tbl.c.plugin_id) 94 .where(plugins_tbl.c.plugin_name == plugin.name) 95 ) 96 97 try: 98 return int(self.value(query, debug=debug)) 99 except Exception as e: 100 return None
Return a plugin's ID.
102def get_plugin_version( 103 self, 104 plugin: 'mrsm.core.Plugin', 105 debug: bool = False 106) -> Optional[str]: 107 """ 108 Return a plugin's version. 109 """ 110 ### ensure plugins table exists 111 from meerschaum.connectors.sql.tables import get_tables 112 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 113 from meerschaum.utils.packages import attempt_import 114 sqlalchemy = attempt_import('sqlalchemy') 115 query = sqlalchemy.select(plugins_tbl.c.version).where(plugins_tbl.c.plugin_name == plugin.name) 116 return self.value(query, debug=debug)
Return a plugin's version.
196def get_plugins( 197 self, 198 user_id: Optional[int] = None, 199 search_term: Optional[str] = None, 200 debug: bool = False, 201 **kw: Any 202) -> List[str]: 203 """ 204 Return a list of all registered plugins. 205 206 Parameters 207 ---------- 208 user_id: Optional[int], default None 209 If specified, filter plugins by a specific `user_id`. 210 211 search_term: Optional[str], default None 212 If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins. 213 214 215 Returns 216 ------- 217 A list of plugin names. 218 """ 219 ### ensure plugins table exists 220 from meerschaum.connectors.sql.tables import get_tables 221 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 222 from meerschaum.utils.packages import attempt_import 223 sqlalchemy = attempt_import('sqlalchemy') 224 225 query = sqlalchemy.select(plugins_tbl.c.plugin_name) 226 if user_id is not None: 227 query = query.where(plugins_tbl.c.user_id == user_id) 228 if search_term is not None: 229 query = query.where(plugins_tbl.c.plugin_name.like(search_term + '%')) 230 231 rows = ( 232 self.execute(query).fetchall() 233 if self.flavor != 'duckdb' 234 else [ 235 (row['plugin_name'],) 236 for row in self.read(query).to_dict(orient='records') 237 ] 238 ) 239 240 return [row[0] for row in rows]
Return a list of all registered plugins.
Parameters
- user_id (Optional[int], default None): If specified, filter plugins by a specific `user_id`.
- search_term (Optional[str], default None): If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins.
Returns
- A list of plugin names.
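For example (plugin names are hypothetical):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> conn.get_plugins()                      # all registered plugin names
>>> conn.get_plugins(search_term='noaa')    # only names beginning with 'noaa'
```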
118def get_plugin_user_id( 119 self, 120 plugin: 'mrsm.core.Plugin', 121 debug: bool = False 122) -> Optional[int]: 123 """ 124 Return a plugin's user ID. 125 """ 126 ### ensure plugins table exists 127 from meerschaum.connectors.sql.tables import get_tables 128 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 129 from meerschaum.utils.packages import attempt_import 130 sqlalchemy = attempt_import('sqlalchemy') 131 132 query = ( 133 sqlalchemy 134 .select(plugins_tbl.c.user_id) 135 .where(plugins_tbl.c.plugin_name == plugin.name) 136 ) 137 138 try: 139 return int(self.value(query, debug=debug)) 140 except Exception as e: 141 return None
Return a plugin's user ID.
143def get_plugin_username( 144 self, 145 plugin: 'mrsm.core.Plugin', 146 debug: bool = False 147) -> Optional[str]: 148 """ 149 Return the username of a plugin's owner. 150 """ 151 ### ensure plugins table exists 152 from meerschaum.connectors.sql.tables import get_tables 153 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 154 users = get_tables(mrsm_instance=self, debug=debug)['users'] 155 from meerschaum.utils.packages import attempt_import 156 sqlalchemy = attempt_import('sqlalchemy') 157 158 query = ( 159 sqlalchemy.select(users.c.username) 160 .where( 161 users.c.user_id == plugins_tbl.c.user_id 162 and plugins_tbl.c.plugin_name == plugin.name 163 ) 164 ) 165 166 return self.value(query, debug=debug)
Return the username of a plugin's owner.
169def get_plugin_attributes( 170 self, 171 plugin: 'mrsm.core.Plugin', 172 debug: bool = False 173) -> Dict[str, Any]: 174 """ 175 Return the attributes of a plugin. 176 """ 177 ### ensure plugins table exists 178 from meerschaum.connectors.sql.tables import get_tables 179 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 180 from meerschaum.utils.packages import attempt_import 181 sqlalchemy = attempt_import('sqlalchemy') 182 183 query = ( 184 sqlalchemy 185 .select(plugins_tbl.c.attributes) 186 .where(plugins_tbl.c.plugin_name == plugin.name) 187 ) 188 189 _attr = self.value(query, debug=debug) 190 if isinstance(_attr, str): 191 _attr = json.loads(_attr) 192 elif _attr is None: 193 _attr = {} 194 return _attr
Return the attributes of a plugin.
16def register_user( 17 self, 18 user: mrsm.core.User, 19 debug: bool = False, 20 **kw: Any 21) -> SuccessTuple: 22 """Register a new user.""" 23 from meerschaum.utils.warnings import warn, error, info 24 from meerschaum.utils.packages import attempt_import 25 from meerschaum.utils.sql import json_flavors 26 sqlalchemy = attempt_import('sqlalchemy') 27 28 valid_tuple = valid_username(user.username) 29 if not valid_tuple[0]: 30 return valid_tuple 31 32 old_id = self.get_user_id(user, debug=debug) 33 34 if old_id is not None: 35 return False, f"User '{user}' already exists." 36 37 ### ensure users table exists 38 from meerschaum.connectors.sql.tables import get_tables 39 tables = get_tables(mrsm_instance=self, debug=debug) 40 41 import json 42 bind_variables = { 43 'username': user.username, 44 'email': user.email, 45 'password_hash': user.password_hash, 46 'user_type': user.type, 47 'attributes': ( 48 json.dumps(user.attributes) if self.flavor not in json_flavors else user.attributes 49 ), 50 } 51 if old_id is not None: 52 return False, f"User '{user.username}' already exists." 53 if old_id is None: 54 query = ( 55 sqlalchemy.insert(tables['users']). 56 values(**bind_variables) 57 ) 58 59 result = self.exec(query, debug=debug) 60 if result is None: 61 return False, f"Failed to register user '{user}'." 62 return True, f"Successfully registered user '{user}'."
Register a new user.
154def get_user_id( 155 self, 156 user: 'mrsm.core.User', 157 debug: bool = False 158) -> Optional[int]: 159 """If a user is registered, return the `user_id`.""" 160 ### ensure users table exists 161 from meerschaum.utils.packages import attempt_import 162 sqlalchemy = attempt_import('sqlalchemy') 163 from meerschaum.connectors.sql.tables import get_tables 164 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 165 166 query = ( 167 sqlalchemy.select(users_tbl.c.user_id) 168 .where(users_tbl.c.username == user.username) 169 ) 170 171 result = self.value(query, debug=debug) 172 if result is not None: 173 return int(result) 174 return None
If a user is registered, return the `user_id`.
248def get_users( 249 self, 250 debug: bool = False, 251 **kw: Any 252) -> List[str]: 253 """ 254 Get the registered usernames. 255 """ 256 ### ensure users table exists 257 from meerschaum.connectors.sql.tables import get_tables 258 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 259 from meerschaum.utils.packages import attempt_import 260 sqlalchemy = attempt_import('sqlalchemy') 261 262 query = sqlalchemy.select(users_tbl.c.username) 263 264 return list(self.read(query, debug=debug)['username'])
Get the registered usernames.
99def edit_user( 100 self, 101 user: 'mrsm.core.User', 102 debug: bool = False, 103 **kw: Any 104) -> SuccessTuple: 105 """Update an existing user's metadata.""" 106 from meerschaum.utils.packages import attempt_import 107 sqlalchemy = attempt_import('sqlalchemy') 108 from meerschaum.connectors.sql.tables import get_tables 109 from meerschaum.utils.sql import json_flavors 110 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 111 112 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 113 if user_id is None: 114 return False, ( 115 f"User '{user.username}' does not exist. " + 116 f"Register user '{user.username}' before editing." 117 ) 118 user.user_id = user_id 119 120 import json 121 valid_tuple = valid_username(user.username) 122 if not valid_tuple[0]: 123 return valid_tuple 124 125 bind_variables = { 126 'user_id' : user_id, 127 'username' : user.username, 128 } 129 if user.password != '': 130 bind_variables['password_hash'] = user.password_hash 131 if user.email != '': 132 bind_variables['email'] = user.email 133 if user.attributes is not None and user.attributes != {}: 134 bind_variables['attributes'] = ( 135 json.dumps(user.attributes) if self.flavor in ('duckdb',) 136 else user.attributes 137 ) 138 if user.type != '': 139 bind_variables['user_type'] = user.type 140 141 query = ( 142 sqlalchemy 143 .update(users_tbl) 144 .values(**bind_variables) 145 .where(users_tbl.c.user_id == user_id) 146 ) 147 148 result = self.exec(query, debug=debug) 149 if result is None: 150 return False, f"Failed to edit user '{user}'." 151 return True, f"Successfully edited user '{user}'."
Update an existing user's metadata.
216def delete_user( 217 self, 218 user: 'mrsm.core.User', 219 debug: bool = False 220) -> SuccessTuple: 221 """Delete a user's record from the users table.""" 222 ### ensure users table exists 223 from meerschaum.connectors.sql.tables import get_tables 224 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 225 plugins = get_tables(mrsm_instance=self, debug=debug)['plugins'] 226 from meerschaum.utils.packages import attempt_import 227 sqlalchemy = attempt_import('sqlalchemy') 228 229 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 230 231 if user_id is None: 232 return False, f"User '{user.username}' is not registered and cannot be deleted." 233 234 query = sqlalchemy.delete(users_tbl).where(users_tbl.c.user_id == user_id) 235 236 result = self.exec(query, debug=debug) 237 if result is None: 238 return False, f"Failed to delete user '{user}'." 239 240 query = sqlalchemy.delete(plugins).where(plugins.c.user_id == user_id) 241 result = self.exec(query, debug=debug) 242 if result is None: 243 return False, f"Failed to delete plugins of user '{user}'." 244 245 return True, f"Successfully deleted user '{user}'"
Delete a user's record from the users table.
267def get_user_password_hash( 268 self, 269 user: 'mrsm.core.User', 270 debug: bool = False, 271 **kw: Any 272) -> Optional[str]: 273 """ 274 Return the password has for a user. 275 **NOTE**: This may be dangerous and is only allowed if the security settings explicity allow it. 276 """ 277 from meerschaum.utils.debug import dprint 278 from meerschaum.connectors.sql.tables import get_tables 279 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 280 from meerschaum.utils.packages import attempt_import 281 sqlalchemy = attempt_import('sqlalchemy') 282 283 if user.user_id is not None: 284 user_id = user.user_id 285 if debug: 286 dprint(f"Already given user_id: {user_id}") 287 else: 288 if debug: 289 dprint("Fetching user_id...") 290 user_id = self.get_user_id(user, debug=debug) 291 292 if user_id is None: 293 return None 294 295 query = sqlalchemy.select(users_tbl.c.password_hash).where(users_tbl.c.user_id == user_id) 296 297 return self.value(query, debug=debug)
Return the password hash for a user. NOTE: This may be dangerous and is only allowed if the security settings explicitly allow it.
300def get_user_type( 301 self, 302 user: 'mrsm.core.User', 303 debug: bool = False, 304 **kw: Any 305) -> Optional[str]: 306 """ 307 Return the user's type. 308 """ 309 from meerschaum.connectors.sql.tables import get_tables 310 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 311 from meerschaum.utils.packages import attempt_import 312 sqlalchemy = attempt_import('sqlalchemy') 313 314 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 315 316 if user_id is None: 317 return None 318 319 query = sqlalchemy.select(users_tbl.c.user_type).where(users_tbl.c.user_id == user_id) 320 321 return self.value(query, debug=debug)
Return the user's type.
176def get_user_attributes( 177 self, 178 user: 'mrsm.core.User', 179 debug: bool = False 180) -> Union[Dict[str, Any], None]: 181 """ 182 Return the user's attributes. 183 """ 184 ### ensure users table exists 185 from meerschaum.utils.warnings import warn 186 from meerschaum.utils.packages import attempt_import 187 sqlalchemy = attempt_import('sqlalchemy') 188 from meerschaum.connectors.sql.tables import get_tables 189 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 190 191 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 192 193 query = ( 194 sqlalchemy.select(users_tbl.c.attributes) 195 .where(users_tbl.c.user_id == user_id) 196 ) 197 198 result = self.value(query, debug=debug) 199 if result is not None and not isinstance(result, dict): 200 try: 201 result = dict(result) 202 _parsed = True 203 except Exception as e: 204 _parsed = False 205 if not _parsed: 206 try: 207 import json 208 result = json.loads(result) 209 _parsed = True 210 except Exception as e: 211 _parsed = False 212 if not _parsed: 213 warn(f"Received unexpected type for attributes: {result}") 214 return result
Return the user's attributes.
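A short sketch reading a user's type and attributes from the same instance (the username is again hypothetical):
```
import meerschaum as mrsm
from meerschaum.core import User

conn = mrsm.get_connector('sql', 'main')
user = User('foo')                      # hypothetical registered user
user_type = conn.get_user_type(user)    # e.g. 'admin', or None if unregistered
attrs = conn.get_user_attributes(user)  # dict of attributes, or None
```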
15@classmethod 16def from_uri( 17 cls, 18 uri: str, 19 label: Optional[str] = None, 20 as_dict: bool = False, 21 ) -> Union[ 22 'meerschaum.connectors.SQLConnector', 23 Dict[str, Union[str, int]], 24 ]: 25 """ 26 Create a new SQLConnector from a URI string. 27 28 Parameters 29 ---------- 30 uri: str 31 The URI connection string. 32 33 label: Optional[str], default None 34 If provided, use this as the connector label. 35 Otherwise use the determined database name. 36 37 as_dict: bool, default False 38 If `True`, return a dictionary of the keyword arguments 39 necessary to create a new `SQLConnector`, otherwise create a new object. 40 41 Returns 42 ------- 43 A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`). 44 """ 45 46 params = cls.parse_uri(uri) 47 params['uri'] = uri 48 flavor = params.get('flavor', None) 49 if not flavor or flavor not in cls.flavor_configs: 50 error(f"Invalid flavor '{flavor}' detected from the provided URI.") 51 52 if 'database' not in params: 53 error("Unable to determine the database from the provided URI.") 54 55 if flavor in ('sqlite', 'duckdb'): 56 if params['database'] == ':memory:': 57 params['label'] = label or f'memory_{flavor}' 58 else: 59 params['label'] = label or params['database'].split(os.path.sep)[-1].lower() 60 else: 61 params['label'] = label or ( 62 ( 63 (params['username'] + '@' if 'username' in params else '') 64 + params.get('host', '') 65 + ('/' if 'host' in params else '') 66 + params.get('database', '') 67 ).lower() 68 ) 69 70 return cls(**params) if not as_dict else params
Create a new SQLConnector from a URI string.
Parameters
- uri (str): The URI connection string.
- label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
- as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new SQLConnector; otherwise create a new object.
Returns
- A new SQLConnector object or a dictionary of attributes (if as_dict is True).
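For example, a sketch building a connector from a SQLite URI (the file path is illustrative):
```
from meerschaum.connectors.sql import SQLConnector

# The label defaults to the database file name ('app.db').
conn = SQLConnector.from_uri('sqlite:////tmp/app.db')

# Or inspect the parsed keyword arguments without constructing an object.
attrs = SQLConnector.from_uri('sqlite:////tmp/app.db', as_dict=True)
```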
73@staticmethod 74def parse_uri(uri: str) -> Dict[str, Any]: 75 """ 76 Parse a URI string into a dictionary of parameters. 77 78 Parameters 79 ---------- 80 uri: str 81 The database connection URI. 82 83 Returns 84 ------- 85 A dictionary of attributes. 86 87 Examples 88 -------- 89 >>> parse_uri('sqlite:////home/foo/bar.db') 90 {'database': '/home/foo/bar.db', 'flavor': 'sqlite'} 91 >>> parse_uri( 92 ... 'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439' 93 ... + '/master?driver=ODBC+Driver+17+for+SQL+Server' 94 ... ) 95 {'host': 'localhost', 'database': 'master', 'username': 'sa', 96 'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql', 97 'driver': 'ODBC Driver 17 for SQL Server'} 98 >>> 99 """ 100 from urllib.parse import parse_qs, urlparse 101 sqlalchemy = attempt_import('sqlalchemy') 102 parser = sqlalchemy.engine.url.make_url 103 params = parser(uri).translate_connect_args() 104 params['flavor'] = uri.split(':')[0].split('+')[0] 105 if params['flavor'] == 'postgres': 106 params['flavor'] = 'postgresql' 107 if '?' in uri: 108 parsed_uri = urlparse(uri) 109 for key, value in parse_qs(parsed_uri.query).items(): 110 params.update({key: value[0]}) 111 112 if '--search_path' in params.get('options', ''): 113 params.update({'schema': params['options'].replace('--search_path=', '', 1)}) 114 return params
Parse a URI string into a dictionary of parameters.
Parameters
- uri (str): The database connection URI.
Returns
- A dictionary of attributes.
Examples
>>> parse_uri('sqlite:////home/foo/bar.db')
{'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
>>> parse_uri(
... 'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
... + '/master?driver=ODBC+Driver+17+for+SQL+Server'
... )
{'host': 'localhost', 'database': 'master', 'username': 'sa',
'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
'driver': 'ODBC Driver 17 for SQL Server'}
>>>
20class APIConnector(Connector): 21 """ 22 Connect to a Meerschaum API instance. 23 """ 24 25 IS_INSTANCE: bool = True 26 IS_THREAD_SAFE: bool = False 27 28 OPTIONAL_ATTRIBUTES: List[str] = ['port'] 29 30 from ._request import ( 31 make_request, 32 get, 33 post, 34 put, 35 patch, 36 delete, 37 wget, 38 ) 39 from ._actions import ( 40 get_actions, 41 do_action, 42 do_action_async, 43 do_action_legacy, 44 ) 45 from ._misc import get_mrsm_version, get_chaining_status 46 from ._pipes import ( 47 register_pipe, 48 fetch_pipes_keys, 49 edit_pipe, 50 sync_pipe, 51 delete_pipe, 52 get_pipe_data, 53 get_pipe_id, 54 get_pipe_attributes, 55 get_sync_time, 56 pipe_exists, 57 create_metadata, 58 get_pipe_rowcount, 59 drop_pipe, 60 clear_pipe, 61 get_pipe_columns_types, 62 get_pipe_columns_indices, 63 ) 64 from ._fetch import fetch 65 from ._plugins import ( 66 register_plugin, 67 install_plugin, 68 delete_plugin, 69 get_plugins, 70 get_plugin_attributes, 71 ) 72 from ._login import login, test_connection 73 from ._users import ( 74 register_user, 75 get_user_id, 76 get_users, 77 edit_user, 78 delete_user, 79 get_user_password_hash, 80 get_user_type, 81 get_user_attributes, 82 ) 83 from ._uri import from_uri 84 from ._jobs import ( 85 get_jobs, 86 get_job, 87 get_job_metadata, 88 get_job_properties, 89 get_job_exists, 90 delete_job, 91 start_job, 92 create_job, 93 stop_job, 94 pause_job, 95 get_logs, 96 get_job_stop_time, 97 monitor_logs, 98 monitor_logs_async, 99 get_job_is_blocking_on_stdin, 100 get_job_began, 101 get_job_ended, 102 get_job_paused, 103 get_job_status, 104 ) 105 106 def __init__( 107 self, 108 label: Optional[str] = None, 109 wait: bool = False, 110 debug: bool = False, 111 **kw 112 ): 113 if 'uri' in kw: 114 from_uri_params = self.from_uri(kw['uri'], as_dict=True) 115 label = label or from_uri_params.get('label', None) 116 _ = from_uri_params.pop('label', None) 117 kw.update(from_uri_params) 118 119 super().__init__('api', label=label, **kw) 120 if 'protocol' not in self.__dict__: 121 self.protocol = ( 122 'https' if self.__dict__.get('uri', '').startswith('https') 123 else 'http' 124 ) 125 126 if 'uri' not in self.__dict__: 127 self.verify_attributes(required_attributes) 128 else: 129 from meerschaum.connectors.sql import SQLConnector 130 conn_attrs = SQLConnector.parse_uri(self.__dict__['uri']) 131 if 'host' not in conn_attrs: 132 raise Exception(f"Invalid URI for '{self}'.") 133 self.__dict__.update(conn_attrs) 134 135 self.url = ( 136 self.protocol + '://' + 137 self.host 138 + ( 139 (':' + str(self.port)) 140 if self.__dict__.get('port', None) 141 else '' 142 ) 143 ) 144 self._token = None 145 self._expires = None 146 self._session = None 147 148 149 @property 150 def URI(self) -> str: 151 """ 152 Return the fully qualified URI. 153 """ 154 username = self.__dict__.get('username', None) 155 password = self.__dict__.get('password', None) 156 creds = (username + ':' + password + '@') if username and password else '' 157 return ( 158 self.protocol 159 + '://' 160 + creds 161 + self.host 162 + ( 163 (':' + str(self.port)) 164 if self.__dict__.get('port', None) 165 else '' 166 ) 167 ) 168 169 170 @property 171 def session(self): 172 if self._session is None: 173 certifi = attempt_import('certifi', lazy=False) 174 requests = attempt_import('requests', lazy=False) 175 if requests: 176 self._session = requests.Session() 177 if self._session is None: 178 error(f"Failed to import requests. 
Is requests installed?") 179 return self._session 180 181 @property 182 def token(self): 183 expired = ( 184 True if self._expires is None else ( 185 ( 186 self._expires 187 < 188 datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1) 189 ) 190 ) 191 ) 192 193 if self._token is None or expired: 194 success, msg = self.login() 195 if not success: 196 warn(msg, stack=False) 197 return self._token
Connect to a Meerschaum API instance.
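A minimal sketch of constructing an APIConnector by keyword arguments (the host and credentials are placeholders):
```
import meerschaum as mrsm

conn = mrsm.get_connector(
    'api', 'example',
    host='api.example.com',   # placeholder host
    username='user',          # placeholder credentials
    password='secret',
    protocol='https',
)
print(conn.url)   # 'https://api.example.com'
```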
106 def __init__( 107 self, 108 label: Optional[str] = None, 109 wait: bool = False, 110 debug: bool = False, 111 **kw 112 ): 113 if 'uri' in kw: 114 from_uri_params = self.from_uri(kw['uri'], as_dict=True) 115 label = label or from_uri_params.get('label', None) 116 _ = from_uri_params.pop('label', None) 117 kw.update(from_uri_params) 118 119 super().__init__('api', label=label, **kw) 120 if 'protocol' not in self.__dict__: 121 self.protocol = ( 122 'https' if self.__dict__.get('uri', '').startswith('https') 123 else 'http' 124 ) 125 126 if 'uri' not in self.__dict__: 127 self.verify_attributes(required_attributes) 128 else: 129 from meerschaum.connectors.sql import SQLConnector 130 conn_attrs = SQLConnector.parse_uri(self.__dict__['uri']) 131 if 'host' not in conn_attrs: 132 raise Exception(f"Invalid URI for '{self}'.") 133 self.__dict__.update(conn_attrs) 134 135 self.url = ( 136 self.protocol + '://' + 137 self.host 138 + ( 139 (':' + str(self.port)) 140 if self.__dict__.get('port', None) 141 else '' 142 ) 143 ) 144 self._token = None 145 self._expires = None 146 self._session = None
149 @property 150 def URI(self) -> str: 151 """ 152 Return the fully qualified URI. 153 """ 154 username = self.__dict__.get('username', None) 155 password = self.__dict__.get('password', None) 156 creds = (username + ':' + password + '@') if username and password else '' 157 return ( 158 self.protocol 159 + '://' 160 + creds 161 + self.host 162 + ( 163 (':' + str(self.port)) 164 if self.__dict__.get('port', None) 165 else '' 166 ) 167 )
Return the fully qualified URI.
170 @property 171 def session(self): 172 if self._session is None: 173 certifi = attempt_import('certifi', lazy=False) 174 requests = attempt_import('requests', lazy=False) 175 if requests: 176 self._session = requests.Session() 177 if self._session is None: 178 error(f"Failed to import requests. Is requests installed?") 179 return self._session
181 @property 182 def token(self): 183 expired = ( 184 True if self._expires is None else ( 185 ( 186 self._expires 187 < 188 datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1) 189 ) 190 ) 191 ) 192 193 if self._token is None or expired: 194 success, msg = self.login() 195 if not success: 196 warn(msg, stack=False) 197 return self._token
28def make_request( 29 self, 30 method: str, 31 r_url: str, 32 headers: Optional[Dict[str, Any]] = None, 33 use_token: bool = True, 34 debug: bool = False, 35 **kwargs: Any 36) -> 'requests.Response': 37 """ 38 Make a request to this APIConnector's endpoint using the in-memory session. 39 40 Parameters 41 ---------- 42 method: str 43 The kind of request to make. 44 Accepted values: 45 - `'GET'` 46 - `'OPTIONS'` 47 - `'HEAD'` 48 - `'POST'` 49 - `'PUT'` 50 - `'PATCH'` 51 - `'DELETE'` 52 53 r_url: str 54 The relative URL for the endpoint (e.g. `'/pipes'`). 55 56 headers: Optional[Dict[str, Any]], default None 57 The headers to use for the request. 58 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 59 60 use_token: bool, default True 61 If `True`, add the authorization token to the headers. 62 63 debug: bool, default False 64 Verbosity toggle. 65 66 kwargs: Any 67 All other keyword arguments are passed to `requests.request`. 68 69 Returns 70 ------- 71 A `requests.Reponse` object. 72 """ 73 if method.upper() not in METHODS: 74 raise ValueError(f"Method '{method}' is not supported.") 75 76 verify = self.__dict__.get('verify', None) 77 if 'verify' not in kwargs and isinstance(verify, bool): 78 kwargs['verify'] = verify 79 80 headers = ( 81 copy.deepcopy(headers) 82 if isinstance(headers, dict) 83 else {} 84 ) 85 86 if use_token: 87 headers.update({'Authorization': f'Bearer {self.token}'}) 88 89 if 'timeout' not in kwargs: 90 kwargs['timeout'] = STATIC_CONFIG['api']['default_timeout'] 91 92 request_url = urllib.parse.urljoin(self.url, r_url) 93 if debug: 94 dprint(f"[{self}] Sending a '{method.upper()}' request to {request_url}") 95 96 return self.session.request( 97 method.upper(), 98 request_url, 99 headers = headers, 100 **kwargs 101 )
Make a request to this APIConnector's endpoint using the in-memory session.
Parameters
- method (str): The kind of request to make. Accepted values: 'GET', 'OPTIONS', 'HEAD', 'POST', 'PUT', 'PATCH', 'DELETE'.
- r_url (str): The relative URL for the endpoint (e.g. '/pipes').
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If True, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
- A requests.Response object.
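For instance, a sketch of a raw GET against the pipes endpoint (this assumes conn is an APIConnector with valid credentials, since use_token defaults to True):
```
# Assumes `conn` is a logged-in APIConnector.
response = conn.make_request('GET', '/pipes', debug=True)
if response:
    print(response.json())
```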
104def get(self, r_url: str, **kwargs: Any) -> 'requests.Response': 105 """ 106 Wrapper for `requests.get`. 107 108 Parameters 109 ---------- 110 r_url: str 111 The relative URL for the endpoint (e.g. `'/pipes'`). 112 113 headers: Optional[Dict[str, Any]], default None 114 The headers to use for the request. 115 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 116 117 use_token: bool, default True 118 If `True`, add the authorization token to the headers. 119 120 debug: bool, default False 121 Verbosity toggle. 122 123 kwargs: Any 124 All other keyword arguments are passed to `requests.request`. 125 126 Returns 127 ------- 128 A `requests.Reponse` object. 129 130 """ 131 return self.make_request('GET', r_url, **kwargs)
Wrapper for requests.get.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. '/pipes').
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If True, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
- A requests.Response object.
134def post(self, r_url: str, **kwargs: Any) -> 'requests.Response': 135 """ 136 Wrapper for `requests.post`. 137 138 Parameters 139 ---------- 140 r_url: str 141 The relative URL for the endpoint (e.g. `'/pipes'`). 142 143 headers: Optional[Dict[str, Any]], default None 144 The headers to use for the request. 145 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 146 147 use_token: bool, default True 148 If `True`, add the authorization token to the headers. 149 150 debug: bool, default False 151 Verbosity toggle. 152 153 kwargs: Any 154 All other keyword arguments are passed to `requests.request`. 155 156 Returns 157 ------- 158 A `requests.Reponse` object. 159 160 """ 161 return self.make_request('POST', r_url, **kwargs)
Wrapper for requests.post.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. '/pipes').
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If True, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
- A requests.Response object.
193def put(self, r_url: str, **kwargs: Any) -> 'requests.Response': 194 """ 195 Wrapper for `requests.put`. 196 197 Parameters 198 ---------- 199 r_url: str 200 The relative URL for the endpoint (e.g. `'/pipes'`). 201 202 headers: Optional[Dict[str, Any]], default None 203 The headers to use for the request. 204 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 205 206 use_token: bool, default True 207 If `True`, add the authorization token to the headers. 208 209 debug: bool, default False 210 Verbosity toggle. 211 212 kwargs: Any 213 All other keyword arguments are passed to `requests.request`. 214 215 Returns 216 ------- 217 A `requests.Reponse` object. 218 """ 219 return self.make_request('PUT', r_url, **kwargs)
Wrapper for requests.put.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. '/pipes').
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If True, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
- A requests.Response object.
164def patch(self, r_url: str, **kwargs: Any) -> 'requests.Response': 165 """ 166 Wrapper for `requests.patch`. 167 168 Parameters 169 ---------- 170 r_url: str 171 The relative URL for the endpoint (e.g. `'/pipes'`). 172 173 headers: Optional[Dict[str, Any]], default None 174 The headers to use for the request. 175 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 176 177 use_token: bool, default True 178 If `True`, add the authorization token to the headers. 179 180 debug: bool, default False 181 Verbosity toggle. 182 183 kwargs: Any 184 All other keyword arguments are passed to `requests.request`. 185 186 Returns 187 ------- 188 A `requests.Reponse` object. 189 """ 190 return self.make_request('PATCH', r_url, **kwargs)
Wrapper for requests.patch.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. '/pipes').
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If True, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
- A requests.Response object.
222def delete(self, r_url: str, **kwargs: Any) -> 'requests.Response': 223 """ 224 Wrapper for `requests.delete`. 225 226 Parameters 227 ---------- 228 r_url: str 229 The relative URL for the endpoint (e.g. `'/pipes'`). 230 231 headers: Optional[Dict[str, Any]], default None 232 The headers to use for the request. 233 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 234 235 use_token: bool, default True 236 If `True`, add the authorization token to the headers. 237 238 debug: bool, default False 239 Verbosity toggle. 240 241 kwargs: Any 242 All other keyword arguments are passed to `requests.request`. 243 244 Returns 245 ------- 246 A `requests.Reponse` object. 247 """ 248 return self.make_request('DELETE', r_url, **kwargs)
Wrapper for requests.delete.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. '/pipes').
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If True, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
- A requests.Response object.
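The verb wrappers above all delegate to make_request, so usage is uniform; a brief sketch (the endpoint path follows the '/pipes' example from the docstrings, and the payload is illustrative):
```
resp = conn.get('/pipes')                           # GET
resp = conn.post('/pipes', json={'example': True})  # POST with a JSON body
resp = conn.delete('/pipes')                        # DELETE
```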
251def wget( 252 self, 253 r_url: str, 254 dest: Optional[Union[str, pathlib.Path]] = None, 255 headers: Optional[Dict[str, Any]] = None, 256 use_token: bool = True, 257 debug: bool = False, 258 **kw: Any 259 ) -> pathlib.Path: 260 """Mimic wget with requests. 261 """ 262 from meerschaum.utils.misc import wget 263 if headers is None: 264 headers = {} 265 if use_token: 266 headers.update({'Authorization': f'Bearer {self.token}'}) 267 request_url = urllib.parse.urljoin(self.url, r_url) 268 if debug: 269 dprint( 270 f"[{self}] Downloading {request_url}" 271 + (f' to {dest}' if dest is not None else '') 272 + "..." 273 ) 274 return wget(request_url, dest=dest, headers=headers, **kw)
Mimic wget with requests.
24def get_actions(self): 25 """Get available actions from the API instance.""" 26 return self.get(ACTIONS_ENDPOINT)
Get available actions from the API instance.
29def do_action(self, sysargs: List[str]) -> SuccessTuple: 30 """ 31 Execute a Meerschaum action remotely. 32 """ 33 return asyncio.run(self.do_action_async(sysargs))
Execute a Meerschaum action remotely.
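A sketch of running an action remotely, mirroring the examples under do_action_legacy() below (assumes a configured 'api:main' connector):
```
import meerschaum as mrsm

conn = mrsm.get_connector('api:main')
success, msg = conn.do_action(['show', 'pipes'])
```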
36async def do_action_async( 37 self, 38 sysargs: List[str], 39 callback_function: Callable[[str], None] = partial(print, end=''), 40) -> SuccessTuple: 41 """ 42 Execute an action as a temporary remote job. 43 """ 44 from meerschaum._internal.arguments import remove_api_executor_keys 45 from meerschaum.utils.misc import generate_password 46 sysargs = remove_api_executor_keys(sysargs) 47 48 job_name = TEMP_PREFIX + generate_password(12) 49 job = mrsm.Job(job_name, sysargs, executor_keys=str(self)) 50 51 start_success, start_msg = job.start() 52 if not start_success: 53 return start_success, start_msg 54 55 await job.monitor_logs_async( 56 callback_function=callback_function, 57 stop_on_exit=True, 58 strip_timestamps=True, 59 ) 60 61 success, msg = job.result 62 job.delete() 63 return success, msg
Execute an action as a temporary remote job.
66def do_action_legacy( 67 self, 68 action: Optional[List[str]] = None, 69 sysargs: Optional[List[str]] = None, 70 debug: bool = False, 71 **kw 72) -> SuccessTuple: 73 """ 74 NOTE: This method is deprecated. 75 Please use `do_action()` or `do_action_async()`. 76 77 Execute a Meerschaum action remotely. 78 79 If `sysargs` are provided, parse those instead. 80 Otherwise infer everything from keyword arguments. 81 82 Examples 83 -------- 84 >>> conn = mrsm.get_connector('api:main') 85 >>> conn.do_action(['show', 'pipes']) 86 (True, "Success") 87 >>> conn.do_action(['show', 'arguments'], name='test') 88 (True, "Success") 89 """ 90 import sys, json 91 from meerschaum.utils.debug import dprint 92 from meerschaum.config.static import STATIC_CONFIG 93 from meerschaum.utils.misc import json_serialize_datetime 94 if action is None: 95 action = [] 96 97 if sysargs is not None and action and action[0] == '': 98 from meerschaum._internal.arguments import parse_arguments 99 if debug: 100 dprint(f"Parsing sysargs:\n{sysargs}") 101 json_dict = parse_arguments(sysargs) 102 else: 103 json_dict = kw 104 json_dict['action'] = action 105 if 'noask' not in kw: 106 json_dict['noask'] = True 107 if 'yes' not in kw: 108 json_dict['yes'] = True 109 if debug: 110 json_dict['debug'] = debug 111 112 root_action = json_dict['action'][0] 113 del json_dict['action'][0] 114 r_url = f"{STATIC_CONFIG['api']['endpoints']['actions']}/{root_action}" 115 116 if debug: 117 from meerschaum.utils.formatting import pprint 118 dprint(f"Sending data to '{self.url + r_url}':") 119 pprint(json_dict, stream=sys.stderr) 120 121 response = self.post( 122 r_url, 123 data = json.dumps(json_dict, default=json_serialize_datetime), 124 debug = debug, 125 ) 126 try: 127 response_list = json.loads(response.text) 128 if isinstance(response_list, dict) and 'detail' in response_list: 129 return False, response_list['detail'] 130 except Exception as e: 131 print(f"Invalid response: {response}") 132 print(e) 133 return False, response.text 134 if debug: 135 dprint(response) 136 try: 137 return response_list[0], response_list[1] 138 except Exception as e: 139 return False, f"Failed to parse result from action '{root_action}'"
NOTE: This method is deprecated. Please use do_action() or do_action_async().
Execute a Meerschaum action remotely.
If sysargs are provided, parse those instead. Otherwise infer everything from keyword arguments.
Examples
>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action(['show', 'pipes'])
(True, "Success")
>>> conn.do_action(['show', 'arguments'], name='test')
(True, "Success")
13def get_mrsm_version(self, **kw) -> Optional[str]: 14 """ 15 Return the Meerschaum version of the API instance. 16 """ 17 from meerschaum.config.static import STATIC_CONFIG 18 try: 19 j = self.get( 20 STATIC_CONFIG['api']['endpoints']['version'] + '/mrsm', 21 use_token=False, 22 **kw 23 ).json() 24 except Exception as e: 25 return None 26 if isinstance(j, dict) and 'detail' in j: 27 return None 28 return j
Return the Meerschaum version of the API instance.
30def get_chaining_status(self, **kw) -> Optional[bool]: 31 """ 32 Fetch the chaining status of the API instance. 33 """ 34 from meerschaum.config.static import STATIC_CONFIG 35 try: 36 response = self.get( 37 STATIC_CONFIG['api']['endpoints']['chaining'], 38 use_token = True, 39 **kw 40 ) 41 if not response: 42 return None 43 except Exception as e: 44 return None 45 46 return response.json()
Fetch the chaining status of the API instance.
34def register_pipe( 35 self, 36 pipe: mrsm.Pipe, 37 debug: bool = False 38) -> SuccessTuple: 39 """Submit a POST to the API to register a new Pipe object. 40 Returns a tuple of (success_bool, response_dict). 41 """ 42 from meerschaum.utils.debug import dprint 43 from meerschaum.config.static import STATIC_CONFIG 44 ### NOTE: if `parameters` is supplied in the Pipe constructor, 45 ### then `pipe.parameters` will exist and not be fetched from the database. 46 r_url = pipe_r_url(pipe) 47 response = self.post( 48 r_url + '/register', 49 json = pipe.parameters, 50 debug = debug, 51 ) 52 if debug: 53 dprint(response.text) 54 55 if not response: 56 return False, response.text 57 58 response_data = response.json() 59 if isinstance(response_data, list): 60 response_tuple = response_data[0], response_data[1] 61 elif 'detail' in response.json(): 62 response_tuple = response.__bool__(), response_data['detail'] 63 else: 64 response_tuple = response.__bool__(), response.text 65 return response_tuple
Submit a POST to the API to register a new Pipe object. Returns a tuple of (success_bool, message_str).
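A hedged sketch of registering a pipe whose instance is this API connector (the pipe keys are hypothetical):
```
import meerschaum as mrsm

# Hypothetical keys; `conn` is the APIConnector acting as the instance.
pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='api:main')
success, msg = conn.register_pipe(pipe)
```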
101def fetch_pipes_keys( 102 self, 103 connector_keys: Optional[List[str]] = None, 104 metric_keys: Optional[List[str]] = None, 105 location_keys: Optional[List[str]] = None, 106 tags: Optional[List[str]] = None, 107 params: Optional[Dict[str, Any]] = None, 108 debug: bool = False 109) -> Union[List[Tuple[str, str, Union[str, None]]]]: 110 """ 111 Fetch registered Pipes' keys from the API. 112 113 Parameters 114 ---------- 115 connector_keys: Optional[List[str]], default None 116 The connector keys for the query. 117 118 metric_keys: Optional[List[str]], default None 119 The metric keys for the query. 120 121 location_keys: Optional[List[str]], default None 122 The location keys for the query. 123 124 tags: Optional[List[str]], default None 125 A list of tags for the query. 126 127 params: Optional[Dict[str, Any]], default None 128 A parameters dictionary for filtering against the `pipes` table 129 (e.g. `{'connector_keys': 'plugin:foo'}`). 130 Not recommeded to be used. 131 132 debug: bool, default False 133 Verbosity toggle. 134 135 Returns 136 ------- 137 A list of tuples containing pipes' keys. 138 """ 139 from meerschaum.config.static import STATIC_CONFIG 140 if connector_keys is None: 141 connector_keys = [] 142 if metric_keys is None: 143 metric_keys = [] 144 if location_keys is None: 145 location_keys = [] 146 if tags is None: 147 tags = [] 148 149 r_url = STATIC_CONFIG['api']['endpoints']['pipes'] + '/keys' 150 try: 151 j = self.get( 152 r_url, 153 params = { 154 'connector_keys': json.dumps(connector_keys), 155 'metric_keys': json.dumps(metric_keys), 156 'location_keys': json.dumps(location_keys), 157 'tags': json.dumps(tags), 158 'params': json.dumps(params), 159 }, 160 debug=debug 161 ).json() 162 except Exception as e: 163 error(str(e)) 164 165 if 'detail' in j: 166 error(j['detail'], stack=False) 167 return [tuple(r) for r in j]
Fetch registered Pipes' keys from the API.
Parameters
- connector_keys (Optional[List[str]], default None): The connector keys for the query.
- metric_keys (Optional[List[str]], default None): The metric keys for the query.
- location_keys (Optional[List[str]], default None): The location keys for the query.
- tags (Optional[List[str]], default None): A list of tags for the query.
- params (Optional[Dict[str, Any]], default None): A parameters dictionary for filtering against the pipes table (e.g. {'connector_keys': 'plugin:foo'}). Not recommended to be used.
- debug (bool, default False): Verbosity toggle.
Returns
- A list of tuples containing pipes' keys.
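For example, a sketch filtering the registry (the filter values are hypothetical):
```
keys = conn.fetch_pipes_keys(
    connector_keys=['plugin:noaa'],   # hypothetical filters
    tags=['production'],
)
for connector_key, metric_key, location_key in keys:
    print(connector_key, metric_key, location_key)
```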
68def edit_pipe( 69 self, 70 pipe: mrsm.Pipe, 71 patch: bool = False, 72 debug: bool = False, 73) -> SuccessTuple: 74 """Submit a PATCH to the API to edit an existing Pipe object. 75 Returns a tuple of (success_bool, response_dict). 76 """ 77 from meerschaum.utils.debug import dprint 78 ### NOTE: if `parameters` is supplied in the Pipe constructor, 79 ### then `pipe.parameters` will exist and not be fetched from the database. 80 r_url = pipe_r_url(pipe) 81 response = self.patch( 82 r_url + '/edit', 83 params = {'patch': patch,}, 84 json = pipe.parameters, 85 debug = debug, 86 ) 87 if debug: 88 dprint(response.text) 89 90 response_data = response.json() 91 92 if isinstance(response.json(), list): 93 response_tuple = response_data[0], response_data[1] 94 elif 'detail' in response.json(): 95 response_tuple = response.__bool__(), response_data['detail'] 96 else: 97 response_tuple = response.__bool__(), response.text 98 return response_tuple
Submit a PATCH to the API to edit an existing Pipe object. Returns a tuple of (success_bool, message_str).
170def sync_pipe( 171 self, 172 pipe: mrsm.Pipe, 173 df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]] = None, 174 chunksize: Optional[int] = -1, 175 debug: bool = False, 176 **kw: Any 177) -> SuccessTuple: 178 """Sync a DataFrame into a Pipe.""" 179 from decimal import Decimal 180 from meerschaum.utils.debug import dprint 181 from meerschaum.utils.misc import json_serialize_datetime, items_str 182 from meerschaum.config import get_config 183 from meerschaum.utils.packages import attempt_import 184 from meerschaum.utils.dataframe import get_numeric_cols, to_json 185 begin = time.time() 186 more_itertools = attempt_import('more_itertools') 187 if df is None: 188 msg = f"DataFrame is `None`. Cannot sync {pipe}." 189 return False, msg 190 191 def get_json_str(c): 192 ### allow syncing dict or JSON without needing to import pandas (for IOT devices) 193 if isinstance(c, (dict, list)): 194 return json.dumps(c, default=json_serialize_datetime) 195 return to_json(c, orient='columns') 196 197 df = json.loads(df) if isinstance(df, str) else df 198 199 _chunksize: Optional[int] = (1 if chunksize is None else ( 200 get_config('system', 'connectors', 'sql', 'chunksize') if chunksize == -1 201 else chunksize 202 )) 203 keys: List[str] = list(df.columns) 204 chunks = [] 205 if hasattr(df, 'index'): 206 df = df.reset_index(drop=True) 207 is_dask = 'dask' in df.__module__ 208 chunks = ( 209 (df.iloc[i] for i in more_itertools.chunked(df.index, _chunksize)) 210 if not is_dask 211 else [partition.compute() for partition in df.partitions] 212 ) 213 214 numeric_cols = get_numeric_cols(df) 215 if numeric_cols: 216 for col in numeric_cols: 217 df[col] = df[col].apply(lambda x: f'{x:f}' if isinstance(x, Decimal) else x) 218 pipe_dtypes = pipe.dtypes 219 new_numeric_cols = [ 220 col 221 for col in numeric_cols 222 if pipe_dtypes.get(col, None) != 'numeric' 223 ] 224 pipe.dtypes.update({ 225 col: 'numeric' 226 for col in new_numeric_cols 227 }) 228 edit_success, edit_msg = pipe.edit(debug=debug) 229 if not edit_success: 230 warn( 231 "Failed to update new numeric columns " 232 + f"{items_str(new_numeric_cols)}:\n{edit_msg}" 233 ) 234 elif isinstance(df, dict): 235 ### `_chunks` is a dict of lists of dicts. 236 ### e.g. {'a' : [ {'a':[1, 2]}, {'a':[3, 4]} ] } 237 _chunks = {k: [] for k in keys} 238 for k in keys: 239 chunk_iter = more_itertools.chunked(df[k], _chunksize) 240 for l in chunk_iter: 241 _chunks[k].append({k: l}) 242 243 ### `chunks` is a list of dicts (e.g. orient by rows in pandas JSON). 244 for k, l in _chunks.items(): 245 for i, c in enumerate(l): 246 try: 247 chunks[i].update(c) 248 except IndexError: 249 chunks.append(c) 250 elif isinstance(df, list): 251 chunks = (df[i] for i in more_itertools.chunked(df, _chunksize)) 252 253 ### Send columns in case the user has defined them locally. 
254 if pipe.columns: 255 kw['columns'] = json.dumps(pipe.columns) 256 r_url = pipe_r_url(pipe) + '/data' 257 258 rowcount = 0 259 num_success_chunks = 0 260 for i, c in enumerate(chunks): 261 if debug: 262 dprint(f"[{self}] Posting chunk {i} to {r_url}...") 263 if len(c) == 0: 264 if debug: 265 dprint(f"[{self}] Skipping empty chunk...") 266 continue 267 json_str = get_json_str(c) 268 269 try: 270 response = self.post( 271 r_url, 272 ### handles check_existing 273 params = kw, 274 data = json_str, 275 debug = debug 276 ) 277 except Exception as e: 278 msg = f"Failed to post a chunk to {pipe}:\n{e}" 279 warn(msg) 280 return False, msg 281 282 if not response: 283 return False, f"Failed to sync a chunk:\n{response.text}" 284 285 try: 286 j = json.loads(response.text) 287 except Exception as e: 288 return False, f"Failed to parse response from syncing {pipe}:\n{e}" 289 290 if isinstance(j, dict) and 'detail' in j: 291 return False, j['detail'] 292 293 try: 294 j = tuple(j) 295 except Exception as e: 296 return False, response.text 297 298 if debug: 299 dprint("Received response: " + str(j)) 300 if not j[0]: 301 return j 302 303 rowcount += len(c) 304 num_success_chunks += 1 305 306 success_tuple = True, ( 307 f"It took {round(time.time() - begin, 2)} seconds to sync {rowcount} row" 308 + ('s' if rowcount != 1 else '') 309 + f" across {num_success_chunks} chunk" + ('s' if num_success_chunks != 1 else '') + 310 f" to {pipe}." 311 ) 312 return success_tuple
Sync a DataFrame into a Pipe.
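A minimal sketch of syncing a small DataFrame (assumes pandas is installed, a registered pipe bound to `pipe`, and illustrative column names):
```
import pandas as pd

df = pd.DataFrame({
    'dt': ['2024-01-01', '2024-01-02'],  # illustrative datetime column
    'val': [1.5, 2.5],
})
success, msg = conn.sync_pipe(pipe, df, chunksize=1000)
```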
315def delete_pipe( 316 self, 317 pipe: Optional[meerschaum.Pipe] = None, 318 debug: bool = None, 319) -> SuccessTuple: 320 """Delete a Pipe and drop its table.""" 321 if pipe is None: 322 error(f"Pipe cannot be None.") 323 r_url = pipe_r_url(pipe) 324 response = self.delete( 325 r_url + '/delete', 326 debug = debug, 327 ) 328 if debug: 329 dprint(response.text) 330 331 response_data = response.json() 332 if isinstance(response.json(), list): 333 response_tuple = response_data[0], response_data[1] 334 elif 'detail' in response.json(): 335 response_tuple = response.__bool__(), response_data['detail'] 336 else: 337 response_tuple = response.__bool__(), response.text 338 return response_tuple
Delete a Pipe and drop its table.
341def get_pipe_data( 342 self, 343 pipe: meerschaum.Pipe, 344 select_columns: Optional[List[str]] = None, 345 omit_columns: Optional[List[str]] = None, 346 begin: Union[str, datetime, int, None] = None, 347 end: Union[str, datetime, int, None] = None, 348 params: Optional[Dict[str, Any]] = None, 349 as_chunks: bool = False, 350 debug: bool = False, 351 **kw: Any 352) -> Union[pandas.DataFrame, None]: 353 """Fetch data from the API.""" 354 r_url = pipe_r_url(pipe) 355 chunks_list = [] 356 while True: 357 try: 358 response = self.get( 359 r_url + "/data", 360 params={ 361 'select_columns': json.dumps(select_columns), 362 'omit_columns': json.dumps(omit_columns), 363 'begin': begin, 364 'end': end, 365 'params': json.dumps(params, default=str) 366 }, 367 debug=debug 368 ) 369 if not response.ok: 370 return None 371 j = response.json() 372 except Exception as e: 373 warn(f"Failed to get data for {pipe}:\n{e}") 374 return None 375 if isinstance(j, dict) and 'detail' in j: 376 return False, j['detail'] 377 break 378 379 from meerschaum.utils.packages import import_pandas 380 from meerschaum.utils.dataframe import parse_df_datetimes, add_missing_cols_to_df 381 from meerschaum.utils.dtypes import are_dtypes_equal 382 pd = import_pandas() 383 try: 384 df = pd.read_json(StringIO(response.text)) 385 except Exception as e: 386 warn(f"Failed to parse response for {pipe}:\n{e}") 387 return None 388 389 if len(df.columns) == 0: 390 return add_missing_cols_to_df(df, pipe.dtypes) 391 392 df = parse_df_datetimes( 393 df, 394 ignore_cols = [ 395 col 396 for col, dtype in pipe.dtypes.items() 397 if not are_dtypes_equal(str(dtype), 'datetime') 398 ], 399 strip_timezone=(pipe.tzinfo is None), 400 debug=debug, 401 ) 402 return df
Fetch data from the API.
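A sketch of fetching a bounded window of data (the bounds and the filter column are illustrative):
```
df = conn.get_pipe_data(
    pipe,
    begin='2024-01-01',
    end='2024-02-01',
    params={'id': 1},   # hypothetical filter column
)
if df is not None:
    print(df.head())
```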
405def get_pipe_id( 406 self, 407 pipe: mrsm.Pipe, 408 debug: bool = False, 409) -> int: 410 """Get a Pipe's ID from the API.""" 411 from meerschaum.utils.misc import is_int 412 r_url = pipe_r_url(pipe) 413 response = self.get( 414 r_url + '/id', 415 debug = debug 416 ) 417 if debug: 418 dprint(f"Got pipe ID: {response.text}") 419 try: 420 if is_int(response.text): 421 return int(response.text) 422 except Exception as e: 423 warn(f"Failed to get the ID for {pipe}:\n{e}") 424 return None
Get a Pipe's ID from the API.
427def get_pipe_attributes( 428 self, 429 pipe: mrsm.Pipe, 430 debug: bool = False, 431) -> Dict[str, Any]: 432 """Get a Pipe's attributes from the API 433 434 Parameters 435 ---------- 436 pipe: meerschaum.Pipe 437 The pipe whose attributes we are fetching. 438 439 Returns 440 ------- 441 A dictionary of a pipe's attributes. 442 If the pipe does not exist, return an empty dictionary. 443 """ 444 r_url = pipe_r_url(pipe) 445 response = self.get(r_url + '/attributes', debug=debug) 446 try: 447 return json.loads(response.text) 448 except Exception as e: 449 warn(f"Failed to get the attributes for {pipe}:\n{e}") 450 return {}
Get a Pipe's attributes from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe whose attributes we are fetching.
Returns
- A dictionary of a pipe's attributes. If the pipe does not exist, return an empty dictionary.
453def get_sync_time( 454 self, 455 pipe: mrsm.Pipe, 456 params: Optional[Dict[str, Any]] = None, 457 newest: bool = True, 458 debug: bool = False, 459) -> Union[datetime, int, None]: 460 """Get a Pipe's most recent datetime value from the API. 461 462 Parameters 463 ---------- 464 pipe: meerschaum.Pipe 465 The pipe to select from. 466 467 params: Optional[Dict[str, Any]], default None 468 Optional params dictionary to build the WHERE clause. 469 470 newest: bool, default True 471 If `True`, get the most recent datetime (honoring `params`). 472 If `False`, get the oldest datetime (ASC instead of DESC). 473 474 Returns 475 ------- 476 The most recent (or oldest if `newest` is `False`) datetime of a pipe, 477 rounded down to the closest minute. 478 """ 479 from meerschaum.utils.misc import is_int 480 from meerschaum.utils.warnings import warn 481 r_url = pipe_r_url(pipe) 482 response = self.get( 483 r_url + '/sync_time', 484 json = params, 485 params = {'newest': newest, 'debug': debug}, 486 debug = debug, 487 ) 488 if not response: 489 warn(f"Failed to get the sync time for {pipe}:\n" + response.text) 490 return None 491 492 j = response.json() 493 if j is None: 494 dt = None 495 else: 496 try: 497 dt = ( 498 datetime.fromisoformat(j) 499 if not is_int(j) 500 else int(j) 501 ) 502 except Exception as e: 503 warn(f"Failed to parse the sync time '{j}' for {pipe}:\n{e}") 504 dt = None 505 return dt
Get a Pipe's most recent datetime value from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe to select from.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause.
- newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
Returns
- The most recent (or oldest if newest is False) datetime of a pipe, rounded down to the closest minute.
508def pipe_exists( 509 self, 510 pipe: mrsm.Pipe, 511 debug: bool = False 512) -> bool: 513 """Check the API to see if a Pipe exists. 514 515 Parameters 516 ---------- 517 pipe: 'meerschaum.Pipe' 518 The pipe which were are querying. 519 520 Returns 521 ------- 522 A bool indicating whether a pipe's underlying table exists. 523 """ 524 from meerschaum.utils.debug import dprint 525 from meerschaum.utils.warnings import warn 526 r_url = pipe_r_url(pipe) 527 response = self.get(r_url + '/exists', debug=debug) 528 if not response: 529 warn(f"Failed to check if {pipe} exists:\n{response.text}") 530 return False 531 if debug: 532 dprint("Received response: " + str(response.text)) 533 j = response.json() 534 if isinstance(j, dict) and 'detail' in j: 535 warn(j['detail']) 536 return j
Check the API to see if a Pipe exists.
Parameters
- pipe ('meerschaum.Pipe'): The pipe which we are querying.
Returns
- A bool indicating whether a pipe's underlying table exists.
539def create_metadata( 540 self, 541 debug: bool = False 542) -> bool: 543 """Create metadata tables. 544 545 Returns 546 ------- 547 A bool indicating success. 548 """ 549 from meerschaum.utils.debug import dprint 550 from meerschaum.config.static import STATIC_CONFIG 551 r_url = STATIC_CONFIG['api']['endpoints']['metadata'] 552 response = self.post(r_url, debug=debug) 553 if debug: 554 dprint(f"Create metadata response: {response.text}") 555 try: 556 metadata_response = json.loads(response.text) 557 except Exception as e: 558 warn(f"Failed to create metadata on {self}:\n{e}") 559 metadata_response = False 560 return bool(metadata_response)
Create metadata tables.
Returns
- A bool indicating success.
563def get_pipe_rowcount( 564 self, 565 pipe: mrsm.Pipe, 566 begin: Optional[datetime] = None, 567 end: Optional[datetime] = None, 568 params: Optional[Dict[str, Any]] = None, 569 remote: bool = False, 570 debug: bool = False, 571) -> int: 572 """Get a pipe's row count from the API. 573 574 Parameters 575 ---------- 576 pipe: 'meerschaum.Pipe': 577 The pipe whose row count we are counting. 578 579 begin: Optional[datetime], default None 580 If provided, bound the count by this datetime. 581 582 end: Optional[datetime] 583 If provided, bound the count by this datetime. 584 585 params: Optional[Dict[str, Any]], default None 586 If provided, bound the count by these parameters. 587 588 remote: bool, default False 589 590 Returns 591 ------- 592 The number of rows in the pipe's table, bound the given parameters. 593 If the table does not exist, return 0. 594 """ 595 r_url = pipe_r_url(pipe) 596 response = self.get( 597 r_url + "/rowcount", 598 json = params, 599 params = { 600 'begin': begin, 601 'end': end, 602 'remote': remote, 603 }, 604 debug = debug 605 ) 606 if not response: 607 warn(f"Failed to get the rowcount for {pipe}:\n{response.text}") 608 return 0 609 try: 610 return int(json.loads(response.text)) 611 except Exception as e: 612 warn(f"Failed to get the rowcount for {pipe}:\n{e}") 613 return 0
Get a pipe's row count from the API.
Parameters
- pipe ('meerschaum.Pipe'): The pipe whose row count we are counting.
- begin (Optional[datetime], default None): If provided, bound the count by this datetime.
- end (Optional[datetime], default None): If provided, bound the count by this datetime.
- params (Optional[Dict[str, Any]], default None): If provided, bound the count by these parameters.
- remote (bool, default False):
Returns
- The number of rows in the pipe's table, bound by the given parameters. If the table does not exist, return 0.
616def drop_pipe( 617 self, 618 pipe: mrsm.Pipe, 619 debug: bool = False 620) -> SuccessTuple: 621 """ 622 Drop a pipe's table but maintain its registration. 623 624 Parameters 625 ---------- 626 pipe: meerschaum.Pipe: 627 The pipe to be dropped. 628 629 Returns 630 ------- 631 A success tuple (bool, str). 632 """ 633 from meerschaum.utils.warnings import error 634 from meerschaum.utils.debug import dprint 635 if pipe is None: 636 error(f"Pipe cannot be None.") 637 r_url = pipe_r_url(pipe) 638 response = self.delete( 639 r_url + '/drop', 640 debug = debug, 641 ) 642 if debug: 643 dprint(response.text) 644 645 try: 646 data = response.json() 647 except Exception as e: 648 return False, f"Failed to drop {pipe}." 649 650 if isinstance(data, list): 651 response_tuple = data[0], data[1] 652 elif 'detail' in response.json(): 653 response_tuple = response.__bool__(), data['detail'] 654 else: 655 response_tuple = response.__bool__(), response.text 656 657 return response_tuple
Drop a pipe's table but maintain its registration.
Parameters
- pipe (meerschaum.Pipe:): The pipe to be dropped.
Returns
- A success tuple (bool, str).
660def clear_pipe( 661 self, 662 pipe: mrsm.Pipe, 663 debug: bool = False, 664 **kw 665) -> SuccessTuple: 666 """ 667 Delete rows in a pipe's table. 668 669 Parameters 670 ---------- 671 pipe: meerschaum.Pipe 672 The pipe with rows to be deleted. 673 674 Returns 675 ------- 676 A success tuple. 677 """ 678 kw.pop('metric_keys', None) 679 kw.pop('connector_keys', None) 680 kw.pop('location_keys', None) 681 kw.pop('action', None) 682 kw.pop('force', None) 683 return self.do_action_legacy( 684 ['clear', 'pipes'], 685 connector_keys=pipe.connector_keys, 686 metric_keys=pipe.metric_key, 687 location_keys=pipe.location_key, 688 force=True, 689 debug=debug, 690 **kw 691 )
Delete rows in a pipe's table.
Parameters
- pipe (meerschaum.Pipe): The pipe with rows to be deleted.
Returns
- A success tuple.
694def get_pipe_columns_types( 695 self, 696 pipe: mrsm.Pipe, 697 debug: bool = False, 698) -> Union[Dict[str, str], None]: 699 """ 700 Fetch the columns and types of the pipe's table. 701 702 Parameters 703 ---------- 704 pipe: meerschaum.Pipe 705 The pipe whose columns to be queried. 706 707 Returns 708 ------- 709 A dictionary mapping column names to their database types. 710 711 Examples 712 -------- 713 >>> { 714 ... 'dt': 'TIMESTAMP WITHOUT TIMEZONE', 715 ... 'id': 'BIGINT', 716 ... 'val': 'DOUBLE PRECISION', 717 ... } 718 >>> 719 """ 720 r_url = pipe_r_url(pipe) + '/columns/types' 721 response = self.get( 722 r_url, 723 debug=debug 724 ) 725 j = response.json() 726 if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1: 727 warn(j['detail']) 728 return None 729 if not isinstance(j, dict): 730 warn(response.text) 731 return None 732 return j
Fetch the columns and types of the pipe's table.
Parameters
- pipe (meerschaum.Pipe): The pipe whose columns are to be queried.
Returns
- A dictionary mapping column names to their database types.
Examples
>>> {
... 'dt': 'TIMESTAMP WITHOUT TIMEZONE',
... 'id': 'BIGINT',
... 'val': 'DOUBLE PRECISION',
... }
>>>
735def get_pipe_columns_indices( 736 self, 737 pipe: mrsm.Pipe, 738 debug: bool = False, 739) -> Union[Dict[str, str], None]: 740 """ 741 Fetch the index information for a pipe. 742 743 Parameters 744 ---------- 745 pipe: mrsm.Pipe 746 The pipe whose columns to be queried. 747 748 Returns 749 ------- 750 A dictionary mapping column names to a list of associated index information. 751 """ 752 r_url = pipe_r_url(pipe) + '/columns/indices' 753 response = self.get( 754 r_url, 755 debug=debug 756 ) 757 j = response.json() 758 if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1: 759 warn(j['detail']) 760 return None 761 if not isinstance(j, dict): 762 warn(response.text) 763 return None 764 return j
Fetch the index information for a pipe.
Parameters
- pipe (mrsm.Pipe): The pipe whose columns are to be queried.
Returns
- A dictionary mapping column names to a list of associated index information.
16def fetch( 17 self, 18 pipe: mrsm.Pipe, 19 begin: Union[datetime, str, int] = '', 20 end: Union[datetime, int] = None, 21 params: Optional[Dict[str, Any]] = None, 22 debug: bool = False, 23 **kw: Any 24 ) -> Iterator['pd.DataFrame']: 25 """Get the Pipe data from the remote Pipe.""" 26 from meerschaum.utils.debug import dprint 27 from meerschaum.utils.warnings import warn, error 28 from meerschaum.config._patch import apply_patch_to_config 29 30 fetch_params = pipe.parameters.get('fetch', {}) 31 if not fetch_params: 32 warn(f"Missing 'fetch' parameters for {pipe}.", stack=False) 33 return None 34 35 pipe_meta = fetch_params.get('pipe', {}) 36 ### Legacy: check for `connector_keys`, etc. at the root. 37 if not pipe_meta: 38 ck, mk, lk = ( 39 fetch_params.get('connector_keys', None), 40 fetch_params.get('metric_key', None), 41 fetch_params.get('location_key', None), 42 ) 43 if not ck or not mk: 44 warn(f"Missing `fetch:pipe` keys for {pipe}.", stack=False) 45 return None 46 47 pipe_meta.update({ 48 'connector': ck, 49 'metric': mk, 50 'location': lk, 51 }) 52 53 pipe_meta['instance'] = self 54 source_pipe = mrsm.Pipe(**pipe_meta) 55 56 _params = copy.deepcopy(params) if params is not None else {} 57 _params = apply_patch_to_config(_params, fetch_params.get('params', {})) 58 select_columns = fetch_params.get('select_columns', []) 59 omit_columns = fetch_params.get('omit_columns', []) 60 61 return source_pipe.get_data( 62 select_columns = select_columns, 63 omit_columns = omit_columns, 64 begin = begin, 65 end = end, 66 params = _params, 67 debug = debug, 68 as_iterator = True, 69 )
Get the Pipe data from the remote Pipe.
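The fetch:pipe keys above define which remote pipe to read; a sketch of the expected parameters layout (the keys themselves are illustrative):
```
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'api:main', 'weather',   # hypothetical keys
    parameters={
        'fetch': {
            'pipe': {
                'connector': 'plugin:noaa',
                'metric': 'weather',
                'location': None,
            },
            'select_columns': ['dt', 'val'],
        },
    },
)
```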
20def register_plugin( 21 self, 22 plugin: meerschaum.core.Plugin, 23 make_archive: bool = True, 24 debug: bool = False, 25 ) -> SuccessTuple: 26 """Register a plugin and upload its archive.""" 27 import json 28 archive_path = plugin.make_tar(debug=debug) if make_archive else plugin.archive_path 29 file_pointer = open(archive_path, 'rb') 30 files = {'archive': file_pointer} 31 metadata = { 32 'version': plugin.version, 33 'attributes': json.dumps(plugin.attributes), 34 } 35 r_url = plugin_r_url(plugin) 36 try: 37 response = self.post(r_url, files=files, params=metadata, debug=debug) 38 except Exception as e: 39 return False, f"Failed to register plugin '{plugin}'." 40 finally: 41 file_pointer.close() 42 43 try: 44 success, msg = json.loads(response.text) 45 except Exception as e: 46 return False, response.text 47 48 return success, msg
Register a plugin and upload its archive.
50def install_plugin( 51 self, 52 name: str, 53 skip_deps: bool = False, 54 force: bool = False, 55 debug: bool = False 56 ) -> SuccessTuple: 57 """Download and attempt to install a plugin from the API.""" 58 import os, pathlib, json 59 from meerschaum.core import Plugin 60 from meerschaum.config._paths import PLUGINS_TEMP_RESOURCES_PATH 61 from meerschaum.utils.debug import dprint 62 from meerschaum.utils.packages import attempt_import 63 binaryornot_check = attempt_import('binaryornot.check', lazy=False) 64 r_url = plugin_r_url(name) 65 dest = pathlib.Path(os.path.join(PLUGINS_TEMP_RESOURCES_PATH, name + '.tar.gz')) 66 if debug: 67 dprint(f"Fetching from '{self.url + r_url}' to '{dest}'...") 68 archive_path = self.wget(r_url, dest, debug=debug) 69 is_binary = binaryornot_check.is_binary(str(archive_path)) 70 if not is_binary: 71 fail_msg = f"Failed to download binary for plugin '{name}'." 72 try: 73 with open(archive_path, 'r') as f: 74 j = json.load(f) 75 if isinstance(j, list): 76 success, msg = tuple(j) 77 elif isinstance(j, dict) and 'detail' in j: 78 success, msg = False, fail_msg 79 except Exception as e: 80 success, msg = False, fail_msg 81 return success, msg 82 plugin = Plugin(name, archive_path=archive_path, repo_connector=self) 83 return plugin.install(skip_deps=skip_deps, force=force, debug=debug)
Download and attempt to install a plugin from the API.
149def delete_plugin( 150 self, 151 plugin: meerschaum.core.Plugin, 152 debug: bool = False 153 ) -> SuccessTuple: 154 """Delete a plugin from an API repository.""" 155 import json 156 r_url = plugin_r_url(plugin) 157 try: 158 response = self.delete(r_url, debug=debug) 159 except Exception as e: 160 return False, f"Failed to delete plugin '{plugin}'." 161 162 try: 163 success, msg = json.loads(response.text) 164 except Exception as e: 165 return False, response.text 166 167 return success, msg
Delete a plugin from an API repository.
85def get_plugins( 86 self, 87 user_id : Optional[int] = None, 88 search_term : Optional[str] = None, 89 debug : bool = False 90 ) -> Sequence[str]: 91 """Return a list of registered plugin names. 92 93 Parameters 94 ---------- 95 user_id : 96 If specified, return all plugins from a certain user. 97 user_id : Optional[int] : 98 (Default value = None) 99 search_term : Optional[str] : 100 (Default value = None) 101 debug : bool : 102 (Default value = False) 103 104 Returns 105 ------- 106 107 """ 108 import json 109 from meerschaum.utils.warnings import warn, error 110 from meerschaum.config.static import STATIC_CONFIG 111 response = self.get( 112 STATIC_CONFIG['api']['endpoints']['plugins'], 113 params = {'user_id' : user_id, 'search_term' : search_term}, 114 use_token = True, 115 debug = debug 116 ) 117 if not response: 118 return [] 119 plugins = json.loads(response.text) 120 if not isinstance(plugins, list): 121 error(response.text) 122 return plugins
Return a list of registered plugin names.
Parameters
- user_id (Optional[int], default None): If specified, return all plugins from a certain user.
- search_term (Optional[str], default None): If specified, filter plugin names by this search term.
- debug (bool, default False): Verbosity toggle.
Returns
- A list of registered plugin names.
124def get_plugin_attributes( 125 self, 126 plugin: meerschaum.core.Plugin, 127 debug: bool = False 128 ) -> Mapping[str, Any]: 129 """ 130 Return a plugin's attributes. 131 """ 132 import json 133 from meerschaum.utils.warnings import warn, error 134 r_url = plugin_r_url(plugin) + '/attributes' 135 response = self.get(r_url, use_token=True, debug=debug) 136 attributes = response.json() 137 if isinstance(attributes, str) and attributes and attributes[0] == '{': 138 try: 139 attributes = json.loads(attributes) 140 except Exception as e: 141 pass 142 if not isinstance(attributes, dict): 143 error(response.text) 144 elif not response and 'detail' in attributes: 145 warn(attributes['detail']) 146 return {} 147 return attributes
Return a plugin's attributes.
13def login( 14 self, 15 debug: bool = False, 16 warn: bool = True, 17 **kw: Any 18 ) -> SuccessTuple: 19 """Log in and set the session token.""" 20 from meerschaum.utils.warnings import warn as _warn, info, error 21 from meerschaum.core import User 22 from meerschaum.config.static import STATIC_CONFIG 23 import json, datetime 24 try: 25 login_data = { 26 'username': self.username, 27 'password': self.password, 28 } 29 except AttributeError: 30 return False, f"Please login with the command `login {self}`." 31 response = self.post( 32 STATIC_CONFIG['api']['endpoints']['login'], 33 data = login_data, 34 use_token = False, 35 debug = debug 36 ) 37 if response: 38 msg = f"Successfully logged into '{self}' as user '{login_data['username']}'." 39 self._token = json.loads(response.text)['access_token'] 40 self._expires = datetime.datetime.strptime( 41 json.loads(response.text)['expires'], 42 '%Y-%m-%dT%H:%M:%S.%f' 43 ) 44 else: 45 msg = ( 46 f"Failed to log into '{self}' as user '{login_data['username']}'.\n" + 47 f" Please verify login details for connector '{self}'." 48 ) 49 if warn: 50 _warn(msg, stack=False) 51 52 return response.__bool__(), msg
Log in and set the session token.
55def test_connection( 56 self, 57 **kw: Any 58 ) -> Union[bool, None]: 59 """Test if a successful connection to the API may be made.""" 60 from meerschaum.connectors.poll import retry_connect 61 _default_kw = { 62 'max_retries': 1, 'retry_wait': 0, 'warn': False, 63 'connector': self, 'enforce_chaining': False, 64 'enforce_login': False, 65 } 66 _default_kw.update(kw) 67 try: 68 return retry_connect(**_default_kw) 69 except Exception as e: 70 return False
Test if a successful connection to the API may be made.
65def register_user( 66 self, 67 user: 'meerschaum.core.User', 68 debug: bool = False, 69 **kw: Any 70 ) -> SuccessTuple: 71 """Register a new user.""" 72 import json 73 from meerschaum.config.static import STATIC_CONFIG 74 r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/register" 75 data = { 76 'username': user.username, 77 'password': user.password, 78 'attributes': json.dumps(user.attributes), 79 } 80 if user.type: 81 data['type'] = user.type 82 if user.email: 83 data['email'] = user.email 84 response = self.post(r_url, data=data, debug=debug) 85 try: 86 _json = json.loads(response.text) 87 if isinstance(_json, dict) and 'detail' in _json: 88 return False, _json['detail'] 89 success_tuple = tuple(_json) 90 except Exception: 91 msg = response.text if response else f"Failed to register user '{user}'." 92 return False, msg 93 94 return tuple(success_tuple)
Register a new user.
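A sketch of registering a user against the API (the credentials are placeholders, and `conn` is the target APIConnector):
```
from meerschaum.core import User

user = User('newuser', 'changeme')   # placeholder username and password
success, msg = conn.register_user(user)
```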
97def get_user_id( 98 self, 99 user: 'meerschaum.core.User', 100 debug: bool = False, 101 **kw: Any 102 ) -> Optional[int]: 103 """Get a user's ID.""" 104 from meerschaum.config.static import STATIC_CONFIG 105 import json 106 r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/id" 107 response = self.get(r_url, debug=debug, **kw) 108 try: 109 user_id = int(json.loads(response.text)) 110 except Exception as e: 111 user_id = None 112 return user_id
Get a user's ID.
13def get_users( 14 self, 15 debug: bool = False, 16 **kw : Any 17 ) -> List[str]: 18 """ 19 Return a list of registered usernames. 20 """ 21 from meerschaum.config.static import STATIC_CONFIG 22 import json 23 response = self.get( 24 f"{STATIC_CONFIG['api']['endpoints']['users']}", 25 debug = debug, 26 use_token = True, 27 ) 28 if not response: 29 return [] 30 try: 31 return response.json() 32 except Exception as e: 33 return []
Return a list of registered usernames.
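A short usage sketch; the returned usernames are illustrative:

```
>>> conn.get_users()
['admin', 'newuser']  # hypothetical registered users
```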
```
def edit_user(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Edit an existing user."""
    import json
    from meerschaum.config.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/edit"
    data = {
        'username': user.username,
        'password': user.password,
        'type': user.type,
        'email': user.email,
        'attributes': json.dumps(user.attributes),
    }
    response = self.post(r_url, data=data, debug=debug)
    try:
        _json = json.loads(response.text)
        if isinstance(_json, dict) and 'detail' in _json:
            return False, _json['detail']
        success_tuple = tuple(_json)
    except Exception:
        msg = response.text if response else f"Failed to edit user '{user}'."
        return False, msg

    return tuple(success_tuple)
```
Edit an existing user.
```
def delete_user(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Delete a user."""
    from meerschaum.config.static import STATIC_CONFIG
    import json
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}"
    response = self.delete(r_url, debug=debug)
    try:
        _json = json.loads(response.text)
        if isinstance(_json, dict) and 'detail' in _json:
            return False, _json['detail']
        success_tuple = tuple(_json)
    except Exception:
        success_tuple = False, f"Failed to delete user '{user.username}'."
    return success_tuple
```
Delete a user.
```
def get_user_password_hash(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """If configured, get a user's password hash."""
    from meerschaum.config.static import STATIC_CONFIG
    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/password_hash'
    response = self.get(r_url, debug=debug, **kw)
    if not response:
        return None
    return response.json()
```
If configured, get a user's password hash.
```
def get_user_type(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """If configured, get a user's type."""
    from meerschaum.config.static import STATIC_CONFIG
    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/type'
    response = self.get(r_url, debug=debug, **kw)
    if not response:
        return None
    return response.json()
```
If configured, get a user's type.
```
def get_user_attributes(
    self,
    user: 'meerschaum.core.User',
    debug: bool = False,
    **kw: Any
) -> Optional[Dict[str, Any]]:
    """Get a user's attributes."""
    from meerschaum.config.static import STATIC_CONFIG
    import json
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/attributes"
    response = self.get(r_url, debug=debug, **kw)
    try:
        attributes = json.loads(response.text)
    except Exception:
        attributes = None
    return attributes
```
Get a user's attributes.
```
@classmethod
def from_uri(
    cls,
    uri: str,
    label: Optional[str] = None,
    as_dict: bool = False,
) -> Union[
    'meerschaum.connectors.APIConnector',
    Dict[str, Union[str, int]],
]:
    """
    Create a new APIConnector from a URI string.

    Parameters
    ----------
    uri: str
        The URI connection string.

    label: Optional[str], default None
        If provided, use this as the connector label.
        Otherwise use the determined database name.

    as_dict: bool, default False
        If `True`, return a dictionary of the keyword arguments
        necessary to create a new `APIConnector`, otherwise create a new object.

    Returns
    -------
    A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
    """
    from meerschaum.connectors.sql import SQLConnector
    params = SQLConnector.parse_uri(uri)
    if 'host' not in params:
        error("No host was found in the provided URI.")
    params['protocol'] = params.pop('flavor')
    params['label'] = label or (
        (
            (params['username'] + '@' if 'username' in params else '')
            + params['host']
        ).lower()
    )

    return cls(**params) if not as_dict else params
```
Create a new APIConnector from a URI string.
Parameters
- uri (str): The URI connection string.
- label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
- as_dict (bool, default False): If `True`, return a dictionary of the keyword arguments necessary to create a new `APIConnector`, otherwise create a new object.
Returns
- A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
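Since `from_uri()` delegates to `SQLConnector.parse_uri()` and then derives the label as `username@host` (lowercased) when none is given, a sketch might look like the following. The URI is hypothetical, and this assumes the URI parses like a SQLAlchemy-style URL:

```
>>> from meerschaum.connectors.api import APIConnector
>>> conn = APIConnector.from_uri('http://user:pass@localhost:8000')  # hypothetical URI
>>> conn.label  # derived from the username and host
'user@localhost'
>>> APIConnector.from_uri('http://user:pass@localhost:8000', as_dict=True)  # keyword args only
{...}
```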
```
def get_jobs(self, debug: bool = False) -> Dict[str, Job]:
    """
    Return a dictionary of remote jobs.
    """
    response = self.get(JOBS_ENDPOINT, debug=debug)
    if not response:
        warn(f"Failed to get remote jobs from {self}.")
        return {}
    return {
        name: Job(
            name,
            job_meta['sysargs'],
            executor_keys=str(self),
            _properties=job_meta['daemon']['properties']
        )
        for name, job_meta in response.json().items()
    }
```
Return a dictionary of remote jobs.
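A usage sketch (the job name is hypothetical):

```
>>> jobs = conn.get_jobs()  # {name: Job} for all remote jobs
>>> list(jobs)
['etl-weather']  # hypothetical job name
>>> jobs['etl-weather'].executor_keys  # jobs are bound back to this connector
'api:main'
```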
```
def get_job(self, name: str, debug: bool = False) -> Job:
    """
    Return a single Job object.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    if not metadata:
        raise ValueError(f"Job '{name}' does not exist.")

    return Job(
        name,
        metadata['sysargs'],
        executor_keys=str(self),
        _properties=metadata['daemon']['properties'],
    )
```
Return a single Job object.
```
def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the metadata for a single job.
    """
    now = time.perf_counter()
    _job_metadata_cache = self.__dict__.get('_job_metadata_cache', None)
    _job_metadata_timestamp = (
        _job_metadata_cache.get(name, {}).get('timestamp', None)
    ) if _job_metadata_cache is not None else None

    if (
        _job_metadata_timestamp is not None
        and (now - _job_metadata_timestamp) < JOB_METADATA_CACHE_SECONDS
    ):
        if debug:
            dprint(f"Returning cached metadata for job '{name}'.")
        return _job_metadata_cache[name]['metadata']

    response = self.get(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        if debug:
            msg = (
                response.json()['detail']
                if 'detail' in response.text
                else response.text
            )
            warn(f"Failed to get metadata for job '{name}':\n{msg}")
        return {}

    metadata = response.json()
    if _job_metadata_cache is None:
        self._job_metadata_cache = {}

    self._job_metadata_cache[name] = {
        'timestamp': now,
        'metadata': metadata,
    }
    return metadata
```
Return the metadata for a single job.
```
def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the daemon properties for a single job.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    return metadata.get('daemon', {}).get('properties', {})
```
Return the daemon properties for a single job.
```
def get_job_exists(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a job exists.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/exists', debug=debug)
    if not response:
        warn(f"Failed to determine whether job '{name}' exists.")
        return False

    return response.json()
```
Return whether a job exists.
```
def delete_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Delete a job.
    """
    response = self.delete(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']

        return False, response.text

    return tuple(response.json())
```
Delete a job.
```
def start_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Start a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/start", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
```
Start a job.
```
def create_job(
    self,
    name: str,
    sysargs: List[str],
    properties: Optional[Dict[str, str]] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Create a job.
    """
    response = self.post(
        JOBS_ENDPOINT + f"/{name}",
        json={
            'sysargs': sysargs,
            'properties': properties,
        },
        debug=debug,
    )
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
```
Create a job.
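Putting `create_job()` and `start_job()` together, a hedged sketch (the job name and `sysargs` are hypothetical):

```
>>> success, msg = conn.create_job(
...     'etl-weather',                            # hypothetical job name
...     ['sync', 'pipes', '-c', 'plugin:noaa'],   # hypothetical sysargs to run remotely
... )
>>> conn.start_job('etl-weather')
(True, "...")  # a SuccessTuple from the server
```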
```
def stop_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Stop a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/stop", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
```
Stop a job.
```
def pause_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Pause a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/pause", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
```
Pause a job.
```
def get_logs(self, name: str, debug: bool = False) -> str:
    """
    Return the logs for a job.
    """
    response = self.get(LOGS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        raise ValueError(f"Cannot fetch logs for job '{name}':\n{response.text}")

    return response.json()
```
Return the logs for a job.
```
def get_job_stop_time(self, name: str, debug: bool = False) -> Union[datetime, None]:
    """
    Return the job's manual stop time.
    """
    response = self.get(JOBS_ENDPOINT + f"/{name}/stop_time", debug=debug)
    if not response:
        warn(f"Failed to get stop time for job '{name}':\n{response.text}")
        return None

    data = response.json()
    if data is None:
        return None

    return datetime.fromisoformat(data)
```
Return the job's manual stop time.
```
def monitor_logs(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[None], str],
    stop_callback_function: Callable[[None], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and execute a callback with the changes.
    """
    return asyncio.run(
        self.monitor_logs_async(
            name,
            callback_function,
            input_callback_function=input_callback_function,
            stop_callback_function=stop_callback_function,
            stop_on_exit=stop_on_exit,
            strip_timestamps=strip_timestamps,
            accept_input=accept_input,
            debug=debug
        )
    )
```
Monitor a job's log files and execute a callback with the changes.
```
async def monitor_logs_async(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[], str],
    stop_callback_function: Callable[[SuccessTuple], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and await a callback with the changes.
    """
    import traceback
    from meerschaum.jobs import StopMonitoringLogs
    from meerschaum.utils.formatting._jobs import strip_timestamp_from_line

    websockets, websockets_exceptions = mrsm.attempt_import('websockets', 'websockets.exceptions')
    protocol = 'ws' if self.URI.startswith('http://') else 'wss'
    port = self.port if 'port' in self.__dict__ else ''
    uri = f"{protocol}://{self.host}:{port}{LOGS_ENDPOINT}/{name}/ws"

    async def _stdin_callback(client):
        if input_callback_function is None:
            return

        if asyncio.iscoroutinefunction(input_callback_function):
            data = await input_callback_function()
        else:
            data = input_callback_function()

        await client.send(data)

    async def _stop_callback(client):
        try:
            result = tuple(json.loads(await client.recv()))
        except Exception as e:
            warn(traceback.format_exc())
            result = False, str(e)

        if stop_callback_function is not None:
            if asyncio.iscoroutinefunction(stop_callback_function):
                await stop_callback_function(result)
            else:
                stop_callback_function(result)

        if stop_on_exit:
            raise StopMonitoringLogs

    message_callbacks = {
        JOBS_STDIN_MESSAGE: _stdin_callback,
        JOBS_STOP_MESSAGE: _stop_callback,
    }

    async with websockets.connect(uri) as websocket:
        try:
            await websocket.send(self.token or 'no-login')
        except websockets_exceptions.ConnectionClosedOK:
            pass

        while True:
            try:
                response = await websocket.recv()
                callback = message_callbacks.get(response, None)
                if callback is not None:
                    await callback(websocket)
                    continue

                if strip_timestamps:
                    response = strip_timestamp_from_line(response)

                if asyncio.iscoroutinefunction(callback_function):
                    await callback_function(response)
                else:
                    callback_function(response)
            except (KeyboardInterrupt, StopMonitoringLogs):
                await websocket.close()
                break
```
Monitor a job's log files and await a callback with the changes.
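Because `monitor_logs()` simply wraps `monitor_logs_async()` in `asyncio.run()`, a synchronous caller can stream log lines with a plain function. A minimal sketch (the job name is hypothetical; `None` callbacks are tolerated by the checks in the coroutine above):

```
>>> def print_line(line):
...     print(line, end='')  # called with each new log line
...
>>> conn.monitor_logs(
...     'etl-weather',                   # hypothetical job name
...     print_line,
...     input_callback_function=None,    # no stdin forwarding
...     stop_callback_function=None,     # no stop handler
...     stop_on_exit=True,               # stop monitoring when the job stops
... )
```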
```
def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a remote job is blocking on stdin.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/is_blocking_on_stdin', debug=debug)
    if not response:
        return False

    return response.json()
```
Return whether a remote job is blocking on stdin.
```
def get_job_began(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `began` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    began_str = properties.get('daemon', {}).get('began', None)
    if began_str is None:
        return None

    return began_str
```
Return a job's `began` timestamp, if it exists.
```
def get_job_ended(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `ended` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    ended_str = properties.get('daemon', {}).get('ended', None)
    if ended_str is None:
        return None

    return ended_str
```
Return a job's `ended` timestamp, if it exists.
```
def get_job_paused(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `paused` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    paused_str = properties.get('daemon', {}).get('paused', None)
    if paused_str is None:
        return None

    return paused_str
```
Return a job's `paused` timestamp, if it exists.
```
def get_job_status(self, name: str, debug: bool = False) -> str:
    """
    Return the job's status.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    return metadata.get('status', 'stopped')
```
Return the job's status.
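These accessors compose naturally for polling; a sketch with hypothetical values (the job name, status, and timestamp string are illustrative):

```
>>> conn.get_job_status('etl-weather')   # served from cached metadata when fresh
'running'
>>> conn.get_job_began('etl-weather')    # timestamp string, or None if not yet begun
'2024-01-01T00:00:00.000000'
```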
```
def get_connector(
    type: str = None,
    label: str = None,
    refresh: bool = False,
    debug: bool = False,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.


    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type='sql',
    ...     label='newlabel',
    ...     flavor='sqlite',
    ...     database='/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum.config.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    with _locks['_loaded_plugin_connectors']:
        if not _loaded_plugin_connectors:
            load_plugin_connectors()
            _load_builtin_custom_connectors()
            _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector "
                            + f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n"
                        + f"  - Keyword value: '{value}'\n"
                        + f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else:  ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack=False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack=False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]
```
Return existing connector or create new connection and store for reuse.
You can create new connectors if enough parameters are provided for the given type and flavor.
Parameters
- type (Optional[str], default None): Connector type (sql, api, etc.). Defaults to the type of the configured `instance_connector`.
- label (Optional[str], default None): Connector label (e.g. main). Defaults to `'main'`.
- refresh (bool, default False): Refresh the Connector instance / construct new object. Defaults to `False`.
- kw (Any): Other arguments to pass to the Connector constructor. If the Connector has already been constructed and new arguments are provided, `refresh` is set to `True` and the old Connector is replaced.
Returns
- A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`, `meerschaum.connectors.sql.SQLConnector`).
Examples
The following parameters would create a new `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.
```
>>> conn = get_connector(
...     type='sql',
...     label='newlabel',
...     flavor='sqlite',
...     database='/file/path/to/database.db'
... )
>>>
```
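Because of the `type.split(':', maxsplit=1)` branch near the top of the function, both keys may also be passed as a single `'type:label'` string. A small sketch:

```
>>> from meerschaum import get_connector
>>> conn = get_connector('sql:main')  # equivalent to get_connector('sql', 'main')
>>> str(conn)
'sql:main'
```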
```
def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    try:
        typ, label = keys.split(':')
    except Exception:
        return False
    if typ not in instance_types:
        return False
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False
```
Check if the connector keys correspond to an active connection.
If the connector has not been created, it will immediately return `False`.
If the connector exists but cannot communicate with the source, return `False`.
**NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
Parameters
- keys: The keys to the connector (e.g. `'sql:main'`).
Returns
- A `bool` corresponding to whether a successful connection may be made.
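A usage sketch tying this together (the keys and outcome are illustrative; the connector must have been created first, or the function short-circuits to `False`):

```
>>> from meerschaum.connectors import get_connector, is_connected
>>> conn = get_connector('sql:main')        # ensure the connector object exists
>>> is_connected('sql:main', retry_wait=0)  # kwargs forwarded to retry_connect
True
>>> is_connected('sql:never_created')       # hypothetical keys, never constructed
False
```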