meerschaum.connectors
Create connectors with `meerschaum.connectors.get_connector()`.
For ease of use, you can also import from the root `meerschaum` module:

```
>>> from meerschaum import get_connector
>>> conn = get_connector()
```
````python
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8

"""
Create connectors with `meerschaum.connectors.get_connector()`.
For ease of use, you can also import from the root `meerschaum` module:
```
>>> from meerschaum import get_connector
>>> conn = get_connector()
```
"""

from __future__ import annotations

import meerschaum as mrsm
from meerschaum.utils.typing import Any, Union, List, Dict, Optional
from meerschaum.utils.threading import RLock
from meerschaum.utils.warnings import warn

from meerschaum.connectors._Connector import Connector, InvalidAttributesError
from meerschaum.connectors.instance._InstanceConnector import InstanceConnector
from meerschaum.connectors.sql._SQLConnector import SQLConnector
from meerschaum.connectors.api._APIConnector import APIConnector

__all__ = (
    "make_connector",
    "Connector",
    "InstanceConnector",
    "SQLConnector",
    "APIConnector",
    "get_connector",
    "is_connected",
    "poll",
    "api",
    "sql",
    "valkey",
    "parse",
)

### store connectors partitioned by
### type, label for reuse
connectors: Dict[str, Dict[str, Connector]] = {
    'api'   : {},
    'sql'   : {},
    'plugin': {},
    'valkey': {},
}
instance_types: List[str] = ['sql', 'api']
_locks: Dict[str, RLock] = {
    'connectors'               : RLock(),
    'types'                    : RLock(),
    'custom_types'             : RLock(),
    'plugins_types'            : RLock(),
    '_loaded_plugin_connectors': RLock(),
    'instance_types'           : RLock(),
}

### Fill this with objects only when connectors are first referenced.
types: Dict[str, Any] = {}
custom_types: set = set()
plugins_types: Dict[str, List[str]] = {}
_known_custom_types: set = set()
_loaded_plugin_connectors: bool = False


def get_connector(
    type: str = None,
    label: str = None,
    refresh: bool = False,
    debug: bool = False,
    _load_plugins: bool = True,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.

    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```
    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum._internal.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    if _load_plugins:
        with _locks['_loaded_plugin_connectors']:
            if not _loaded_plugin_connectors:
                load_plugin_connectors()
                _load_builtin_custom_connectors()
                _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api'   : APIConnector,
                'sql'   : SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector "
                            + f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n"
                        + f"  - Keyword value: '{value}'\n"
                        + f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else:  ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack=False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack=False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]


def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.
    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    try:
        typ, label = keys.split(':')
    except Exception:
        return False
    if typ not in instance_types:
        return False
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False


def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`.

    Parameters
    ----------
    _is_executor: bool, default False
        If `True`, strip the suffix `executor` (rather than `connector`) from the class name.
        Set the class attribute `IS_INSTANCE` to `True` to register the type as an
        instance connector; this requires implementing the various pipes functions
        and lots of testing.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>>
    """
    import re
    from meerschaum.plugins import _get_parent_plugin
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    plugin_name = _get_parent_plugin(2)
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    if plugin_name:
        with _locks['plugins_types']:
            if plugin_name not in plugins_types:
                plugins_types[plugin_name] = []
            plugins_types[plugin_name].append(typ)
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls


def load_plugin_connectors():
    """
    If a plugin makes use of the `make_connector` decorator,
    load its module.
    """
    from meerschaum.plugins import get_plugins, import_plugins
    to_import = []
    for plugin in get_plugins():
        if plugin is None:
            continue

        with open(plugin.__file__, encoding='utf-8') as f:
            text = f.read()

        if 'make_connector' in text or 'Connector' in text:
            to_import.append(plugin.name)

    if not to_import:
        return

    import_plugins(*to_import)


def unload_plugin_connectors(
    plugin_names: Optional[List[str]] = None,
    debug: bool = False,
) -> None:
    """
    Unload custom connectors added by plugins.
    """
    from meerschaum.plugins import get_plugins_names
    global custom_types, _known_custom_types, types, plugins_types, connectors, _loaded_plugin_connectors

    plugin_names = plugin_names or get_plugins_names()

    for plugin_name in plugin_names:
        plugin_types = plugins_types.get(plugin_name, [])
        for typ in plugin_types:
            _ = types.pop(typ, None)
            _ = connectors.pop(typ, None)
            if typ in instance_types:
                instance_types.remove(typ)

    custom_types.clear()
    custom_types.update(_known_custom_types)
    _loaded_plugin_connectors = False


def get_connector_plugin(
    connector: Connector,
) -> Union[str, None, mrsm.Plugin]:
    """
    Determine the plugin for a connector.
    This is useful for handling virtual environments for custom instance connectors.

    Parameters
    ----------
    connector: Connector
        The connector which may require a virtual environment.

    Returns
    -------
    A Plugin, 'mrsm', or None.
    """
    if not hasattr(connector, 'type'):
        return None
    plugin_name = (
        connector.__module__.replace('plugins.', '').split('.')[0]
        if connector.type in custom_types else (
            connector.label
            if connector.type == 'plugin'
            else 'mrsm'
        )
    )
    plugin = mrsm.Plugin(plugin_name)
    return plugin if plugin.is_installed() else None


def _load_builtin_custom_connectors():
    """
    Import custom connectors decorated with `@make_connector` or `@make_executor`.
    """
    import meerschaum.jobs.systemd
    import meerschaum.connectors.valkey
    _known_custom_types.add('valkey')
    _known_custom_types.add('systemd')
````
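A quick usage sketch (not part of the module above; it assumes a configured `sql:main` connector): `get_connector()` accepts either separate type and label arguments or a single `'type:label'` string, and `is_connected()` only reports `True` for an already-built instance connector whose `test_connection()` succeeds.

```python
import meerschaum as mrsm
from meerschaum.connectors import get_connector, is_connected

# These two calls are equivalent: a 'type:label' string
# is split on the first colon (see get_connector above).
conn_a = mrsm.get_connector('sql', 'main')
conn_b = mrsm.get_connector('sql:main')
print(conn_a is conn_b)  # True -- connectors are cached by type and label

# With no arguments, the configured default instance connector is returned.
instance_conn = get_connector()

# False unless the connector was already built and test_connection() passes.
print(is_connected('sql:main'))
```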
```python
def make_connector(cls, _is_executor: bool = False):
    """
    Register a class as a `Connector`.
    The `type` will be the lower case of the class name, without the suffix `connector`.

    Parameters
    ----------
    _is_executor: bool, default False
        If `True`, strip the suffix `executor` (rather than `connector`) from the class name.
        Set the class attribute `IS_INSTANCE` to `True` to register the type as an
        instance connector; this requires implementing the various pipes functions
        and lots of testing.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> from meerschaum.connectors import make_connector, Connector
    >>>
    >>> @make_connector
    ... class FooConnector(Connector):
    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
    ...
    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
    >>> print(conn.username, conn.password)
    dog cat
    >>>
    """
    import re
    from meerschaum.plugins import _get_parent_plugin
    suffix_regex = (
        r'connector$'
        if not _is_executor
        else r'executor$'
    )
    plugin_name = _get_parent_plugin(2)
    typ = re.sub(suffix_regex, '', cls.__name__.lower())
    with _locks['types']:
        types[typ] = cls
    with _locks['custom_types']:
        custom_types.add(typ)
    if plugin_name:
        with _locks['plugins_types']:
            if plugin_name not in plugins_types:
                plugins_types[plugin_name] = []
            plugins_types[plugin_name].append(typ)
    with _locks['connectors']:
        if typ not in connectors:
            connectors[typ] = {}
    if getattr(cls, 'IS_INSTANCE', False):
        with _locks['instance_types']:
            if typ not in instance_types:
                instance_types.append(typ)

    return cls
```
Register a class as a `Connector`.
The `type` will be the lower case of the class name, without the suffix `connector`.

Parameters
- _is_executor (bool, default False):
  If `True`, strip the suffix `executor` (rather than `connector`) from the class name.
  Set the class attribute `IS_INSTANCE` to `True` to register the type as an instance connector;
  this requires implementing the various pipes functions and lots of testing.

Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>>
>>> @make_connector
... class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
...
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
>>>
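Two behaviors from the source above are worth spelling out beyond the docstring example: the registered type is the lowercased class name with its `connector` (or, with `_is_executor=True`, `executor`) suffix stripped, and classes with `IS_INSTANCE = True` are appended to `instance_types`. A hypothetical sketch (the `AcmeConnector` class is made up for illustration):

```python
from meerschaum.connectors import (
    make_connector,
    Connector,
    connectors,
    instance_types,
)

@make_connector
class AcmeConnector(Connector):
    # 'acmeconnector' -> 'acme' once the 'connector$' suffix is stripped.
    REQUIRED_ATTRIBUTES = ['api_key']
    IS_INSTANCE = True  # appends 'acme' to instance_types

print('acme' in connectors)      # True: a cache bucket was created
print('acme' in instance_types)  # True: IS_INSTANCE was set
```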
````python
class Connector(metaclass=abc.ABCMeta):
    """
    The base connector class to hold connection attributes.
    """

    IS_INSTANCE: bool = False

    def __init__(
        self,
        type: Optional[str] = None,
        label: Optional[str] = None,
        **kw: Any
    ):
        """
        Set the given keyword arguments as attributes.

        Parameters
        ----------
        type: str
            The `type` of the connector (e.g. `sql`, `api`, `plugin`).

        label: str
            The `label` for the connector.

        Examples
        --------
        Run `mrsm edit config` to edit connectors in the YAML file:

        ```yaml
        meerschaum:
            connectors:
                {type}:
                    {label}:
                        ### attributes go here
        ```
        """
        self._original_dict = copy.deepcopy(self.__dict__)
        self._set_attributes(type=type, label=label, **kw)

        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
        self.verify_attributes(
            ['uri']
            if 'uri' in self.__dict__
            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
        )

    def _reset_attributes(self):
        self.__dict__ = self._original_dict

    def _set_attributes(
        self,
        *args,
        inherit_default: bool = True,
        **kw: Any
    ):
        from meerschaum._internal.static import STATIC_CONFIG
        from meerschaum.utils.warnings import error

        self._attributes = {}

        default_label = STATIC_CONFIG['connectors']['default_label']

        ### NOTE: Support the legacy method of explicitly passing the type.
        label = kw.get('label', None)
        if label is None:
            if len(args) == 2:
                label = args[1]
            elif len(args) == 0:
                label = None
            else:
                label = args[0]

        if label == 'default':
            error(
                f"Label cannot be 'default'. Did you mean '{default_label}'?",
                InvalidAttributesError,
            )
        self.__dict__['label'] = label

        from meerschaum.config import get_config
        conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors'))
        connector_config = copy.deepcopy(get_config('system', 'connectors'))

        ### inherit attributes from 'default' if exists
        if inherit_default:
            inherit_from = 'default'
            if self.type in conn_configs and inherit_from in conn_configs[self.type]:
                _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from])
                self._attributes.update(_inherit_dict)

        ### load user config into self._attributes
        if self.type in conn_configs and self.label in conn_configs[self.type]:
            self._attributes.update(conn_configs[self.type][self.label] or {})

        ### load system config into self._sys_config
        ### (deep copy so future Connectors don't inherit changes)
        if self.type in connector_config:
            self._sys_config = copy.deepcopy(connector_config[self.type])

        ### add additional arguments or override configuration
        self._attributes.update(kw)

        ### finally, update __dict__ with _attributes.
        self.__dict__.update(self._attributes)

    def verify_attributes(
        self,
        required_attributes: Optional[List[str]] = None,
        debug: bool = False,
    ) -> None:
        """
        Ensure that the required attributes have been met.

        The Connector base class checks the minimum requirements.
        Child classes may enforce additional requirements.

        Parameters
        ----------
        required_attributes: Optional[List[str]], default None
            Attributes to be verified. If `None`, default to `['type', 'label']`.

        debug: bool, default False
            Verbosity toggle.

        Returns
        -------
        Don't return anything.

        Raises
        ------
        An error if any of the required attributes are missing.
        """
        from meerschaum.utils.warnings import error
        from meerschaum.utils.misc import items_str
        if required_attributes is None:
            required_attributes = ['type', 'label']

        missing_attributes = set()
        for a in required_attributes:
            if a not in self.__dict__:
                missing_attributes.add(a)
        if len(missing_attributes) > 0:
            error(
                (
                    f"Missing {items_str(list(missing_attributes))} "
                    + f"for connector '{self.type}:{self.label}'."
                ),
                InvalidAttributesError,
                silent=True,
                stack=False
            )

    def __str__(self):
        """
        When cast to a string, return type:label.
        """
        return f"{self.type}:{self.label}"

    def __repr__(self):
        """
        Represent the connector as type:label.
        """
        return str(self)

    @property
    def meta(self) -> Dict[str, Any]:
        """
        Return the keys needed to reconstruct this Connector.
        """
        _meta = {
            key: value
            for key, value in self.__dict__.items()
            if not str(key).startswith('_')
        }
        _meta.update({
            'type': self.type,
            'label': self.label,
        })
        return _meta

    @property
    def type(self) -> str:
        """
        Return the type for this connector.
        """
        _type = self.__dict__.get('type', None)
        if _type is None:
            import re
            is_executor = self.__class__.__name__.lower().endswith('executor')
            suffix_regex = (
                r'connector$'
                if not is_executor
                else r'executor$'
            )
            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
            if not _type or _type.lower() == 'instance':
                raise ValueError("No type could be determined for this connector.")
            self.__dict__['type'] = _type
        return _type

    @property
    def label(self) -> str:
        """
        Return the label for this connector.
        """
        _label = self.__dict__.get('label', None)
        if _label is None:
            from meerschaum._internal.static import STATIC_CONFIG
            _label = STATIC_CONFIG['connectors']['default_label']
            self.__dict__['label'] = _label
        return _label
````
The base connector class to hold connection attributes.
````python
def __init__(
    self,
    type: Optional[str] = None,
    label: Optional[str] = None,
    **kw: Any
):
    """
    Set the given keyword arguments as attributes.

    Parameters
    ----------
    type: str
        The `type` of the connector (e.g. `sql`, `api`, `plugin`).

    label: str
        The `label` for the connector.

    Examples
    --------
    Run `mrsm edit config` to edit connectors in the YAML file:

    ```yaml
    meerschaum:
        connectors:
            {type}:
                {label}:
                    ### attributes go here
    ```
    """
    self._original_dict = copy.deepcopy(self.__dict__)
    self._set_attributes(type=type, label=label, **kw)

    ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
    self.verify_attributes(
        ['uri']
        if 'uri' in self.__dict__
        else getattr(self, 'REQUIRED_ATTRIBUTES', None)
    )
````
```python
def verify_attributes(
    self,
    required_attributes: Optional[List[str]] = None,
    debug: bool = False,
) -> None:
    """
    Ensure that the required attributes have been met.

    The Connector base class checks the minimum requirements.
    Child classes may enforce additional requirements.

    Parameters
    ----------
    required_attributes: Optional[List[str]], default None
        Attributes to be verified. If `None`, default to `['type', 'label']`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    Don't return anything.

    Raises
    ------
    An error if any of the required attributes are missing.
    """
    from meerschaum.utils.warnings import error
    from meerschaum.utils.misc import items_str
    if required_attributes is None:
        required_attributes = ['type', 'label']

    missing_attributes = set()
    for a in required_attributes:
        if a not in self.__dict__:
            missing_attributes.add(a)
    if len(missing_attributes) > 0:
        error(
            (
                f"Missing {items_str(list(missing_attributes))} "
                + f"for connector '{self.type}:{self.label}'."
            ),
            InvalidAttributesError,
            silent=True,
            stack=False
        )
```
Ensure that the required attributes have been met.

The Connector base class checks the minimum requirements.
Child classes may enforce additional requirements.

Parameters
- required_attributes (Optional[List[str]], default None):
  Attributes to be verified. If `None`, default to `['type', 'label']`.
- debug (bool, default False): Verbosity toggle.

Returns
- Don't return anything.

Raises
- An `InvalidAttributesError` if any of the required attributes are missing.
```python
@property
def meta(self) -> Dict[str, Any]:
    """
    Return the keys needed to reconstruct this Connector.
    """
    _meta = {
        key: value
        for key, value in self.__dict__.items()
        if not str(key).startswith('_')
    }
    _meta.update({
        'type': self.type,
        'label': self.label,
    })
    return _meta
```
Return the keys needed to reconstruct this Connector.
```python
@property
def type(self) -> str:
    """
    Return the type for this connector.
    """
    _type = self.__dict__.get('type', None)
    if _type is None:
        import re
        is_executor = self.__class__.__name__.lower().endswith('executor')
        suffix_regex = (
            r'connector$'
            if not is_executor
            else r'executor$'
        )
        _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
        if not _type or _type.lower() == 'instance':
            raise ValueError("No type could be determined for this connector.")
        self.__dict__['type'] = _type
    return _type
```
Return the type for this connector.
```python
@property
def label(self) -> str:
    """
    Return the label for this connector.
    """
    _label = self.__dict__.get('label', None)
    if _label is None:
        from meerschaum._internal.static import STATIC_CONFIG
        _label = STATIC_CONFIG['connectors']['default_label']
        self.__dict__['label'] = _label
    return _label
```
Return the label for this connector.
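As a sketch of how these pieces fit together (assuming a configured `sql:main` connector): `__str__` yields the `'type:label'` keys, and since `meta` holds the keys needed to reconstruct the connector, its dictionary can be passed straight back to `get_connector()`.

```python
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
print(conn)        # 'sql:main' -- __str__ returns f"{type}:{label}"
print(conn.meta)   # non-underscore attributes plus 'type' and 'label'

# meta round-trips through get_connector(); the attributes all match,
# so the cached connector is returned rather than rebuilt.
same_conn = mrsm.get_connector(**conn.meta)
print(same_conn is conn)  # True
```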
```python
class InstanceConnector(Connector):
    """
    Instance connectors define the interface for managing pipes and provide methods
    for management of users, plugins, tokens, and other metadata built atop pipes.
    """

    IS_INSTANCE: bool = True
    IS_THREAD_SAFE: bool = False

    from ._users import (
        get_users_pipe,
        register_user,
        get_user_id,
        get_username,
        get_users,
        edit_user,
        delete_user,
        get_user_password_hash,
        get_user_type,
        get_user_attributes,
    )

    from ._plugins import (
        get_plugins_pipe,
        register_plugin,
        get_plugin_user_id,
        delete_plugin,
        get_plugin_id,
        get_plugin_version,
        get_plugins,
        get_plugin_username,
        get_plugin_attributes,
    )

    from ._tokens import (
        get_tokens_pipe,
        register_token,
        edit_token,
        invalidate_token,
        delete_token,
        get_token,
        get_tokens,
        get_token_model,
        get_token_secret_hash,
        token_exists,
        get_token_scopes,
    )

    from ._pipes import (
        register_pipe,
        get_pipe_attributes,
        get_pipe_id,
        edit_pipe,
        delete_pipe,
        fetch_pipes_keys,
        pipe_exists,
        drop_pipe,
        drop_pipe_indices,
        sync_pipe,
        create_pipe_indices,
        clear_pipe,
        get_pipe_data,
        get_sync_time,
        get_pipe_columns_types,
        get_pipe_columns_indices,
    )
```
Instance connectors define the interface for managing pipes and provide methods for management of users, plugins, tokens, and other metadata built atop pipes.
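A custom instance connector therefore subclasses `InstanceConnector` (which sets `IS_INSTANCE = True`) and implements the abstract pipes methods documented below; the users, plugins, and tokens methods are mixed in from submodules and build on top of those. A hypothetical skeleton (method bodies elided):

```python
import meerschaum as mrsm
from meerschaum.connectors import make_connector, InstanceConnector

@make_connector
class ExampleConnector(InstanceConnector):
    """Hypothetical instance connector; registers as type 'example'."""
    REQUIRED_ATTRIBUTES: list = []

    def register_pipe(self, pipe, debug=False, **kwargs) -> mrsm.SuccessTuple:
        ...  # persist pipe.parameters

    def get_pipe_attributes(self, pipe, debug=False, **kwargs) -> dict:
        ...  # return the stored parameters document

    # The remaining abstract methods (fetch_pipes_keys, sync_pipe,
    # get_pipe_data, etc.) must be implemented as well.
```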
```python
def get_users_pipe(self) -> 'mrsm.Pipe':
    """
    Return the pipe used for users registration.
    """
    if '_users_pipe' in self.__dict__:
        return self._users_pipe

    cache_connector = self.__dict__.get('_cache_connector', None)
    self._users_pipe = mrsm.Pipe(
        'mrsm', 'users',
        instance=self,
        target='mrsm_users',
        temporary=True,
        cache=True,
        cache_connector_keys=cache_connector,
        static=True,
        null_indices=False,
        columns={
            'primary': 'user_id',
        },
        dtypes={
            'user_id': 'uuid',
            'username': 'string',
            'password_hash': 'string',
            'email': 'string',
            'user_type': 'string',
            'attributes': 'json',
        },
        indices={
            'unique': 'username',
        },
    )
    return self._users_pipe
```
Return the pipe used for users registration.
```python
def register_user(
    self,
    user: User,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Register a new user to the users pipe.
    """
    users_pipe = self.get_users_pipe()
    user.user_id = uuid.uuid4()
    sync_success, sync_msg = users_pipe.sync(
        [{
            'user_id': user.user_id,
            'username': user.username,
            'email': user.email,
            'password_hash': user.password_hash,
            'user_type': user.type,
            'attributes': user.attributes,
        }],
        check_existing=False,
        debug=debug,
    )
    if not sync_success:
        return False, f"Failed to register user '{user.username}':\n{sync_msg}"

    return True, "Success"
```
Register a new user to the users pipe.
```python
def get_user_id(self, user: User, debug: bool = False) -> Union[uuid.UUID, None]:
    """
    Return a user's ID from the username.
    """
    users_pipe = self.get_users_pipe()
    result_df = users_pipe.get_data(['user_id'], params={'username': user.username}, limit=1)
    if result_df is None or len(result_df) == 0:
        return None
    return result_df['user_id'][0]
```
Return a user's ID from the username.
```python
def get_username(self, user_id: Any, debug: bool = False) -> Any:
    """
    Return the username from the given ID.
    """
    users_pipe = self.get_users_pipe()
    return users_pipe.get_value('username', {'user_id': user_id}, debug=debug)
```
Return the username from the given ID.
```python
def get_users(
    self,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Get the registered usernames.
    """
    users_pipe = self.get_users_pipe()
    df = users_pipe.get_data()
    if df is None:
        return []

    return list(df['username'])
```
Get the registered usernames.
```python
def edit_user(self, user: User, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Edit the attributes for an existing user.
    """
    users_pipe = self.get_users_pipe()
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)

    doc = {'user_id': user_id}
    if user.email != '':
        doc['email'] = user.email
    if user.password_hash != '':
        doc['password_hash'] = user.password_hash
    if user.type != '':
        doc['user_type'] = user.type
    if user.attributes:
        doc['attributes'] = user.attributes

    sync_success, sync_msg = users_pipe.sync([doc], debug=debug)
    if not sync_success:
        return False, f"Failed to edit user '{user.username}':\n{sync_msg}"

    return True, "Success"
```
Edit the attributes for an existing user.
```python
def delete_user(self, user: User, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete a user from the users table.
    """
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    users_pipe = self.get_users_pipe()
    clear_success, clear_msg = users_pipe.clear(params={'user_id': user_id}, debug=debug)
    if not clear_success:
        return False, f"Failed to delete user '{user}':\n{clear_msg}"
    return True, "Success"
```
Delete a user from the users table.
```python
def get_user_password_hash(self, user: User, debug: bool = False) -> Union[str, None]:
    """
    Get a user's password hash from the users table.
    """
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    users_pipe = self.get_users_pipe()
    result_df = users_pipe.get_data(['password_hash'], params={'user_id': user_id}, debug=debug)
    if result_df is None or len(result_df) == 0:
        return None

    return result_df['password_hash'][0]
```
Get a user's password hash from the users table.
```python
def get_user_type(self, user: User, debug: bool = False) -> Union[str, None]:
    """
    Get a user's type from the users table.
    """
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    users_pipe = self.get_users_pipe()
    result_df = users_pipe.get_data(['user_type'], params={'user_id': user_id}, debug=debug)
    if result_df is None or len(result_df) == 0:
        return None

    return result_df['user_type'][0]
```
Get a user's type from the users table.
```python
def get_user_attributes(self, user: User, debug: bool = False) -> Union[Dict[str, Any], None]:
    """
    Get a user's attributes from the users table.
    """
    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
    users_pipe = self.get_users_pipe()
    result_df = users_pipe.get_data(['attributes'], params={'user_id': user_id}, debug=debug)
    if result_df is None or len(result_df) == 0:
        return None

    return result_df['attributes'][0]
```
Get a user's attributes from the users table.
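Taken together, the users methods support a simple lifecycle, sketched below. The `User` import path and constructor are assumptions (they aren't shown on this page), and `sql:main` stands in for any instance connector:

```python
import meerschaum as mrsm
from meerschaum.core import User  # assumed import path; not shown above

conn = mrsm.get_connector('sql:main')
user = User('alice', 'hunter2')  # assumed constructor: username, password

success, msg = conn.register_user(user)  # syncs a new row with a random UUID
user_id = conn.get_user_id(user)
print(conn.get_username(user_id))        # 'alice'
print(conn.get_user_type(user))          # the stored 'user_type' value
success, msg = conn.delete_user(user)    # clears the row by user_id
```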
```python
def get_plugins_pipe(self) -> 'mrsm.Pipe':
    """
    Return the internal pipe for syncing plugins metadata.
    """
    if '_plugins_pipe' in self.__dict__:
        return self._plugins_pipe

    cache_connector = self.__dict__.get('_cache_connector', None)
    users_pipe = self.get_users_pipe()
    user_id_dtype = users_pipe.dtypes.get('user_id', 'uuid')

    self._plugins_pipe = mrsm.Pipe(
        'mrsm', 'plugins',
        instance=self,
        target='mrsm_plugins',
        temporary=True,
        cache=True,
        cache_connector_keys=cache_connector,
        static=True,
        null_indices=False,
        columns={
            'primary': 'plugin_name',
            'user_id': 'user_id',
        },
        dtypes={
            'plugin_name': 'string',
            'user_id': user_id_dtype,
            'attributes': 'json',
            'version': 'string',
        },
    )
    return self._plugins_pipe
```
Return the internal pipe for syncing plugins metadata.
```python
def register_plugin(self, plugin: Plugin, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Register a new plugin to the plugins table.
    """
    plugins_pipe = self.get_plugins_pipe()
    users_pipe = self.get_users_pipe()
    user_id = self.get_plugin_user_id(plugin)
    if user_id is not None:
        username = self.get_username(user_id, debug=debug)
        return False, f"{plugin} is already registered to '{username}'."

    doc = {
        'plugin_name': plugin.name,
        'version': plugin.version,
        'attributes': plugin.attributes,
        'user_id': plugin.user_id,
    }

    sync_success, sync_msg = plugins_pipe.sync(
        [doc],
        check_existing=False,
        debug=debug,
    )
    if not sync_success:
        return False, f"Failed to register {plugin}:\n{sync_msg}"

    return True, "Success"
```
Register a new plugin to the plugins table.
```python
def get_plugin_user_id(self, plugin: Plugin, debug: bool = False) -> Union[uuid.UUID, None]:
    """
    Return the user ID for the plugin's owner.
    """
    plugins_pipe = self.get_plugins_pipe()
    return plugins_pipe.get_value('user_id', {'plugin_name': plugin.name}, debug=debug)
```
Return the user ID for the plugin's owner.
```python
def delete_plugin(self, plugin: Plugin, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete a plugin's registration.
    """
    plugin_id = self.get_plugin_id(plugin, debug=debug)
    if plugin_id is None:
        return False, f"{plugin} is not registered."

    plugins_pipe = self.get_plugins_pipe()
    clear_success, clear_msg = plugins_pipe.clear(params={'plugin_name': plugin.name}, debug=debug)
    if not clear_success:
        return False, f"Failed to delete {plugin}:\n{clear_msg}"
    return True, "Success"
```
Delete a plugin's registration.
```python
def get_plugin_id(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
    """
    Return a plugin's ID.
    """
    user_id = self.get_plugin_user_id(plugin, debug=debug)
    return plugin.name if user_id is not None else None
```
Return a plugin's ID.
```python
def get_plugin_version(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
    """
    Return the version for a plugin.
    """
    plugins_pipe = self.get_plugins_pipe()
    return plugins_pipe.get_value('version', {'plugin_name': plugin.name}, debug=debug)
```
Return the version for a plugin.
```python
def get_plugins(
    self,
    user_id: Optional[int] = None,
    search_term: Optional[str] = None,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Return a list of plugin names.
    """
    plugins_pipe = self.get_plugins_pipe()
    params = {}
    if user_id:
        params['user_id'] = user_id

    df = plugins_pipe.get_data(['plugin_name'], params=params, debug=debug)
    if df is None:
        return []

    docs = df.to_dict(orient='records')
    return [
        plugin_name
        for doc in docs
        if (plugin_name := doc['plugin_name']).startswith(search_term or '')
    ]
```
Return a list of plugin names.
```python
def get_plugin_username(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
    """
    Return the username for the plugin's owner.
    """
    user_id = self.get_plugin_user_id(plugin, debug=debug)
    if user_id is None:
        return None
    return self.get_username(user_id, debug=debug)
```
Return the username for the plugin's owner.
```python
def get_plugin_attributes(self, plugin: Plugin, debug: bool = False) -> Dict[str, Any]:
    """
    Return the attributes for a plugin.
    """
    plugins_pipe = self.get_plugins_pipe()
    return plugins_pipe.get_value('attributes', {'plugin_name': plugin.name}, debug=debug) or {}
```
Return the attributes for a plugin.
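A corresponding sketch for the plugins methods. `mrsm.Plugin` appears elsewhere on this page; the 'example' plugin name is hypothetical:

```python
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
plugin = mrsm.Plugin('example')

# register_plugin() refuses to re-register a plugin that already has an owner.
success, msg = conn.register_plugin(plugin)

print(conn.get_plugin_version(plugin))     # the stored 'version' value
print(conn.get_plugins(search_term='ex'))  # names starting with 'ex'
success, msg = conn.delete_plugin(plugin)  # clears the registration row
```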
```python
def get_tokens_pipe(self) -> mrsm.Pipe:
    """
    Return the internal pipe for tokens management.
    """
    if '_tokens_pipe' in self.__dict__:
        return self._tokens_pipe

    users_pipe = self.get_users_pipe()
    user_id_dtype = (
        users_pipe._attributes.get('parameters', {}).get('dtypes', {}).get('user_id', 'uuid')
    )

    cache_connector = self.__dict__.get('_cache_connector', None)

    self._tokens_pipe = mrsm.Pipe(
        'mrsm', 'tokens',
        instance=self,
        target='mrsm_tokens',
        temporary=True,
        cache=True,
        cache_connector_keys=cache_connector,
        static=True,
        autotime=True,
        null_indices=False,
        columns={
            'datetime': 'creation',
            'primary': 'id',
        },
        indices={
            'unique': 'label',
            'user_id': 'user_id',
        },
        dtypes={
            'id': 'uuid',
            'creation': 'datetime',
            'expiration': 'datetime',
            'is_valid': 'bool',
            'label': 'string',
            'user_id': user_id_dtype,
            'scopes': 'json',
            'secret_hash': 'string',
        },
    )
    return self._tokens_pipe
```
Return the internal pipe for tokens management.
```python
def register_token(
    self,
    token: Token,
    debug: bool = False,
) -> mrsm.SuccessTuple:
    """
    Register the new token to the tokens table.
    """
    token_id, token_secret = token.generate_credentials()
    tokens_pipe = self.get_tokens_pipe()
    user_id = self.get_user_id(token.user) if token.user is not None else None
    if user_id is None:
        return False, "Cannot register a token without a user."

    doc = {
        'id': token_id,
        'user_id': user_id,
        'creation': datetime.now(timezone.utc),
        'expiration': token.expiration,
        'label': token.label,
        'is_valid': token.is_valid,
        'scopes': list(token.scopes) if token.scopes else [],
        'secret_hash': hash_password(
            str(token_secret),
            rounds=STATIC_CONFIG['tokens']['hash_rounds']
        ),
    }
    sync_success, sync_msg = tokens_pipe.sync([doc], check_existing=False, debug=debug)
    if not sync_success:
        return False, f"Failed to register token:\n{sync_msg}"
    return True, "Success"
```
Register the new token to the tokens table.
```python
def edit_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Persist the token's in-memory state to the tokens pipe.
    """
    if not token.id:
        return False, "Token ID is not set."

    if not token.exists(debug=debug):
        return False, f"Token {token.id} does not exist."

    if not token.creation:
        token_model = self.get_token_model(token.id)
        token.creation = token_model.creation

    tokens_pipe = self.get_tokens_pipe()
    doc = {
        'id': token.id,
        'creation': token.creation,
        'expiration': token.expiration,
        'label': token.label,
        'is_valid': token.is_valid,
        'scopes': list(token.scopes) if token.scopes else [],
    }
    sync_success, sync_msg = tokens_pipe.sync([doc], debug=debug)
    if not sync_success:
        return False, f"Failed to edit token '{token.id}':\n{sync_msg}"

    return True, "Success"
```
Persist the token's in-memory state to the tokens pipe.
```python
def invalidate_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Set `is_valid` to `False` for the given token.
    """
    if not token.id:
        return False, "Token ID is not set."

    if not token.exists(debug=debug):
        return False, f"Token {token.id} does not exist."

    if not token.creation:
        token_model = self.get_token_model(token.id)
        token.creation = token_model.creation

    token.is_valid = False
    tokens_pipe = self.get_tokens_pipe()
    doc = {
        'id': token.id,
        'creation': token.creation,
        'is_valid': False,
    }
    sync_success, sync_msg = tokens_pipe.sync([doc], debug=debug)
    if not sync_success:
        return False, f"Failed to invalidate token '{token.id}':\n{sync_msg}"

    return True, "Success"
```
Set `is_valid` to `False` for the given token.
```python
def delete_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete the given token from the tokens table.
    """
    if not token.id:
        return False, "Token ID is not set."

    if not token.exists(debug=debug):
        return False, f"Token {token.id} does not exist."

    if not token.creation:
        token_model = self.get_token_model(token.id)
        token.creation = token_model.creation

    token.is_valid = False
    tokens_pipe = self.get_tokens_pipe()
    clear_success, clear_msg = tokens_pipe.clear(params={'id': token.id}, debug=debug)
    if not clear_success:
        return False, f"Failed to delete token '{token.id}':\n{clear_msg}"

    return True, "Success"
```
Delete the given token from the tokens table.
```python
def get_token(self, token_id: Union[uuid.UUID, str], debug: bool = False) -> Union[Token, None]:
    """
    Return the `Token` from its ID.
    """
    from meerschaum.utils.misc import is_uuid
    if isinstance(token_id, str):
        if is_uuid(token_id):
            token_id = uuid.UUID(token_id)
        else:
            raise ValueError("Invalid token ID.")
    token_model = self.get_token_model(token_id)
    if token_model is None:
        return None
    return Token(**dict(token_model))
```
Return the `Token` from its ID.
```python
def get_tokens(
    self,
    user: Optional[User] = None,
    labels: Optional[List[str]] = None,
    ids: Optional[List[uuid.UUID]] = None,
    debug: bool = False,
) -> List[Token]:
    """
    Return a list of `Token` objects.
    """
    tokens_pipe = self.get_tokens_pipe()
    user_id = (
        self.get_user_id(user, debug=debug)
        if user is not None
        else None
    )
    user_type = self.get_user_type(user, debug=debug) if user is not None else None
    params = (
        {
            'user_id': (
                user_id
                if user_type != 'admin'
                else [user_id, None]
            )
        }
        if user_id is not None
        else {}
    )
    if labels:
        params['label'] = labels
    if ids:
        params['id'] = ids

    if debug:
        dprint(f"Getting tokens with {user_id=}, {params=}")

    tokens_df = tokens_pipe.get_data(params=params, debug=debug)
    if tokens_df is None:
        return []

    if debug:
        dprint(f"Retrieved tokens dataframe:\n{tokens_df}")

    tokens_docs = tokens_df.to_dict(orient='records')
    return [
        Token(
            instance=self,
            **token_doc
        )
        for token_doc in reversed(tokens_docs)
    ]
```
Return a list of `Token` objects.
```python
def get_token_model(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> 'Union[TokenModel, None]':
    """
    Return a token's model from the instance.
    """
    from meerschaum.models import TokenModel
    if isinstance(token_id, Token):
        token_id = token_id.id
    if not token_id:
        raise ValueError("Invalid token ID.")
    tokens_pipe = self.get_tokens_pipe()
    doc = tokens_pipe.get_doc(
        params={'id': token_id},
        debug=debug,
    )
    if doc is None:
        return None
    return TokenModel(**doc)
```
Return a token's model from the instance.
```python
def get_token_secret_hash(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> Union[str, None]:
    """
    Return the secret hash for a given token.
    """
    if isinstance(token_id, Token):
        token_id = token_id.id
    if not token_id:
        raise ValueError("Invalid token ID.")
    tokens_pipe = self.get_tokens_pipe()
    return tokens_pipe.get_value('secret_hash', params={'id': token_id}, debug=debug)
```
Return the secret hash for a given token.
```python
def token_exists(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> bool:
    """
    Return `True` if a token exists in the tokens pipe.
    """
    if isinstance(token_id, Token):
        token_id = token_id.id
    if not token_id:
        raise ValueError("Invalid token ID.")

    tokens_pipe = self.get_tokens_pipe()
    return tokens_pipe.get_value('creation', params={'id': token_id}, debug=debug) is not None
```
Return `True` if a token exists in the tokens pipe.
```python
def get_token_scopes(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> List[str]:
    """
    Return the scopes for a token.
    """
    if isinstance(token_id, Token):
        token_id = token_id.id
    if not token_id:
        raise ValueError("Invalid token ID.")

    tokens_pipe = self.get_tokens_pipe()
    return tokens_pipe.get_value('scopes', params={'id': token_id}, debug=debug) or []
```
Return the scopes for a token.
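The token methods key off a token's UUID, and most accept either a `Token` object or the raw `uuid.UUID`. A small sketch using an existing ID (the placeholder UUID below is hypothetical):

```python
import uuid
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
token_id = uuid.UUID('12345678-1234-5678-1234-567812345678')  # placeholder

if conn.token_exists(token_id):
    token = conn.get_token(token_id)             # build a Token from its model
    print(conn.get_token_scopes(token_id))       # the stored 'scopes' list
    success, msg = conn.invalidate_token(token)  # sets is_valid to False
```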
```python
@abc.abstractmethod
def register_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Insert the pipe's attributes into the internal `pipes` table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be registered.

    Returns
    -------
    A `SuccessTuple` of the result.
    """
```
Insert the pipe's attributes into the internal `pipes` table.

Parameters
- pipe (mrsm.Pipe): The pipe to be registered.

Returns
- A `SuccessTuple` of the result.
```python
@abc.abstractmethod
def get_pipe_attributes(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Dict[str, Any]:
    """
    Return the pipe's document from the internal `pipes` table.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose attributes should be retrieved.

    Returns
    -------
    The document that matches the keys of the pipe.
    """
```
Return the pipe's document from the internal `pipes` table.
Parameters
- pipe (mrsm.Pipe): The pipe whose attributes should be retrieved.
Returns
- The document that matches the keys of the pipe.
```python
@abc.abstractmethod
def get_pipe_id(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> Union[str, int, None]:
    """
    Return the `id` for the pipe if it exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose `id` to fetch.

    Returns
    -------
    The `id` for the pipe's document or `None`.
    """
```
Return the `id` for the pipe if it exists.

Parameters
- pipe (mrsm.Pipe): The pipe whose `id` to fetch.

Returns
- The `id` for the pipe's document or `None`.
```python
def edit_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Edit the attributes of the pipe.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose in-memory parameters must be persisted.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    raise NotImplementedError
```
Edit the attributes of the pipe.
Parameters
- pipe (mrsm.Pipe): The pipe whose in-memory parameters must be persisted.
Returns
- A `SuccessTuple` indicating success.
```python
def delete_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Delete a pipe's registration from the `pipes` collection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be deleted.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    raise NotImplementedError
```
Delete a pipe's registration from the `pipes` collection.
Parameters
- pipe (mrsm.Pipe): The pipe to be deleted.
Returns
- A `SuccessTuple` indicating success.
```python
@abc.abstractmethod
def fetch_pipes_keys(
    self,
    connector_keys: Optional[List[str]] = None,
    metric_keys: Optional[List[str]] = None,
    location_keys: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    debug: bool = False,
    **kwargs: Any
) -> List[Tuple[str, str, str]]:
    """
    Return a list of tuples for the registered pipes' keys according to the provided filters.

    Parameters
    ----------
    connector_keys: list[str] | None, default None
        The keys passed via `-c`.

    metric_keys: list[str] | None, default None
        The keys passed via `-m`.

    location_keys: list[str] | None, default None
        The keys passed via `-l`.

    tags: List[str] | None, default None
        Tags passed via `--tags` which are stored under `parameters:tags`.

    Returns
    -------
    A list of connector, metric, and location keys in tuples.
    You may return the string "None" for location keys in place of nulls.

    Examples
    --------
    >>> import meerschaum as mrsm
    >>> conn = mrsm.get_connector('example:demo')
    >>>
    >>> pipe_a = mrsm.Pipe('a', 'demo', tags=['foo'], instance=conn)
    >>> pipe_b = mrsm.Pipe('b', 'demo', tags=['bar'], instance=conn)
    >>> pipe_a.register()
    >>> pipe_b.register()
    >>>
    >>> conn.fetch_pipes_keys(['a', 'b'])
    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
    >>> conn.fetch_pipes_keys(metric_keys=['demo'])
    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
    >>> conn.fetch_pipes_keys(tags=['foo'])
    [('a', 'demo', 'None')]
    >>> conn.fetch_pipes_keys(location_keys=[None])
    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
    """
```
Return a list of tuples for the registered pipes' keys according to the provided filters.
Parameters
- connector_keys (list[str] | None, default None): The keys passed via `-c`.
- metric_keys (list[str] | None, default None): The keys passed via `-m`.
- location_keys (list[str] | None, default None): The keys passed via `-l`.
- tags (List[str] | None, default None): Tags passed via `--tags` which are stored under `parameters:tags`.

Returns
- A list of connector, metric, and location keys in tuples.
  You may return the string "None" for location keys in place of nulls.
Examples
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('example:demo')
>>>
>>> pipe_a = mrsm.Pipe('a', 'demo', tags=['foo'], instance=conn)
>>> pipe_b = mrsm.Pipe('b', 'demo', tags=['bar'], instance=conn)
>>> pipe_a.register()
>>> pipe_b.register()
>>>
>>> conn.fetch_pipes_keys(['a', 'b'])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
>>> conn.fetch_pipes_keys(metric_keys=['demo'])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
>>> conn.fetch_pipes_keys(tags=['foo'])
[('a', 'demo', 'None')]
>>> conn.fetch_pipes_keys(location_keys=[None])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
```python
@abc.abstractmethod
def pipe_exists(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> bool:
    """
    Check whether a pipe's target table exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to check whether its table exists.

    Returns
    -------
    A `bool` indicating the table exists.
    """
```
Check whether a pipe's target table exists.
Parameters
- pipe (mrsm.Pipe): The pipe to check whether its table exists.
Returns
- A `bool` indicating the table exists.
```python
@abc.abstractmethod
def drop_pipe(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop a pipe's collection if it exists.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe to be dropped.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    raise NotImplementedError
```
Drop a pipe's collection if it exists.
Parameters
- pipe (mrsm.Pipe): The pipe to be dropped.
Returns
- A `SuccessTuple` indicating success.
```python
def drop_pipe_indices(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Drop a pipe's indices.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices need to be dropped.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    return False, f"Cannot drop indices for instance connectors of type '{self.type}'."
```
Drop a pipe's indices.
Parameters
- pipe (mrsm.Pipe): The pipe whose indices need to be dropped.
Returns
- A `SuccessTuple` indicating success.
```python
@abc.abstractmethod
def sync_pipe(
    self,
    pipe: mrsm.Pipe,
    df: 'pd.DataFrame' = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    chunksize: Optional[int] = -1,
    check_existing: bool = True,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Sync a pipe using a database connection.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The Meerschaum Pipe instance into which to sync the data.

    df: Optional[pd.DataFrame]
        An optional DataFrame or equivalent to sync into the pipe.
        Defaults to `None`.

    begin: Union[datetime, int, None], default None
        Optionally specify the earliest datetime to search for data.

    end: Union[datetime, int, None], default None
        Optionally specify the latest datetime to search for data.

    chunksize: Optional[int], default -1
        Specify the number of rows to sync per chunk.
        If `-1`, resort to system configuration (default is `900`).
        A `chunksize` of `None` will sync all rows in one transaction.

    check_existing: bool, default True
        If `True`, pull and diff with existing data from the pipe.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success (`bool`) and message (`str`).
    """
```
Sync a pipe using a database connection.
Parameters
- pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
- df (Optional[pd.DataFrame]): An optional DataFrame or equivalent to sync into the pipe. Defaults to `None`.
- begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data.
- end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data.
- chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If `-1`, resort to system configuration (default is `900`). A `chunksize` of `None` will sync all rows in one transaction.
- check_existing (bool, default True): If `True`, pull and diff with existing data from the pipe.
- debug (bool, default False): Verbosity toggle.

Returns
- A `SuccessTuple` of success (`bool`) and message (`str`).
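In practice `sync_pipe()` is reached through `mrsm.Pipe.sync`, as the users, plugins, and tokens listings above do. A brief sketch (the pipe keys are hypothetical):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'temps',
    instance='sql:main',
    columns={'datetime': 'dt'},
)

docs = [
    {'dt': '2024-01-01 00:00:00', 'temp': 20.1},
    {'dt': '2024-01-01 01:00:00', 'temp': 19.8},
]
# check_existing=False skips the pull-and-diff step (an insert-only sync),
# the same pattern the internal pipes above use when registering new rows.
success, msg = pipe.sync(docs, check_existing=False)
```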
```python
def create_pipe_indices(
    self,
    pipe: mrsm.Pipe,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Create a pipe's indices.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose indices need to be created.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    return False, f"Cannot create indices for instance connectors of type '{self.type}'."
```
Create a pipe's indices.
Parameters
- pipe (mrsm.Pipe): The pipe whose indices need to be created.
Returns
- A `SuccessTuple` indicating success.
```python
def clear_pipe(
    self,
    pipe: mrsm.Pipe,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> mrsm.SuccessTuple:
    """
    Delete rows within `begin`, `end`, and `params`.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe whose rows to clear.

    begin: datetime | int | None, default None
        If provided, remove rows >= `begin`.

    end: datetime | int | None, default None
        If provided, remove rows < `end`.

    params: dict[str, Any] | None, default None
        If provided, only remove rows which match the `params` filter.

    Returns
    -------
    A `SuccessTuple` indicating success.
    """
    raise NotImplementedError
```
Delete rows within `begin`, `end`, and `params`.

Parameters
- pipe (mrsm.Pipe): The pipe whose rows to clear.
- begin (datetime | int | None, default None): If provided, remove rows >= `begin`.
- end (datetime | int | None, default None): If provided, remove rows < `end`.
- params (dict[str, Any] | None, default None): If provided, only remove rows which match the `params` filter.

Returns
- A `SuccessTuple` indicating success.
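Note the asymmetric bounds: `begin` is inclusive while `end` is exclusive. A sketch via `mrsm.Pipe.clear` (hypothetical keys):

```python
from datetime import datetime, timezone

import meerschaum as mrsm

pipe = mrsm.Pipe('demo', 'temps', instance='sql:main')

# Removes rows where dt >= begin and dt < end.
success, msg = pipe.clear(
    begin=datetime(2024, 1, 1, tzinfo=timezone.utc),
    end=datetime(2024, 2, 1, tzinfo=timezone.utc),
)
```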
```python
@abc.abstractmethod
def get_pipe_data(
    self,
    pipe: mrsm.Pipe,
    select_columns: Optional[List[str]] = None,
    omit_columns: Optional[List[str]] = None,
    begin: Union[datetime, int, None] = None,
    end: Union[datetime, int, None] = None,
    params: Optional[Dict[str, Any]] = None,
    debug: bool = False,
    **kwargs: Any
) -> Union['pd.DataFrame', None]:
    """
    Query a pipe's target table and return the DataFrame.

    Parameters
    ----------
    pipe: mrsm.Pipe
        The pipe with the target table from which to read.

    select_columns: list[str] | None, default None
        If provided, only select these given columns.
        Otherwise select all available columns (i.e. `SELECT *`).

    omit_columns: list[str] | None, default None
        If provided, remove these columns from the selection.

    begin: datetime | int | None, default None
        The earliest `datetime` value to search from (inclusive).

    end: datetime | int | None, default None
        The latest `datetime` value to search from (exclusive).

    params: dict[str, Any] | None, default None
        Additional filters to apply to the query.

    Returns
    -------
    The target table's data as a DataFrame.
    """
```
Query a pipe's target table and return the DataFrame.
Parameters
- pipe (mrsm.Pipe): The pipe with the target table from which to read.
- select_columns (list[str] | None, default None): If provided, only select these given columns. Otherwise select all available columns (i.e. `SELECT *`).
- omit_columns (list[str] | None, default None): If provided, remove these columns from the selection.
- begin (datetime | int | None, default None): The earliest `datetime` value to search from (inclusive).
- end (datetime | int | None, default None): The latest `datetime` value to search from (exclusive).
- params (dict[str, Any] | None, default None): Additional filters to apply to the query.
Returns
- The target table's data as a DataFrame.
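A minimal sketch (column names are hypothetical; `pipe.get_data()` dispatches to this method on the pipe's instance connector):
>>> from datetime import datetime
>>> df = conn.get_pipe_data(
...     pipe,
...     select_columns=['dt', 'value'],
...     begin=datetime(2024, 1, 1),
...     params={'id': 1},
... )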
372@abc.abstractmethod 373def get_sync_time( 374 self, 375 pipe: mrsm.Pipe, 376 params: Optional[Dict[str, Any]] = None, 377 newest: bool = True, 378 debug: bool = False, 379 **kwargs: Any 380) -> datetime | int | None: 381 """ 382 Return the most recent value for the `datetime` axis. 383 384 Parameters 385 ---------- 386 pipe: mrsm.Pipe 387 The pipe whose collection contains documents. 388 389 params: dict[str, Any] | None, default None 390 Filter certain parameters when determining the sync time. 391 392 newest: bool, default True 393 If `True`, return the maximum value for the column. 394 395 Returns 396 ------- 397 The largest `datetime` or `int` value of the `datetime` axis. 398 """
Return the most recent value for the datetime axis.
Parameters
- pipe (mrsm.Pipe): The pipe whose collection contains documents.
- params (dict[str, Any] | None, default None): Filter certain parameters when determining the sync time.
- newest (bool, default True): If `True`, return the maximum value for the column.
Returns
- The largest `datetime` or `int` value of the `datetime` axis.
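For example (assuming `conn` and `pipe` as above):
>>> newest_dt = conn.get_sync_time(pipe)
>>> oldest_dt = conn.get_sync_time(pipe, newest=False)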
400@abc.abstractmethod 401def get_pipe_columns_types( 402 self, 403 pipe: mrsm.Pipe, 404 debug: bool = False, 405 **kwargs: Any 406) -> Dict[str, str]: 407 """ 408 Return the data types for the columns in the target table for data type enforcement. 409 410 Parameters 411 ---------- 412 pipe: mrsm.Pipe 413 The pipe whose target table contains columns and data types. 414 415 Returns 416 ------- 417 A dictionary mapping columns to data types. 418 """
Return the data types for the columns in the target table for data type enforcement.
Parameters
- pipe (mrsm.Pipe): The pipe whose target table contains columns and data types.
Returns
- A dictionary mapping columns to data types.
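A sketch of the expected shape; the exact type strings depend on the connector's flavor (the values below are illustrative for a PostgreSQL-like backend):
>>> conn.get_pipe_columns_types(pipe)
{'dt': 'TIMESTAMP', 'id': 'BIGINT', 'value': 'DOUBLE PRECISION'}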
420def get_pipe_columns_indices( 421 self, 422 debug: bool = False, 423) -> Dict[str, List[Dict[str, str]]]: 424 """ 425 Return a dictionary mapping columns to metadata about related indices. 426 427 Parameters 428 ---------- 429 pipe: mrsm.Pipe 430 The pipe whose target table has related indices. 431 432 Returns 433 ------- 434 A list of dictionaries with the keys "type" and "name". 435 436 Examples 437 -------- 438 >>> pipe = mrsm.Pipe('demo', 'shirts', columns={'primary': 'id'}, indices={'size_color': ['color', 'size']}) 439 >>> pipe.sync([{'color': 'red', 'size': 'M'}]) 440 >>> pipe.get_columns_indices() 441 {'id': [{'name': 'demo_shirts_pkey', 'type': 'PRIMARY KEY'}], 'color': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}], 'size': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}]} 442 """ 443 return {}
Return a dictionary mapping columns to metadata about related indices.
Parameters
- pipe (mrsm.Pipe): The pipe whose target table has related indices.
Returns
- A dictionary mapping column names to lists of dictionaries with the keys "type" and "name".
Examples
>>> pipe = mrsm.Pipe('demo', 'shirts', columns={'primary': 'id'}, indices={'size_color': ['color', 'size']})
>>> pipe.sync([{'color': 'red', 'size': 'M'}])
>>> pipe.get_columns_indices()
{'id': [{'name': 'demo_shirts_pkey', 'type': 'PRIMARY KEY'}], 'color': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}], 'size': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}]}
20class SQLConnector(InstanceConnector): 21 """ 22 Connect to SQL databases via `sqlalchemy`. 23 24 SQLConnectors may be used as Meerschaum instance connectors. 25 Read more about connectors and instances at 26 https://meerschaum.io/reference/connectors/ 27 28 """ 29 30 from ._create_engine import flavor_configs, create_engine 31 from ._sql import ( 32 read, 33 value, 34 exec, 35 execute, 36 to_sql, 37 exec_queries, 38 get_connection, 39 _cleanup_connections, 40 ) 41 from meerschaum.utils.sql import test_connection 42 from ._fetch import fetch, get_pipe_metadef 43 from ._cli import cli, _cli_exit 44 from ._pipes import ( 45 fetch_pipes_keys, 46 create_indices, 47 drop_indices, 48 get_create_index_queries, 49 get_drop_index_queries, 50 get_add_columns_queries, 51 get_alter_columns_queries, 52 delete_pipe, 53 get_pipe_data, 54 get_pipe_data_query, 55 register_pipe, 56 edit_pipe, 57 get_pipe_id, 58 get_pipe_attributes, 59 sync_pipe, 60 sync_pipe_inplace, 61 get_sync_time, 62 pipe_exists, 63 get_pipe_rowcount, 64 drop_pipe, 65 clear_pipe, 66 deduplicate_pipe, 67 get_pipe_table, 68 get_pipe_columns_types, 69 get_to_sql_dtype, 70 get_pipe_schema, 71 create_pipe_table_from_df, 72 get_pipe_columns_indices, 73 get_temporary_target, 74 create_pipe_indices, 75 drop_pipe_indices, 76 get_pipe_index_names, 77 _init_geopackage_pipe, 78 ) 79 from ._plugins import ( 80 get_plugins_pipe, 81 register_plugin, 82 delete_plugin, 83 get_plugin_id, 84 get_plugin_version, 85 get_plugins, 86 get_plugin_user_id, 87 get_plugin_username, 88 get_plugin_attributes, 89 ) 90 from ._users import ( 91 get_users_pipe, 92 register_user, 93 get_user_id, 94 get_users, 95 edit_user, 96 delete_user, 97 get_user_password_hash, 98 get_user_type, 99 get_user_attributes, 100 ) 101 from ._uri import from_uri, parse_uri 102 from ._instance import ( 103 _log_temporary_tables_creation, 104 _drop_temporary_table, 105 _drop_temporary_tables, 106 _drop_old_temporary_tables, 107 ) 108 109 def __init__( 110 self, 111 label: Optional[str] = None, 112 flavor: Optional[str] = None, 113 wait: bool = False, 114 connect: bool = False, 115 debug: bool = False, 116 **kw: Any 117 ): 118 """ 119 Parameters 120 ---------- 121 label: str, default 'main' 122 The identifying label for the connector. 123 E.g. for `sql:main`, 'main' is the label. 124 Defaults to 'main'. 125 126 flavor: Optional[str], default None 127 The database flavor, e.g. 128 `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc. 129 To see supported flavors, run the `bootstrap connectors` command. 130 131 wait: bool, default False 132 If `True`, block until a database connection has been made. 133 Defaults to `False`. 134 135 connect: bool, default False 136 If `True`, immediately attempt to connect the database and raise 137 a warning if the connection fails. 138 Defaults to `False`. 139 140 debug: bool, default False 141 Verbosity toggle. 142 Defaults to `False`. 143 144 kw: Any 145 All other arguments will be passed to the connector's attributes. 146 Therefore, a connector may be made without being registered, 147 as long enough parameters are supplied to the constructor. 
148 """ 149 if 'uri' in kw: 150 uri = kw['uri'] 151 if uri.startswith('postgres') and not uri.startswith('postgresql'): 152 uri = uri.replace('postgres', 'postgresql', 1) 153 if uri.startswith('postgresql') and not uri.startswith('postgresql+'): 154 uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1) 155 if uri.startswith('timescaledb://'): 156 uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1) 157 flavor = 'timescaledb' 158 if uri.startswith('timescaledb-ha://'): 159 uri = uri.replace('timescaledb-ha://', 'postgresql+psycopg://', 1) 160 flavor = 'timescaledb-ha' 161 if uri.startswith('postgis://'): 162 uri = uri.replace('postgis://', 'postgresql+psycopg://', 1) 163 flavor = 'postgis' 164 kw['uri'] = uri 165 from_uri_params = self.from_uri(kw['uri'], as_dict=True) 166 label = label or from_uri_params.get('label', None) 167 _ = from_uri_params.pop('label', None) 168 169 ### Sometimes the flavor may be provided with a URI. 170 kw.update(from_uri_params) 171 if flavor: 172 kw['flavor'] = flavor 173 174 ### set __dict__ in base class 175 super().__init__( 176 'sql', 177 label = label or self.__dict__.get('label', None), 178 **kw 179 ) 180 181 if self.__dict__.get('flavor', None) in ('sqlite', 'geopackage'): 182 self._reset_attributes() 183 self._set_attributes( 184 'sql', 185 label = label, 186 inherit_default = False, 187 **kw 188 ) 189 ### For backwards compatability reasons, set the path for sql:local if its missing. 190 if self.label == 'local' and not self.__dict__.get('database', None): 191 from meerschaum.config._paths import SQLITE_DB_PATH 192 self.database = SQLITE_DB_PATH.as_posix() 193 194 ### ensure flavor and label are set accordingly 195 if 'flavor' not in self.__dict__: 196 if flavor is None and 'uri' not in self.__dict__: 197 raise ValueError( 198 f" Missing flavor. Provide flavor as a key for '{self}'." 199 ) 200 self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None) 201 202 if self.flavor == 'postgres': 203 self.flavor = 'postgresql' 204 205 self._debug = debug 206 ### Store the PID and thread at initialization 207 ### so we can dispose of the Pool in child processes or threads. 208 import os 209 import threading 210 self._pid = os.getpid() 211 self._thread_ident = threading.current_thread().ident 212 self._sessions = {} 213 self._locks = {'_sessions': threading.RLock(), } 214 215 ### verify the flavor's requirements are met 216 if self.flavor not in self.flavor_configs: 217 error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector") 218 if not self.__dict__.get('uri'): 219 self.verify_attributes( 220 self.flavor_configs[self.flavor].get('requirements', set()), 221 debug=debug, 222 ) 223 224 if wait: 225 from meerschaum.connectors.poll import retry_connect 226 retry_connect(connector=self, debug=debug) 227 228 if connect: 229 if not self.test_connection(debug=debug): 230 warn(f"Failed to connect with connector '{self}'!", stack=False) 231 232 @property 233 def Session(self): 234 if '_Session' not in self.__dict__: 235 if self.engine is None: 236 return None 237 238 from meerschaum.utils.packages import attempt_import 239 sqlalchemy_orm = attempt_import('sqlalchemy.orm', lazy=False) 240 session_factory = sqlalchemy_orm.sessionmaker(self.engine) 241 self._Session = sqlalchemy_orm.scoped_session(session_factory) 242 243 return self._Session 244 245 @property 246 def engine(self): 247 """ 248 Return the SQLAlchemy engine connected to the configured database. 
249 """ 250 import os 251 import threading 252 if '_engine' not in self.__dict__: 253 self._engine, self._engine_str = self.create_engine(include_uri=True) 254 255 same_process = os.getpid() == self._pid 256 same_thread = threading.current_thread().ident == self._thread_ident 257 258 ### handle child processes 259 if not same_process: 260 self._pid = os.getpid() 261 self._thread = threading.current_thread() 262 warn("Different PID detected. Disposing of connections...") 263 self._engine.dispose() 264 265 ### handle different threads 266 if not same_thread: 267 if self.flavor == 'duckdb': 268 warn("Different thread detected.") 269 self._engine.dispose() 270 271 return self._engine 272 273 @property 274 def DATABASE_URL(self) -> str: 275 """ 276 Return the URI connection string (alias for `SQLConnector.URI`. 277 """ 278 _ = self.engine 279 return str(self._engine_str) 280 281 @property 282 def URI(self) -> str: 283 """ 284 Return the URI connection string. 285 """ 286 _ = self.engine 287 return str(self._engine_str) 288 289 @property 290 def IS_THREAD_SAFE(self) -> str: 291 """ 292 Return whether this connector may be multithreaded. 293 """ 294 if self.flavor in ('duckdb', 'oracle'): 295 return False 296 if self.flavor in ('sqlite', 'geopackage'): 297 return ':memory:' not in self.URI 298 return True 299 300 @property 301 def metadata(self): 302 """ 303 Return the metadata bound to this configured schema. 304 """ 305 from meerschaum.utils.packages import attempt_import 306 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 307 if '_metadata' not in self.__dict__: 308 self._metadata = sqlalchemy.MetaData(schema=self.schema) 309 return self._metadata 310 311 @property 312 def instance_schema(self): 313 """ 314 Return the schema name for Meerschaum tables. 315 """ 316 return self.schema 317 318 @property 319 def internal_schema(self): 320 """ 321 Return the schema name for internal tables. 322 """ 323 from meerschaum._internal.static import STATIC_CONFIG 324 from meerschaum.utils.sql import NO_SCHEMA_FLAVORS 325 schema_name = self.__dict__.get('internal_schema', None) or ( 326 STATIC_CONFIG['sql']['internal_schema'] 327 if self.flavor not in NO_SCHEMA_FLAVORS 328 else self.schema 329 ) 330 331 if '_internal_schema' not in self.__dict__: 332 self._internal_schema = schema_name 333 return self._internal_schema 334 335 @property 336 def db(self) -> Optional[databases.Database]: 337 from meerschaum.utils.packages import attempt_import 338 databases = attempt_import('databases', lazy=False, install=True) 339 url = self.DATABASE_URL 340 if 'mysql' in url: 341 url = url.replace('+pymysql', '') 342 if '_db' not in self.__dict__: 343 try: 344 self._db = databases.Database(url) 345 except KeyError: 346 ### Likely encountered an unsupported flavor. 347 from meerschaum.utils.warnings import warn 348 self._db = None 349 return self._db 350 351 @property 352 def db_version(self) -> Union[str, None]: 353 """ 354 Return the database version. 355 """ 356 _db_version = self.__dict__.get('_db_version', None) 357 if _db_version is not None: 358 return _db_version 359 360 from meerschaum.utils.sql import get_db_version 361 self._db_version = get_db_version(self) 362 return self._db_version 363 364 @property 365 def schema(self) -> Union[str, None]: 366 """ 367 Return the default schema to use. 368 A value of `None` will not prepend a schema. 
369 """ 370 if 'schema' in self.__dict__: 371 return self.__dict__['schema'] 372 373 from meerschaum.utils.sql import NO_SCHEMA_FLAVORS 374 if self.flavor in NO_SCHEMA_FLAVORS: 375 self.__dict__['schema'] = None 376 return None 377 378 sqlalchemy = mrsm.attempt_import('sqlalchemy', lazy=False) 379 _schema = sqlalchemy.inspect(self.engine).default_schema_name 380 self.__dict__['schema'] = _schema 381 return _schema 382 383 def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path: 384 """ 385 Return the path to the file to which to write metadata cache. 386 """ 387 from meerschaum.config.paths import SQL_CONN_CACHE_RESOURCES_PATH 388 filename = ( 389 f'{self.label}-metadata.pkl' 390 if kind == 'pkl' 391 else f'{self.label}.json' 392 ) 393 return SQL_CONN_CACHE_RESOURCES_PATH / filename 394 395 def __getstate__(self): 396 return self.__dict__ 397 398 def __setstate__(self, d): 399 self.__dict__.update(d) 400 401 def __call__(self): 402 return self
Connect to SQL databases via `sqlalchemy`.
SQLConnectors may be used as Meerschaum instance connectors. Read more about connectors and instances at https://meerschaum.io/reference/connectors/
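For example, a throwaway SQLite connector (the label and database path are hypothetical):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> conn.test_connection()
True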
109 def __init__( 110 self, 111 label: Optional[str] = None, 112 flavor: Optional[str] = None, 113 wait: bool = False, 114 connect: bool = False, 115 debug: bool = False, 116 **kw: Any 117 ): 118 """ 119 Parameters 120 ---------- 121 label: str, default 'main' 122 The identifying label for the connector. 123 E.g. for `sql:main`, 'main' is the label. 124 Defaults to 'main'. 125 126 flavor: Optional[str], default None 127 The database flavor, e.g. 128 `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc. 129 To see supported flavors, run the `bootstrap connectors` command. 130 131 wait: bool, default False 132 If `True`, block until a database connection has been made. 133 Defaults to `False`. 134 135 connect: bool, default False 136 If `True`, immediately attempt to connect the database and raise 137 a warning if the connection fails. 138 Defaults to `False`. 139 140 debug: bool, default False 141 Verbosity toggle. 142 Defaults to `False`. 143 144 kw: Any 145 All other arguments will be passed to the connector's attributes. 146 Therefore, a connector may be made without being registered, 147 as long enough parameters are supplied to the constructor. 148 """ 149 if 'uri' in kw: 150 uri = kw['uri'] 151 if uri.startswith('postgres') and not uri.startswith('postgresql'): 152 uri = uri.replace('postgres', 'postgresql', 1) 153 if uri.startswith('postgresql') and not uri.startswith('postgresql+'): 154 uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1) 155 if uri.startswith('timescaledb://'): 156 uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1) 157 flavor = 'timescaledb' 158 if uri.startswith('timescaledb-ha://'): 159 uri = uri.replace('timescaledb-ha://', 'postgresql+psycopg://', 1) 160 flavor = 'timescaledb-ha' 161 if uri.startswith('postgis://'): 162 uri = uri.replace('postgis://', 'postgresql+psycopg://', 1) 163 flavor = 'postgis' 164 kw['uri'] = uri 165 from_uri_params = self.from_uri(kw['uri'], as_dict=True) 166 label = label or from_uri_params.get('label', None) 167 _ = from_uri_params.pop('label', None) 168 169 ### Sometimes the flavor may be provided with a URI. 170 kw.update(from_uri_params) 171 if flavor: 172 kw['flavor'] = flavor 173 174 ### set __dict__ in base class 175 super().__init__( 176 'sql', 177 label = label or self.__dict__.get('label', None), 178 **kw 179 ) 180 181 if self.__dict__.get('flavor', None) in ('sqlite', 'geopackage'): 182 self._reset_attributes() 183 self._set_attributes( 184 'sql', 185 label = label, 186 inherit_default = False, 187 **kw 188 ) 189 ### For backwards compatability reasons, set the path for sql:local if its missing. 190 if self.label == 'local' and not self.__dict__.get('database', None): 191 from meerschaum.config._paths import SQLITE_DB_PATH 192 self.database = SQLITE_DB_PATH.as_posix() 193 194 ### ensure flavor and label are set accordingly 195 if 'flavor' not in self.__dict__: 196 if flavor is None and 'uri' not in self.__dict__: 197 raise ValueError( 198 f" Missing flavor. Provide flavor as a key for '{self}'." 199 ) 200 self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None) 201 202 if self.flavor == 'postgres': 203 self.flavor = 'postgresql' 204 205 self._debug = debug 206 ### Store the PID and thread at initialization 207 ### so we can dispose of the Pool in child processes or threads. 
208 import os 209 import threading 210 self._pid = os.getpid() 211 self._thread_ident = threading.current_thread().ident 212 self._sessions = {} 213 self._locks = {'_sessions': threading.RLock(), } 214 215 ### verify the flavor's requirements are met 216 if self.flavor not in self.flavor_configs: 217 error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector") 218 if not self.__dict__.get('uri'): 219 self.verify_attributes( 220 self.flavor_configs[self.flavor].get('requirements', set()), 221 debug=debug, 222 ) 223 224 if wait: 225 from meerschaum.connectors.poll import retry_connect 226 retry_connect(connector=self, debug=debug) 227 228 if connect: 229 if not self.test_connection(debug=debug): 230 warn(f"Failed to connect with connector '{self}'!", stack=False)
Parameters
- label (str, default 'main'):
The identifying label for the connector.
E.g. for `sql:main`, 'main' is the label. Defaults to 'main'.
- flavor (Optional[str], default None): The database flavor, e.g. `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc. To see supported flavors, run the `bootstrap connectors` command.
- wait (bool, default False): If `True`, block until a database connection has been made. Defaults to `False`.
- connect (bool, default False): If `True`, immediately attempt to connect to the database and raise a warning if the connection fails. Defaults to `False`.
- debug (bool, default False): Verbosity toggle. Defaults to `False`.
- kw (Any): All other arguments will be passed to the connector's attributes. Therefore, a connector may be made without being registered, as long as enough parameters are supplied to the constructor.
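A constructor sketch using a URI (the credentials are placeholders; note that a `postgres://` scheme is normalized to `postgresql+psycopg://`):
>>> from meerschaum.connectors.sql import SQLConnector
>>> conn = SQLConnector(
...     label='temp',
...     uri='postgres://user:pass@localhost:5432/db',
... )
>>> conn.flavor
'postgresql'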
232 @property 233 def Session(self): 234 if '_Session' not in self.__dict__: 235 if self.engine is None: 236 return None 237 238 from meerschaum.utils.packages import attempt_import 239 sqlalchemy_orm = attempt_import('sqlalchemy.orm', lazy=False) 240 session_factory = sqlalchemy_orm.sessionmaker(self.engine) 241 self._Session = sqlalchemy_orm.scoped_session(session_factory) 242 243 return self._Session
245 @property 246 def engine(self): 247 """ 248 Return the SQLAlchemy engine connected to the configured database. 249 """ 250 import os 251 import threading 252 if '_engine' not in self.__dict__: 253 self._engine, self._engine_str = self.create_engine(include_uri=True) 254 255 same_process = os.getpid() == self._pid 256 same_thread = threading.current_thread().ident == self._thread_ident 257 258 ### handle child processes 259 if not same_process: 260 self._pid = os.getpid() 261 self._thread = threading.current_thread() 262 warn("Different PID detected. Disposing of connections...") 263 self._engine.dispose() 264 265 ### handle different threads 266 if not same_thread: 267 if self.flavor == 'duckdb': 268 warn("Different thread detected.") 269 self._engine.dispose() 270 271 return self._engine
Return the SQLAlchemy engine connected to the configured database.
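The engine may be used directly with SQLAlchemy, e.g. (a minimal sketch):
>>> import sqlalchemy
>>> with conn.engine.connect() as connection:
...     connection.execute(sqlalchemy.text('SELECT 1')).scalar()
1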
273 @property 274 def DATABASE_URL(self) -> str: 275 """ 276 Return the URI connection string (alias for `SQLConnector.URI`. 277 """ 278 _ = self.engine 279 return str(self._engine_str)
Return the URI connection string (alias for `SQLConnector.URI`).
281 @property 282 def URI(self) -> str: 283 """ 284 Return the URI connection string. 285 """ 286 _ = self.engine 287 return str(self._engine_str)
Return the URI connection string.
289 @property 290 def IS_THREAD_SAFE(self) -> str: 291 """ 292 Return whether this connector may be multithreaded. 293 """ 294 if self.flavor in ('duckdb', 'oracle'): 295 return False 296 if self.flavor in ('sqlite', 'geopackage'): 297 return ':memory:' not in self.URI 298 return True
Return whether this connector may be multithreaded.
300 @property 301 def metadata(self): 302 """ 303 Return the metadata bound to this configured schema. 304 """ 305 from meerschaum.utils.packages import attempt_import 306 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 307 if '_metadata' not in self.__dict__: 308 self._metadata = sqlalchemy.MetaData(schema=self.schema) 309 return self._metadata
Return the metadata bound to this configured schema.
311 @property 312 def instance_schema(self): 313 """ 314 Return the schema name for Meerschaum tables. 315 """ 316 return self.schema
Return the schema name for Meerschaum tables.
318 @property 319 def internal_schema(self): 320 """ 321 Return the schema name for internal tables. 322 """ 323 from meerschaum._internal.static import STATIC_CONFIG 324 from meerschaum.utils.sql import NO_SCHEMA_FLAVORS 325 schema_name = self.__dict__.get('internal_schema', None) or ( 326 STATIC_CONFIG['sql']['internal_schema'] 327 if self.flavor not in NO_SCHEMA_FLAVORS 328 else self.schema 329 ) 330 331 if '_internal_schema' not in self.__dict__: 332 self._internal_schema = schema_name 333 return self._internal_schema
Return the schema name for internal tables.
335 @property 336 def db(self) -> Optional[databases.Database]: 337 from meerschaum.utils.packages import attempt_import 338 databases = attempt_import('databases', lazy=False, install=True) 339 url = self.DATABASE_URL 340 if 'mysql' in url: 341 url = url.replace('+pymysql', '') 342 if '_db' not in self.__dict__: 343 try: 344 self._db = databases.Database(url) 345 except KeyError: 346 ### Likely encountered an unsupported flavor. 347 from meerschaum.utils.warnings import warn 348 self._db = None 349 return self._db
351 @property 352 def db_version(self) -> Union[str, None]: 353 """ 354 Return the database version. 355 """ 356 _db_version = self.__dict__.get('_db_version', None) 357 if _db_version is not None: 358 return _db_version 359 360 from meerschaum.utils.sql import get_db_version 361 self._db_version = get_db_version(self) 362 return self._db_version
Return the database version.
364 @property 365 def schema(self) -> Union[str, None]: 366 """ 367 Return the default schema to use. 368 A value of `None` will not prepend a schema. 369 """ 370 if 'schema' in self.__dict__: 371 return self.__dict__['schema'] 372 373 from meerschaum.utils.sql import NO_SCHEMA_FLAVORS 374 if self.flavor in NO_SCHEMA_FLAVORS: 375 self.__dict__['schema'] = None 376 return None 377 378 sqlalchemy = mrsm.attempt_import('sqlalchemy', lazy=False) 379 _schema = sqlalchemy.inspect(self.engine).default_schema_name 380 self.__dict__['schema'] = _schema 381 return _schema
Return the default schema to use.
A value of `None` will not prepend a schema.
383 def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path: 384 """ 385 Return the path to the file to which to write metadata cache. 386 """ 387 from meerschaum.config.paths import SQL_CONN_CACHE_RESOURCES_PATH 388 filename = ( 389 f'{self.label}-metadata.pkl' 390 if kind == 'pkl' 391 else f'{self.label}.json' 392 ) 393 return SQL_CONN_CACHE_RESOURCES_PATH / filename
Return the path to the file to which to write metadata cache.
45def create_engine( 46 self, 47 include_uri: bool = False, 48 debug: bool = False, 49 **kw 50) -> 'sqlalchemy.engine.Engine': 51 """Create a sqlalchemy engine by building the engine string.""" 52 from meerschaum.utils.packages import attempt_import 53 from meerschaum.utils.warnings import error, warn 54 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 55 import urllib 56 import copy 57 ### Install and patch required drivers. 58 if self.flavor in install_flavor_drivers: 59 _ = attempt_import( 60 *install_flavor_drivers[self.flavor], 61 debug=debug, 62 lazy=False, 63 warn=False, 64 ) 65 if self.flavor == 'mssql': 66 _init_mssql_sqlalchemy() 67 68 ### supplement missing values with defaults (e.g. port number) 69 for a, value in flavor_configs[self.flavor]['defaults'].items(): 70 if a not in self.__dict__: 71 self.__dict__[a] = value 72 73 ### Verify that everything is in order. 74 if self.flavor not in flavor_configs: 75 error(f"Cannot create a connector with the flavor '{self.flavor}'.") 76 77 _engine = flavor_configs[self.flavor].get('engine', None) 78 _username = self.__dict__.get('username', None) 79 _password = self.__dict__.get('password', None) 80 _host = self.__dict__.get('host', None) 81 _port = self.__dict__.get('port', None) 82 _database = self.__dict__.get('database', None) 83 if _database == '{SQLITE_DB_PATH}': 84 from meerschaum.config.paths import SQLITE_DB_PATH 85 _database = SQLITE_DB_PATH.as_posix() 86 _options = self.__dict__.get('options', {}) 87 if isinstance(_options, str): 88 _options = dict(urllib.parse.parse_qsl(_options)) 89 _uri = self.__dict__.get('uri', None) 90 91 ### Handle registering specific dialects (due to installing in virtual environments). 92 if self.flavor in flavor_dialects: 93 sqlalchemy.dialects.registry.register(*flavor_dialects[self.flavor]) 94 95 ### self._sys_config was deepcopied and can be updated safely 96 if self.flavor in ("sqlite", "duckdb", "geopackage"): 97 engine_str = f"{_engine}:///{_database}" if not _uri else _uri 98 if 'create_engine' not in self._sys_config: 99 self._sys_config['create_engine'] = {} 100 if 'connect_args' not in self._sys_config['create_engine']: 101 self._sys_config['create_engine']['connect_args'] = {} 102 self._sys_config['create_engine']['connect_args'].update({"check_same_thread": False}) 103 else: 104 engine_str = ( 105 _engine + "://" + (_username if _username is not None else '') + 106 ((":" + urllib.parse.quote_plus(_password)) if _password is not None else '') + 107 "@" + _host + ((":" + str(_port)) if _port is not None else '') + 108 (("/" + _database) if _database is not None else '') 109 + (("?" + urllib.parse.urlencode(_options)) if _options else '') 110 ) if not _uri else _uri 111 112 ### Sometimes the timescaledb:// flavor can slip in. 113 if _uri and self.flavor in _uri: 114 if self.flavor in ('timescaledb', 'timescaledb-ha', 'postgis'): 115 engine_str = engine_str.replace(self.flavor, 'postgresql', 1) 116 elif _uri.startswith('postgresql://'): 117 engine_str = engine_str.replace('postgresql://', 'postgresql+psycopg2://') 118 119 if debug: 120 dprint( 121 ( 122 (engine_str.replace(':' + _password, ':' + ('*' * len(_password)))) 123 if _password is not None else engine_str 124 ) + '\n' + f"{self._sys_config.get('create_engine', {}).get('connect_args', {})}" 125 ) 126 127 _kw_copy = copy.deepcopy(kw) 128 129 ### NOTE: Order of inheritance: 130 ### 1. Defaults 131 ### 2. System configuration 132 ### 3. Connector configuration 133 ### 4. 
Keyword arguments 134 _create_engine_args = flavor_configs.get(self.flavor, {}).get('create_engine', {}) 135 def _apply_create_engine_args(update): 136 if 'ALL' not in flavor_configs[self.flavor].get('omit_create_engine', {}): 137 _create_engine_args.update( 138 { k: v for k, v in update.items() 139 if 'omit_create_engine' not in flavor_configs[self.flavor] 140 or k not in flavor_configs[self.flavor].get('omit_create_engine') 141 } 142 ) 143 _apply_create_engine_args(self._sys_config.get('create_engine', {})) 144 _apply_create_engine_args(self.__dict__.get('create_engine', {})) 145 _apply_create_engine_args(_kw_copy) 146 147 try: 148 engine = sqlalchemy.create_engine( 149 engine_str, 150 ### I know this looks confusing, and maybe it's bad code, 151 ### but it's simple. It dynamically parses the config string 152 ### and splits it to separate the class name (QueuePool) 153 ### from the module name (sqlalchemy.pool). 154 poolclass = getattr( 155 attempt_import( 156 ".".join(self._sys_config['poolclass'].split('.')[:-1]) 157 ), 158 self._sys_config['poolclass'].split('.')[-1] 159 ), 160 echo = debug, 161 **_create_engine_args 162 ) 163 except Exception: 164 warn(f"Failed to create connector '{self}':\n{traceback.format_exc()}", stack=False) 165 engine = None 166 167 if include_uri: 168 return engine, engine_str 169 return engine
Create a sqlalchemy engine by building the engine string.
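For illustration (this is mostly called internally by the `engine` property; `include_uri` also returns the rendered engine string):
>>> engine, engine_str = conn.create_engine(include_uri=True)
>>> engine.dialect.name  # e.g. 'sqlite' for a SQLite connector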
35def read( 36 self, 37 query_or_table: Union[str, sqlalchemy.Query], 38 params: Union[Dict[str, Any], List[str], None] = None, 39 dtype: Optional[Dict[str, Any]] = None, 40 coerce_float: bool = True, 41 chunksize: Optional[int] = -1, 42 workers: Optional[int] = None, 43 chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None, 44 as_hook_results: bool = False, 45 chunks: Optional[int] = None, 46 schema: Optional[str] = None, 47 as_chunks: bool = False, 48 as_iterator: bool = False, 49 as_dask: bool = False, 50 index_col: Optional[str] = None, 51 silent: bool = False, 52 debug: bool = False, 53 **kw: Any 54) -> Union[ 55 pandas.DataFrame, 56 dask.DataFrame, 57 List[pandas.DataFrame], 58 List[Any], 59 None, 60]: 61 """ 62 Read a SQL query or table into a pandas dataframe. 63 64 Parameters 65 ---------- 66 query_or_table: Union[str, sqlalchemy.Query] 67 The SQL query (sqlalchemy Query or string) or name of the table from which to select. 68 69 params: Optional[Dict[str, Any]], default None 70 `List` or `Dict` of parameters to pass to `pandas.read_sql()`. 71 See the pandas documentation for more information: 72 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html 73 74 dtype: Optional[Dict[str, Any]], default None 75 A dictionary of data types to pass to `pandas.read_sql()`. 76 See the pandas documentation for more information: 77 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html 78 79 chunksize: Optional[int], default -1 80 How many chunks to read at a time. `None` will read everything in one large chunk. 81 Defaults to system configuration. 82 83 **NOTE:** DuckDB does not allow for chunking. 84 85 workers: Optional[int], default None 86 How many threads to use when consuming the generator. 87 Only applies if `chunk_hook` is provided. 88 89 chunk_hook: Optional[Callable[[pandas.DataFrame], Any]], default None 90 Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. 91 See `--sync-chunks` for an example. 92 **NOTE:** `as_iterator` MUST be False (default). 93 94 as_hook_results: bool, default False 95 If `True`, return a `List` of the outputs of the hook function. 96 Only applicable if `chunk_hook` is not None. 97 98 **NOTE:** `as_iterator` MUST be `False` (default). 99 100 chunks: Optional[int], default None 101 Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and 102 return into a single dataframe. 103 For example, to limit the returned dataframe to 100,000 rows, 104 you could specify a `chunksize` of `1000` and `chunks` of `100`. 105 106 schema: Optional[str], default None 107 If just a table name is provided, optionally specify the table schema. 108 Defaults to `SQLConnector.schema`. 109 110 as_chunks: bool, default False 111 If `True`, return a list of DataFrames. 112 Otherwise return a single DataFrame. 113 114 as_iterator: bool, default False 115 If `True`, return the pandas DataFrame iterator. 116 `chunksize` must not be `None` (falls back to 1000 if so), 117 and hooks are not called in this case. 118 119 index_col: Optional[str], default None 120 If using Dask, use this column as the index column. 121 If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame. 122 123 silent: bool, default False 124 If `True`, don't raise warnings in case of errors. 125 Defaults to `False`. 126 127 Returns 128 ------- 129 A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators, 130 or `None` if something breaks. 
131 132 """ 133 if chunks is not None and chunks <= 0: 134 return [] 135 136 from meerschaum.utils.sql import sql_item_name, truncate_item_name 137 from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone 138 from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS 139 from meerschaum.utils.packages import attempt_import, import_pandas 140 from meerschaum.utils.pool import get_pool 141 from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols 142 from meerschaum.utils.misc import filter_arguments 143 import warnings 144 import traceback 145 from decimal import Decimal 146 147 pd = import_pandas() 148 dd = None 149 150 is_dask = 'dask' in pd.__name__ 151 pandas = attempt_import('pandas') 152 is_dask = dd is not None 153 npartitions = chunksize_to_npartitions(chunksize) 154 if is_dask: 155 chunksize = None 156 157 schema = schema or self.schema 158 utc_dt_cols = [ 159 col 160 for col, typ in dtype.items() 161 if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower() 162 ] if dtype else [] 163 164 if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS: 165 dtype = dtype.copy() 166 for col in utc_dt_cols: 167 dtype[col] = 'datetime64[us]' 168 169 pool = get_pool(workers=workers) 170 sqlalchemy = attempt_import("sqlalchemy", lazy=False) 171 default_chunksize = self._sys_config.get('chunksize', None) 172 chunksize = chunksize if chunksize != -1 else default_chunksize 173 if chunksize is None and as_iterator: 174 if not silent and self.flavor not in _disallow_chunks_flavors: 175 warn( 176 "An iterator may only be generated if chunksize is not None.\n" 177 + "Falling back to a chunksize of 1000.", stacklevel=3, 178 ) 179 chunksize = 1000 180 if chunksize is not None and self.flavor in _max_chunks_flavors: 181 if chunksize > _max_chunks_flavors[self.flavor]: 182 if chunksize != default_chunksize: 183 warn( 184 f"The specified chunksize of {chunksize} exceeds the maximum of " 185 + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n" 186 + f" Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.", 187 stacklevel=3, 188 ) 189 chunksize = _max_chunks_flavors[self.flavor] 190 191 if chunksize is not None and self.flavor in _disallow_chunks_flavors: 192 chunksize = None 193 194 if debug: 195 import time 196 start = time.perf_counter() 197 dprint(f"[{self}]\n{query_or_table}") 198 dprint(f"[{self}] Fetching with chunksize: {chunksize}") 199 200 ### This might be sqlalchemy object or the string of a table name. 201 ### We check for spaces and quotes to see if it might be a weird table. 202 if ( 203 ' ' not in str(query_or_table) 204 or ( 205 ' ' in str(query_or_table) 206 and str(query_or_table).startswith('"') 207 and str(query_or_table).endswith('"') 208 ) 209 ): 210 truncated_table_name = truncate_item_name(str(query_or_table), self.flavor) 211 if truncated_table_name != str(query_or_table) and not silent: 212 warn( 213 f"Table '{query_or_table}' is too long for '{self.flavor}'," 214 + f" will instead read the table '{truncated_table_name}'." 
215 ) 216 217 query_or_table = sql_item_name(str(query_or_table), self.flavor, schema) 218 if debug: 219 dprint(f"[{self}] Reading from table {query_or_table}") 220 formatted_query = sqlalchemy.text("SELECT * FROM " + str(query_or_table)) 221 str_query = f"SELECT * FROM {query_or_table}" 222 else: 223 str_query = query_or_table 224 225 formatted_query = ( 226 sqlalchemy.text(str_query) 227 if not is_dask and isinstance(str_query, str) 228 else format_sql_query_for_dask(str_query) 229 ) 230 231 def _get_chunk_args_kwargs(_chunk): 232 return filter_arguments( 233 chunk_hook, 234 _chunk, 235 workers=workers, 236 chunksize=chunksize, 237 debug=debug, 238 **kw 239 ) 240 241 chunk_list = [] 242 chunk_hook_results = [] 243 def _process_chunk(_chunk, _retry_on_failure: bool = True): 244 if self.flavor in TIMEZONE_NAIVE_FLAVORS: 245 for col in utc_dt_cols: 246 _chunk[col] = coerce_timezone(_chunk[col], strip_utc=False) 247 if not as_hook_results: 248 chunk_list.append(_chunk) 249 250 if chunk_hook is None: 251 return None 252 253 chunk_args, chunk_kwargs = _get_chunk_args_kwargs(_chunk) 254 255 result = None 256 try: 257 result = chunk_hook(*chunk_args, **chunk_kwargs) 258 except Exception: 259 result = False, traceback.format_exc() 260 from meerschaum.utils.formatting import get_console 261 if not silent: 262 get_console().print_exception() 263 264 ### If the chunk fails to process, try it again one more time. 265 if isinstance(result, tuple) and result[0] is False: 266 if _retry_on_failure: 267 return _process_chunk(_chunk, _retry_on_failure=False) 268 269 return result 270 271 try: 272 stream_results = not as_iterator and chunk_hook is not None and chunksize is not None 273 with warnings.catch_warnings(): 274 warnings.filterwarnings('ignore', 'case sensitivity issues') 275 276 read_sql_query_kwargs = { 277 'params': params, 278 'dtype': dtype, 279 'coerce_float': coerce_float, 280 'index_col': index_col, 281 } 282 if is_dask: 283 if index_col is None: 284 dd = None 285 pd = attempt_import('pandas') 286 read_sql_query_kwargs.update({ 287 'chunksize': chunksize, 288 }) 289 else: 290 read_sql_query_kwargs.update({ 291 'chunksize': chunksize, 292 }) 293 294 if is_dask and dd is not None: 295 ddf = dd.read_sql_query( 296 formatted_query, 297 self.URI, 298 **read_sql_query_kwargs 299 ) 300 else: 301 302 def get_chunk_generator(connectable): 303 chunk_generator = pd.read_sql_query( 304 formatted_query, 305 connectable, # NOTE: test this against `self.engine`. 
306 **read_sql_query_kwargs 307 ) 308 309 to_return = ( 310 ( 311 chunk_generator 312 if not (as_hook_results or chunksize is None) 313 else ( 314 _process_chunk(_chunk) 315 for _chunk in chunk_generator 316 ) 317 ) 318 if as_iterator or chunksize is None 319 else ( 320 list(pool.imap(_process_chunk, chunk_generator)) 321 if as_hook_results 322 else None 323 ) 324 ) 325 return chunk_generator, to_return 326 327 if self.flavor in SKIP_READ_TRANSACTION_FLAVORS: 328 chunk_generator, to_return = get_chunk_generator(self.engine) 329 else: 330 with self.engine.begin() as transaction: 331 with transaction.execution_options( 332 stream_results=stream_results, 333 ) as connection: 334 chunk_generator, to_return = get_chunk_generator(connection) 335 336 if to_return is not None: 337 return to_return 338 339 except Exception as e: 340 if debug: 341 dprint(f"[{self}] Failed to execute query:\n\n{query_or_table}\n\n") 342 if not silent: 343 warn(str(e), stacklevel=3) 344 from meerschaum.utils.formatting import get_console 345 if not silent: 346 get_console().print_exception() 347 348 return None 349 350 if is_dask and dd is not None: 351 ddf = ddf.reset_index() 352 return ddf 353 354 chunk_list = [] 355 read_chunks = 0 356 chunk_hook_results = [] 357 if chunksize is None: 358 chunk_list.append(chunk_generator) 359 elif as_iterator: 360 return chunk_generator 361 else: 362 try: 363 for chunk in chunk_generator: 364 if chunk_hook is not None: 365 chunk_args, chunk_kwargs = _get_chunk_args_kwargs(chunk) 366 chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs)) 367 chunk_list.append(chunk) 368 read_chunks += 1 369 if chunks is not None and read_chunks >= chunks: 370 break 371 except Exception as e: 372 warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3) 373 from meerschaum.utils.formatting import get_console 374 if not silent: 375 get_console().print_exception() 376 377 read_chunks = 0 378 try: 379 for chunk in chunk_generator: 380 if chunk_hook is not None: 381 chunk_args, chunk_kwargs = _get_chunk_args_kwargs(chunk) 382 chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs)) 383 chunk_list.append(chunk) 384 read_chunks += 1 385 if chunks is not None and read_chunks >= chunks: 386 break 387 except Exception as e: 388 warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3) 389 from meerschaum.utils.formatting import get_console 390 if not silent: 391 get_console().print_exception() 392 393 return None 394 395 ### If no chunks returned, read without chunks 396 ### to get columns 397 if len(chunk_list) == 0: 398 with warnings.catch_warnings(): 399 warnings.filterwarnings('ignore', 'case sensitivity issues') 400 _ = read_sql_query_kwargs.pop('chunksize', None) 401 with self.engine.begin() as connection: 402 chunk_list.append( 403 pd.read_sql_query( 404 formatted_query, 405 connection, 406 **read_sql_query_kwargs 407 ) 408 ) 409 410 ### call the hook on any missed chunks. 411 if chunk_hook is not None and len(chunk_list) > len(chunk_hook_results): 412 for c in chunk_list[len(chunk_hook_results):]: 413 chunk_args, chunk_kwargs = _get_chunk_args_kwargs(c) 414 chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs)) 415 416 ### chunksize is not None so must iterate 417 if debug: 418 end = time.perf_counter() 419 dprint(f"Fetched {len(chunk_list)} chunks in {round(end - start, 2)} seconds.") 420 421 if as_hook_results: 422 return chunk_hook_results 423 424 ### Skip `pd.concat()` if `as_chunks` is specified. 
425 if as_chunks: 426 for c in chunk_list: 427 c.reset_index(drop=True, inplace=True) 428 for col in get_numeric_cols(c): 429 c[col] = c[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x) 430 return chunk_list 431 432 df = pd.concat(chunk_list).reset_index(drop=True) 433 ### NOTE: The calls to `canonical()` are to drop leading and trailing zeroes. 434 for col in get_numeric_cols(df): 435 df[col] = df[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x) 436 437 return df
Read a SQL query or table into a pandas dataframe.
Parameters
- query_or_table (Union[str, sqlalchemy.Query]): The SQL query (sqlalchemy Query or string) or name of the table from which to select.
- params (Optional[Dict[str, Any]], default None): `List` or `Dict` of parameters to pass to `pandas.read_sql()`. See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
- dtype (Optional[Dict[str, Any]], default None): A dictionary of data types to pass to `pandas.read_sql()`. See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
- chunksize (Optional[int], default -1): How many chunks to read at a time. `None` will read everything in one large chunk. Defaults to system configuration. NOTE: DuckDB does not allow for chunking.
- workers (Optional[int], default None): How many threads to use when consuming the generator. Only applies if `chunk_hook` is provided.
- chunk_hook (Optional[Callable[[pandas.DataFrame], Any]], default None): Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. See `--sync-chunks` for an example. NOTE: `as_iterator` MUST be `False` (default).
- as_hook_results (bool, default False): If `True`, return a `List` of the outputs of the hook function. Only applicable if `chunk_hook` is not None. NOTE: `as_iterator` MUST be `False` (default).
- chunks (Optional[int], default None): Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and return into a single dataframe. For example, to limit the returned dataframe to 100,000 rows, you could specify a `chunksize` of `1000` and `chunks` of `100`.
- schema (Optional[str], default None): If just a table name is provided, optionally specify the table schema. Defaults to `SQLConnector.schema`.
- as_chunks (bool, default False): If `True`, return a list of DataFrames. Otherwise return a single DataFrame.
- as_iterator (bool, default False): If `True`, return the pandas DataFrame iterator. `chunksize` must not be `None` (falls back to 1000 if so), and hooks are not called in this case.
- index_col (Optional[str], default None): If using Dask, use this column as the index column. If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
- silent (bool, default False): If `True`, don't raise warnings in case of errors. Defaults to `False`.
Returns
- A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators, or `None` if something breaks.
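Typical usage sketches (the table name `my_table` is hypothetical):
>>> df = conn.read('my_table')
>>> df = conn.read('SELECT * FROM my_table WHERE id = :id', params={'id': 1})
>>> for chunk in conn.read('my_table', chunksize=1000, as_iterator=True):
...     print(len(chunk))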
440def value( 441 self, 442 query: str, 443 *args: Any, 444 use_pandas: bool = False, 445 **kw: Any 446) -> Any: 447 """ 448 Execute the provided query and return the first value. 449 450 Parameters 451 ---------- 452 query: str 453 The SQL query to execute. 454 455 *args: Any 456 The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec` 457 if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`. 458 459 use_pandas: bool, default False 460 If `True`, use `meerschaum.connectors.SQLConnector.read`, otherwise use 461 `meerschaum.connectors.sql.SQLConnector.exec` (default). 462 **NOTE:** This is always `True` for DuckDB. 463 464 **kw: Any 465 See `args`. 466 467 Returns 468 ------- 469 Any value returned from the query. 470 471 """ 472 from meerschaum.utils.packages import attempt_import 473 if self.flavor == 'duckdb': 474 use_pandas = True 475 if use_pandas: 476 try: 477 return self.read(query, *args, **kw).iloc[0, 0] 478 except Exception: 479 return None 480 481 _close = kw.get('close', True) 482 _commit = kw.get('commit', (self.flavor != 'mssql')) 483 484 try: 485 result, connection = self.exec( 486 query, 487 *args, 488 with_connection=True, 489 close=False, 490 commit=_commit, 491 **kw 492 ) 493 first = result.first() if result is not None else None 494 _val = first[0] if first is not None else None 495 except Exception as e: 496 warn(e, stacklevel=3) 497 return None 498 if _close: 499 try: 500 connection.close() 501 except Exception as e: 502 warn("Failed to close connection with exception:\n" + str(e)) 503 return _val
Execute the provided query and return the first value.
Parameters
- query (str): The SQL query to execute.
- *args (Any): The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec` if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.
- use_pandas (bool, default False): If `True`, use `meerschaum.connectors.sql.SQLConnector.read`, otherwise use `meerschaum.connectors.sql.SQLConnector.exec` (default). NOTE: This is always `True` for DuckDB.
- **kw (Any): See `args`.
Returns
- Any value returned from the query.
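For example (assuming a hypothetical table `my_table` with a datetime column `dt`):
>>> count = conn.value('SELECT COUNT(*) FROM my_table')
>>> newest_dt = conn.value('SELECT MAX(dt) FROM my_table', use_pandas=True)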
517def exec( 518 self, 519 query: str, 520 *args: Any, 521 silent: bool = False, 522 debug: bool = False, 523 commit: Optional[bool] = None, 524 close: Optional[bool] = None, 525 with_connection: bool = False, 526 _connection=None, 527 _transaction=None, 528 **kw: Any 529) -> Union[ 530 sqlalchemy.engine.result.resultProxy, 531 sqlalchemy.engine.cursor.LegacyCursorResult, 532 Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection], 533 Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection], 534 None 535]: 536 """ 537 Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures. 538 539 If inserting data, please use bind variables to avoid SQL injection! 540 541 Parameters 542 ---------- 543 query: Union[str, List[str], Tuple[str]] 544 The query to execute. 545 If `query` is a list or tuple, call `self.exec_queries()` instead. 546 547 args: Any 548 Arguments passed to `sqlalchemy.engine.execute`. 549 550 silent: bool, default False 551 If `True`, suppress warnings. 552 553 commit: Optional[bool], default None 554 If `True`, commit the changes after execution. 555 Causes issues with flavors like `'mssql'`. 556 This does not apply if `query` is a list of strings. 557 558 close: Optional[bool], default None 559 If `True`, close the connection after execution. 560 Causes issues with flavors like `'mssql'`. 561 This does not apply if `query` is a list of strings. 562 563 with_connection: bool, default False 564 If `True`, return a tuple including the connection object. 565 This does not apply if `query` is a list of strings. 566 567 Returns 568 ------- 569 The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided. 570 571 """ 572 if isinstance(query, (list, tuple)): 573 return self.exec_queries( 574 list(query), 575 *args, 576 silent=silent, 577 debug=debug, 578 **kw 579 ) 580 581 from meerschaum.utils.packages import attempt_import 582 sqlalchemy = attempt_import("sqlalchemy", lazy=False) 583 if debug: 584 dprint(f"[{self}] Executing query:\n{query}") 585 586 _close = close if close is not None else (self.flavor != 'mssql') 587 _commit = commit if commit is not None else ( 588 (self.flavor != 'mssql' or 'select' not in str(query).lower()) 589 ) 590 591 ### Select and Insert objects need to be compiled (SQLAlchemy 2.0.0+). 
592 if not hasattr(query, 'compile'): 593 query = sqlalchemy.text(query) 594 595 connection = _connection if _connection is not None else self.get_connection() 596 597 try: 598 transaction = ( 599 _transaction 600 if _transaction is not None else ( 601 connection.begin() 602 if _commit 603 else None 604 ) 605 ) 606 except sqlalchemy.exc.InvalidRequestError as e: 607 if _connection is not None or _transaction is not None: 608 raise e 609 connection = self.get_connection(rebuild=True) 610 transaction = connection.begin() 611 612 if transaction is not None and not transaction.is_active and _transaction is not None: 613 connection = self.get_connection(rebuild=True) 614 transaction = connection.begin() if _commit else None 615 616 result = None 617 try: 618 result = connection.execute(query, *args, **kw) 619 if _commit: 620 transaction.commit() 621 except Exception as e: 622 if debug: 623 dprint(f"[{self}] Failed to execute query:\n\n{query}\n\n{e}") 624 if not silent: 625 warn(str(e), stacklevel=3) 626 result = None 627 if _commit: 628 if debug: 629 dprint(f"[{self}] Rolling back failed transaction...") 630 transaction.rollback() 631 connection = self.get_connection(rebuild=True) 632 finally: 633 if _close: 634 connection.close() 635 636 if debug: 637 dprint(f"[{self}] Done executing.") 638 639 if with_connection: 640 return result, connection 641 642 return result
Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
If inserting data, please use bind variables to avoid SQL injection!
Parameters
- query (Union[str, List[str], Tuple[str]]): The query to execute. If `query` is a list or tuple, call `self.exec_queries()` instead.
- args (Any): Arguments passed to `sqlalchemy.engine.execute`.
- silent (bool, default False): If `True`, suppress warnings.
- commit (Optional[bool], default None): If `True`, commit the changes after execution. Causes issues with flavors like `'mssql'`. This does not apply if `query` is a list of strings.
- close (Optional[bool], default None): If `True`, close the connection after execution. Causes issues with flavors like `'mssql'`. This does not apply if `query` is a list of strings.
- with_connection (bool, default False): If `True`, return a tuple including the connection object. This does not apply if `query` is a list of strings.
Returns
- The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided.
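A sketch using bind variables (the table and column names are hypothetical):
>>> result = conn.exec(
...     'UPDATE my_table SET value = :value WHERE id = :id',
...     {'value': 2, 'id': 1},
... )
>>> if result is not None:
...     print(result.rowcount)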
506def execute( 507 self, 508 *args : Any, 509 **kw : Any 510) -> Optional[sqlalchemy.engine.result.resultProxy]: 511 """ 512 An alias for `meerschaum.connectors.sql.SQLConnector.exec`. 513 """ 514 return self.exec(*args, **kw)
An alias for `meerschaum.connectors.sql.SQLConnector.exec`.
746def to_sql( 747 self, 748 df: pandas.DataFrame, 749 name: str = None, 750 index: bool = False, 751 if_exists: str = 'replace', 752 method: str = "", 753 chunksize: Optional[int] = -1, 754 schema: Optional[str] = None, 755 safe_copy: bool = True, 756 silent: bool = False, 757 debug: bool = False, 758 as_tuple: bool = False, 759 as_dict: bool = False, 760 _connection=None, 761 _transaction=None, 762 **kw 763) -> Union[bool, SuccessTuple]: 764 """ 765 Upload a DataFrame's contents to the SQL server. 766 767 Parameters 768 ---------- 769 df: pd.DataFrame 770 The DataFrame to be inserted. 771 772 name: str 773 The name of the table to be created. 774 775 index: bool, default False 776 If True, creates the DataFrame's indices as columns. 777 778 if_exists: str, default 'replace' 779 Drop and create the table ('replace') or append if it exists 780 ('append') or raise Exception ('fail'). 781 Options are ['replace', 'append', 'fail']. 782 783 method: str, default '' 784 None or multi. Details on pandas.to_sql. 785 786 chunksize: Optional[int], default -1 787 How many rows to insert at a time. 788 789 schema: Optional[str], default None 790 Optionally override the schema for the table. 791 Defaults to `SQLConnector.schema`. 792 793 safe_copy: bool, defaul True 794 If `True`, copy the dataframe before making any changes. 795 796 as_tuple: bool, default False 797 If `True`, return a (success_bool, message) tuple instead of a `bool`. 798 Defaults to `False`. 799 800 as_dict: bool, default False 801 If `True`, return a dictionary of transaction information. 802 The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`, 803 `method`, and `target`. 804 805 kw: Any 806 Additional arguments will be passed to the DataFrame's `to_sql` function 807 808 Returns 809 ------- 810 Either a `bool` or a `SuccessTuple` (depends on `as_tuple`). 811 """ 812 import time 813 import json 814 from datetime import timedelta 815 from meerschaum.utils.warnings import error, warn 816 import warnings 817 import functools 818 import traceback 819 820 if name is None: 821 error(f"Name must not be `None` to insert data into {self}.") 822 823 ### We're requiring `name` to be positional, and sometimes it's passed in from background jobs. 
824 kw.pop('name', None) 825 826 schema = schema or self.schema 827 828 from meerschaum.utils.sql import ( 829 sql_item_name, 830 table_exists, 831 json_flavors, 832 truncate_item_name, 833 DROP_IF_EXISTS_FLAVORS, 834 ) 835 from meerschaum.utils.dataframe import ( 836 get_json_cols, 837 get_numeric_cols, 838 get_uuid_cols, 839 get_bytes_cols, 840 get_geometry_cols, 841 ) 842 from meerschaum.utils.dtypes import ( 843 are_dtypes_equal, 844 coerce_timezone, 845 encode_bytes_for_bytea, 846 serialize_bytes, 847 serialize_decimal, 848 serialize_geometry, 849 json_serialize_value, 850 get_geometry_type_srid, 851 ) 852 from meerschaum.utils.dtypes.sql import ( 853 PD_TO_SQLALCHEMY_DTYPES_FLAVORS, 854 get_db_type_from_pd_type, 855 get_pd_type_from_db_type, 856 get_numeric_precision_scale, 857 ) 858 from meerschaum.utils.misc import interval_str 859 from meerschaum.connectors.sql._create_engine import flavor_configs 860 from meerschaum.utils.packages import attempt_import, import_pandas 861 sqlalchemy = attempt_import('sqlalchemy', debug=debug, lazy=False) 862 pd = import_pandas() 863 is_dask = 'dask' in df.__module__ 864 865 bytes_cols = get_bytes_cols(df) 866 numeric_cols = get_numeric_cols(df) 867 geometry_cols = get_geometry_cols(df) 868 ### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite). 869 numeric_cols_dtypes = { 870 col: typ 871 for col, typ in kw.get('dtype', {}).items() 872 if ( 873 col in df.columns 874 and 'numeric' in str(typ).lower() 875 ) 876 } 877 numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols]) 878 numeric_cols_precisions_scales = { 879 col: ( 880 (typ.precision, typ.scale) 881 if hasattr(typ, 'precision') 882 else get_numeric_precision_scale(self.flavor) 883 ) 884 for col, typ in numeric_cols_dtypes.items() 885 } 886 geometry_cols_dtypes = { 887 col: typ 888 for col, typ in kw.get('dtype', {}).items() 889 if ( 890 col in df.columns 891 and 'geometry' in str(typ).lower() or 'geography' in str(typ).lower() 892 ) 893 } 894 geometry_cols.extend([col for col in geometry_cols_dtypes if col not in geometry_cols]) 895 geometry_cols_types_srids = { 896 col: (typ.geometry_type, typ.srid) 897 if hasattr(typ, 'srid') 898 else get_geometry_type_srid() 899 for col, typ in geometry_cols_dtypes.items() 900 } 901 902 cols_pd_types = { 903 col: get_pd_type_from_db_type(str(typ)) 904 for col, typ in kw.get('dtype', {}).items() 905 } 906 cols_pd_types.update({ 907 col: f'numeric[{precision},{scale}]' 908 for col, (precision, scale) in numeric_cols_precisions_scales.items() 909 if precision and scale 910 }) 911 cols_db_types = { 912 col: get_db_type_from_pd_type(typ, flavor=self.flavor) 913 for col, typ in cols_pd_types.items() 914 } 915 916 enable_bulk_insert = mrsm.get_config( 917 'system', 'connectors', 'sql', 'bulk_insert', self.flavor, 918 warn=False, 919 ) or False 920 stats = {'target': name} 921 ### resort to defaults if None 922 copied = False 923 use_bulk_insert = False 924 if method == "": 925 if enable_bulk_insert: 926 method = ( 927 functools.partial(mssql_insert_json, cols_types=cols_db_types, debug=debug) 928 if self.flavor == 'mssql' 929 else functools.partial(psql_insert_copy, debug=debug) 930 ) 931 use_bulk_insert = True 932 else: 933 ### Should resolve to 'multi' or `None`. 
934 method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi') 935 936 if bytes_cols and (use_bulk_insert or self.flavor == 'oracle'): 937 if safe_copy and not copied: 938 df = df.copy() 939 copied = True 940 bytes_serializer = ( 941 functools.partial(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle')) 942 if self.flavor != 'mssql' 943 else serialize_bytes 944 ) 945 for col in bytes_cols: 946 df[col] = df[col].apply(bytes_serializer) 947 948 ### Check for numeric columns. 949 for col in numeric_cols: 950 precision, scale = numeric_cols_precisions_scales.get( 951 col, 952 get_numeric_precision_scale(self.flavor) 953 ) 954 df[col] = df[col].apply( 955 functools.partial( 956 serialize_decimal, 957 quantize=True, 958 precision=precision, 959 scale=scale, 960 ) 961 ) 962 963 geometry_format = 'wkt' if self.flavor == 'mssql' else ( 964 'gpkg_wkb' 965 if self.flavor == 'geopackage' 966 else 'wkb_hex' 967 ) 968 for col in geometry_cols: 969 geometry_type, srid = geometry_cols_types_srids.get(col, get_geometry_type_srid()) 970 with warnings.catch_warnings(): 971 warnings.simplefilter("ignore") 972 df[col] = df[col].apply( 973 functools.partial( 974 serialize_geometry, 975 geometry_format=geometry_format, 976 ) 977 ) 978 979 stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method) 980 981 default_chunksize = self._sys_config.get('chunksize', None) 982 chunksize = chunksize if chunksize != -1 else default_chunksize 983 if chunksize is not None and self.flavor in _max_chunks_flavors: 984 if chunksize > _max_chunks_flavors[self.flavor]: 985 if chunksize != default_chunksize: 986 warn( 987 f"The specified chunksize of {chunksize} exceeds the maximum of " 988 + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n" 989 + f" Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.", 990 stacklevel = 3, 991 ) 992 chunksize = _max_chunks_flavors[self.flavor] 993 stats['chunksize'] = chunksize 994 995 success, msg = False, "Default to_sql message" 996 start = time.perf_counter() 997 if debug: 998 msg = f"[{self}] Inserting {len(df)} rows with chunksize: {chunksize}..." 999 print(msg, end="", flush=True) 1000 stats['num_rows'] = len(df) 1001 1002 ### Check if the name is too long. 1003 truncated_name = truncate_item_name(name, self.flavor) 1004 if name != truncated_name: 1005 warn( 1006 f"Table '{name}' is too long for '{self.flavor}'," 1007 f" will instead create the table '{truncated_name}'." 1008 ) 1009 1010 ### filter out non-pandas args 1011 import inspect 1012 to_sql_params = inspect.signature(df.to_sql).parameters 1013 to_sql_kw = {} 1014 for k, v in kw.items(): 1015 if k in to_sql_params: 1016 to_sql_kw[k] = v 1017 1018 to_sql_kw.update({ 1019 'name': truncated_name, 1020 'schema': schema, 1021 ('con' if not is_dask else 'uri'): (self.engine if not is_dask else self.URI), 1022 'index': index, 1023 'if_exists': if_exists, 1024 'method': method, 1025 'chunksize': chunksize, 1026 }) 1027 if is_dask: 1028 to_sql_kw.update({ 1029 'parallel': True, 1030 }) 1031 elif _connection is not None: 1032 to_sql_kw['con'] = _connection 1033 1034 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 1035 if self.flavor == 'oracle': 1036 ### For some reason 'replace' doesn't work properly in pandas, 1037 ### so try dropping first. 
1038 if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug): 1039 success = self.exec( 1040 f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema) 1041 ) is not None 1042 if not success: 1043 warn(f"Unable to drop {name}") 1044 1045 ### Enforce NVARCHAR(2000) as text instead of CLOB. 1046 dtype = to_sql_kw.get('dtype', {}) 1047 for col, typ in df.dtypes.items(): 1048 if are_dtypes_equal(str(typ), 'object'): 1049 dtype[col] = sqlalchemy.types.NVARCHAR(2000) 1050 elif are_dtypes_equal(str(typ), 'int'): 1051 dtype[col] = sqlalchemy.types.INTEGER 1052 to_sql_kw['dtype'] = dtype 1053 elif self.flavor == 'duckdb': 1054 dtype = to_sql_kw.get('dtype', {}) 1055 dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')] 1056 for col in dt_cols: 1057 df[col] = coerce_timezone(df[col], strip_utc=False) 1058 elif self.flavor == 'mssql': 1059 dtype = to_sql_kw.get('dtype', {}) 1060 dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')] 1061 new_dtype = {} 1062 for col in dt_cols: 1063 if col in dtype: 1064 continue 1065 dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True) 1066 if col not in dtype: 1067 new_dtype[col] = dt_typ 1068 1069 dtype.update(new_dtype) 1070 to_sql_kw['dtype'] = dtype 1071 1072 ### Check for JSON columns. 1073 if self.flavor not in json_flavors: 1074 json_cols = get_json_cols(df) 1075 for col in json_cols: 1076 df[col] = df[col].apply( 1077 ( 1078 lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True) 1079 if not isinstance(x, Hashable) 1080 else x 1081 ) 1082 ) 1083 1084 if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid': 1085 uuid_cols = get_uuid_cols(df) 1086 for col in uuid_cols: 1087 df[col] = df[col].astype(str) 1088 1089 try: 1090 with warnings.catch_warnings(): 1091 warnings.filterwarnings('ignore') 1092 df.to_sql(**to_sql_kw) 1093 success = True 1094 except Exception: 1095 if not silent: 1096 warn(traceback.format_exc()) 1097 success, msg = False, traceback.format_exc() 1098 1099 end = time.perf_counter() 1100 if success: 1101 num_rows = len(df) 1102 msg = ( 1103 f"It took {interval_str(timedelta(seconds=(end - start)))} " 1104 + f"to sync {num_rows:,} row" 1105 + ('s' if num_rows != 1 else '') 1106 + f" to {name}." 1107 ) 1108 stats['start'] = start 1109 stats['end'] = end 1110 stats['duration'] = end - start 1111 1112 if debug: 1113 print(" done.", flush=True) 1114 dprint(msg) 1115 1116 stats['success'] = success 1117 stats['msg'] = msg 1118 if as_tuple: 1119 return success, msg 1120 if as_dict: 1121 return stats 1122 return success
Upload a DataFrame's contents to the SQL server.
Parameters
- df (pd.DataFrame): The DataFrame to be inserted.
- name (str): The name of the table to be created.
- index (bool, default False): If `True`, create the DataFrame's indices as columns.
- if_exists (str, default 'replace'): Drop and recreate the table (`'replace'`), append if it exists (`'append'`), or raise an exception (`'fail'`). Options are `['replace', 'append', 'fail']`.
- method (str, default ''): `None` or `'multi'`. See `pandas.DataFrame.to_sql()` for details.
- chunksize (Optional[int], default -1): How many rows to insert at a time.
- schema (Optional[str], default None): Optionally override the schema for the table. Defaults to `SQLConnector.schema`.
- safe_copy (bool, default True): If `True`, copy the DataFrame before making any changes.
- silent (bool, default False): If `True`, suppress the warning emitted when the insert fails.
- as_tuple (bool, default False): If `True`, return a `(success_bool, message)` tuple instead of a `bool`. Defaults to `False`.
- as_dict (bool, default False): If `True`, return a dictionary of transaction information. The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`, `method`, and `target`.
- kw (Any): Additional arguments to be passed to the DataFrame's `to_sql` function.
Returns
- Either a `bool` or a `SuccessTuple` (depending on `as_tuple`).
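A minimal usage sketch (this assumes a configured `sql:main` connector; the table name is arbitrary):
```
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> df = pd.DataFrame({'id': [1, 2], 'val': [10.5, 20.25]})
>>> ### Returns a bool by default; `as_tuple=True` yields (success, message).
>>> conn.to_sql(df, name='example_table', if_exists='append', as_tuple=True)
```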
645def exec_queries( 646 self, 647 queries: List[ 648 Union[ 649 str, 650 Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]] 651 ] 652 ], 653 break_on_error: bool = False, 654 rollback: bool = True, 655 silent: bool = False, 656 debug: bool = False, 657) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]: 658 """ 659 Execute a list of queries in a single transaction. 660 661 Parameters 662 ---------- 663 queries: List[ 664 Union[ 665 str, 666 Tuple[str, Callable[[], List[str]]] 667 ] 668 ] 669 The queries in the transaction to be executed. 670 If a query is a tuple, the second item of the tuple 671 will be considered a callable hook that returns a list of queries to be executed 672 before the next item in the list. 673 674 break_on_error: bool, default False 675 If `True`, stop executing when a query fails. 676 677 rollback: bool, default True 678 If `break_on_error` is `True`, rollback the transaction if a query fails. 679 680 silent: bool, default False 681 If `True`, suppress warnings. 682 683 Returns 684 ------- 685 A list of SQLAlchemy results. 686 """ 687 from meerschaum.utils.warnings import warn 688 from meerschaum.utils.debug import dprint 689 from meerschaum.utils.packages import attempt_import 690 sqlalchemy, sqlalchemy_orm = attempt_import('sqlalchemy', 'sqlalchemy.orm', lazy=False) 691 session = sqlalchemy_orm.Session(self.engine) 692 693 result = None 694 results = [] 695 with session.begin(): 696 for query in queries: 697 hook = None 698 result = None 699 700 if isinstance(query, tuple): 701 query, hook = query 702 if isinstance(query, str): 703 query = sqlalchemy.text(query) 704 705 if debug: 706 dprint(f"[{self}]\n" + str(query)) 707 708 try: 709 result = session.execute(query) 710 session.flush() 711 except Exception as e: 712 msg = (f"Encountered error while executing:\n{e}") 713 if not silent: 714 warn(msg) 715 elif debug: 716 dprint(f"[{self}]\n" + str(msg)) 717 result = None 718 719 if debug: 720 dprint(f"[{self}] Finished executing.") 721 722 if result is None and break_on_error: 723 if rollback: 724 if debug: 725 dprint(f"[{self}] Rolling back...") 726 session.rollback() 727 results.append(result) 728 break 729 elif result is not None and hook is not None: 730 hook_queries = hook(session) 731 if hook_queries: 732 hook_results = self.exec_queries( 733 hook_queries, 734 break_on_error = break_on_error, 735 rollback=rollback, 736 silent=silent, 737 debug=debug, 738 ) 739 result = (result, hook_results) 740 741 results.append(result) 742 743 return results
Execute a list of queries in a single transaction.
Parameters
- queries (List[Union[str, Tuple[str, Callable[[sqlalchemy.orm.session.Session], List[str]]]]]): The queries in the transaction to be executed. If a query is a tuple, the second item of the tuple will be considered a callable hook (receiving the transaction's session) that returns a list of queries to be executed before the next item in the list.
- break_on_error (bool, default False): If `True`, stop executing when a query fails.
- rollback (bool, default True): If `break_on_error` is `True`, roll back the transaction if a query fails.
- silent (bool, default False): If `True`, suppress warnings.
Returns
- A list of SQLAlchemy results.
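A sketch of a transaction with a hook (assuming `conn` is a `SQLConnector`; the table name is hypothetical):
```
>>> queries = [
...     "CREATE TABLE tmp_example (id INTEGER)",
...     (
...         "INSERT INTO tmp_example (id) VALUES (1)",
...         lambda session: ["UPDATE tmp_example SET id = 2"],
...     ),
... ]
>>> ### The hook's queries run before the next item in the list.
>>> results = conn.exec_queries(queries, break_on_error=True)
```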
1320def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection': 1321 """ 1322 Return the current alive connection. 1323 1324 Parameters 1325 ---------- 1326 rebuild: bool, default False 1327 If `True`, close the previous connection and open a new one. 1328 1329 Returns 1330 ------- 1331 A `sqlalchemy.engine.base.Connection` object. 1332 """ 1333 import threading 1334 if '_thread_connections' not in self.__dict__: 1335 self.__dict__['_thread_connections'] = {} 1336 1337 self._cleanup_connections() 1338 1339 thread_id = threading.get_ident() 1340 1341 thread_connections = self.__dict__.get('_thread_connections', {}) 1342 connection = thread_connections.get(thread_id, None) 1343 1344 if rebuild and connection is not None: 1345 try: 1346 connection.close() 1347 except Exception: 1348 pass 1349 1350 _ = thread_connections.pop(thread_id, None) 1351 connection = None 1352 1353 if connection is None or connection.closed: 1354 connection = self.engine.connect() 1355 thread_connections[thread_id] = connection 1356 1357 return connection
Return the current alive connection.
Parameters
- rebuild (bool, default False): If `True`, close the previous connection and open a new one.
Returns
- A `sqlalchemy.engine.base.Connection` object.
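Connections are cached per thread; pass `rebuild=True` to discard the cached connection and open a new one:
```
>>> connection = conn.get_connection()
>>> fresh_connection = conn.get_connection(rebuild=True)
```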
863def test_connection( 864 self, 865 **kw: Any 866) -> Union[bool, None]: 867 """ 868 Test if a successful connection to the database may be made. 869 870 Parameters 871 ---------- 872 **kw: 873 The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`. 874 875 Returns 876 ------- 877 `True` if a connection is made, otherwise `False` or `None` in case of failure. 878 879 """ 880 import warnings 881 from meerschaum.connectors.poll import retry_connect 882 _default_kw = {'max_retries': 1, 'retry_wait': 0, 'warn': False, 'connector': self} 883 _default_kw.update(kw) 884 with warnings.catch_warnings(): 885 warnings.filterwarnings('ignore', 'Could not') 886 try: 887 return retry_connect(**_default_kw) 888 except Exception: 889 return False
Test if a successful connection to the database may be made.
Parameters
- **kw (Any): The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
Returns
- `True` if a connection is made, otherwise `False` or `None` in case of failure.
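For example, as a quick health check (keyword arguments such as `max_retries` are forwarded to `retry_connect`):
```
>>> if not conn.test_connection(max_retries=2):
...     print(f"Unable to connect to {conn}.")
```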
18def fetch( 19 self, 20 pipe: mrsm.Pipe, 21 begin: Union[datetime, int, str, None] = '', 22 end: Union[datetime, int, str, None] = None, 23 check_existing: bool = True, 24 chunksize: Optional[int] = -1, 25 workers: Optional[int] = None, 26 debug: bool = False, 27 **kw: Any 28) -> Union['pd.DataFrame', List[Any], None]: 29 """Execute the SQL definition and return a Pandas DataFrame. 30 31 Parameters 32 ---------- 33 pipe: mrsm.Pipe 34 The pipe object which contains the `fetch` metadata. 35 36 - pipe.columns['datetime']: str 37 - Name of the datetime column for the remote table. 38 - pipe.parameters['fetch']: Dict[str, Any] 39 - Parameters necessary to execute a query. 40 - pipe.parameters['fetch']['definition']: str 41 - Raw SQL query to execute to generate the pandas DataFrame. 42 - pipe.parameters['fetch']['backtrack_minutes']: Union[int, float] 43 - How many minutes before `begin` to search for data (*optional*). 44 45 begin: Union[datetime, int, str, None], default None 46 Most recent datatime to search for data. 47 If `backtrack_minutes` is provided, subtract `backtrack_minutes`. 48 49 end: Union[datetime, int, str, None], default None 50 The latest datetime to search for data. 51 If `end` is `None`, do not bound 52 53 check_existing: bool, defult True 54 If `False`, use a backtrack interval of 0 minutes. 55 56 chunksize: Optional[int], default -1 57 How many rows to load into memory at once. 58 Otherwise the entire result set is loaded into memory. 59 60 workers: Optional[int], default None 61 How many threads to use when consuming the generator. 62 Defaults to the number of cores. 63 64 debug: bool, default False 65 Verbosity toggle. 66 67 Returns 68 ------- 69 A pandas DataFrame generator. 70 """ 71 meta_def = self.get_pipe_metadef( 72 pipe, 73 begin=begin, 74 end=end, 75 check_existing=check_existing, 76 debug=debug, 77 **kw 78 ) 79 chunks = self.read( 80 meta_def, 81 chunksize=chunksize, 82 workers=workers, 83 as_iterator=True, 84 debug=debug, 85 ) 86 return chunks
Execute the SQL definition and return a Pandas DataFrame.
Parameters
- pipe (mrsm.Pipe): The pipe object which contains the `fetch` metadata.
  - `pipe.columns['datetime']` (str): Name of the datetime column for the remote table.
  - `pipe.parameters['fetch']` (Dict[str, Any]): Parameters necessary to execute a query.
  - `pipe.parameters['fetch']['definition']` (str): Raw SQL query to execute to generate the pandas DataFrame.
  - `pipe.parameters['fetch']['backtrack_minutes']` (Union[int, float]): How many minutes before `begin` to search for data (*optional*).
- begin (Union[datetime, int, str, None], default None): Most recent datetime to search for data. If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
- end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If `end` is `None`, do not bound the search.
- check_existing (bool, default True): If `False`, use a backtrack interval of 0 minutes.
- chunksize (Optional[int], default -1): How many rows to load into memory at once; otherwise the entire result set is loaded into memory.
- workers (Optional[int], default None): How many threads to use when consuming the generator. Defaults to the number of cores.
- debug (bool, default False): Verbosity toggle.
Returns
- A pandas DataFrame generator.
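A hedged sketch of a pipe whose `fetch` metadata defines a remote query (the definition, column name, and metric key here are hypothetical):
```
>>> pipe = mrsm.Pipe(
...     'sql:main', 'example',
...     parameters={
...         'fetch': {'definition': 'SELECT * FROM remote_table'},
...         'columns': {'datetime': 'dt'},
...     },
... )
>>> chunks = conn.fetch(pipe, begin='2024-01-01', chunksize=10_000)
```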
89def get_pipe_metadef( 90 self, 91 pipe: mrsm.Pipe, 92 params: Optional[Dict[str, Any]] = None, 93 begin: Union[datetime, int, str, None] = '', 94 end: Union[datetime, int, str, None] = None, 95 check_existing: bool = True, 96 debug: bool = False, 97 **kw: Any 98) -> Union[str, None]: 99 """ 100 Return a pipe's meta definition fetch query. 101 102 params: Optional[Dict[str, Any]], default None 103 Optional params dictionary to build the `WHERE` clause. 104 See `meerschaum.utils.sql.build_where`. 105 106 begin: Union[datetime, int, str, None], default None 107 Most recent datatime to search for data. 108 If `backtrack_minutes` is provided, subtract `backtrack_minutes`. 109 110 end: Union[datetime, int, str, None], default None 111 The latest datetime to search for data. 112 If `end` is `None`, do not bound 113 114 check_existing: bool, default True 115 If `True`, apply the backtrack interval. 116 117 debug: bool, default False 118 Verbosity toggle. 119 120 Returns 121 ------- 122 A pipe's meta definition fetch query string. 123 """ 124 from meerschaum.utils.warnings import warn 125 from meerschaum.utils.sql import sql_item_name, dateadd_str, build_where 126 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 127 from meerschaum.config import get_config 128 129 dt_col = pipe.columns.get('datetime', None) 130 if not dt_col: 131 dt_col = pipe.guess_datetime() 132 dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None 133 is_guess = True 134 else: 135 dt_name = sql_item_name(dt_col, self.flavor, None) 136 is_guess = False 137 dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None 138 db_dt_typ = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None 139 140 if begin not in (None, '') or end is not None: 141 if is_guess: 142 if dt_col is None: 143 warn( 144 f"Unable to determine a datetime column for {pipe}." 
145 + "\n Ignoring begin and end...", 146 stack=False, 147 ) 148 begin, end = '', None 149 else: 150 warn( 151 f"A datetime wasn't specified for {pipe}.\n" 152 + f" Using column \"{dt_col}\" for datetime bounds...", 153 stack=False 154 ) 155 156 apply_backtrack = begin == '' and check_existing 157 backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug) 158 btm = ( 159 int(backtrack_interval.total_seconds() / 60) 160 if isinstance(backtrack_interval, timedelta) 161 else backtrack_interval 162 ) 163 begin = ( 164 pipe.get_sync_time(debug=debug) 165 if begin == '' 166 else begin 167 ) 168 169 if begin not in (None, '') and end is not None and begin >= end: 170 begin = None 171 172 if dt_name: 173 begin_da = ( 174 dateadd_str( 175 flavor=self.flavor, 176 datepart='minute', 177 number=((-1 * btm) if apply_backtrack else 0), 178 begin=begin, 179 db_type=db_dt_typ, 180 ) 181 if begin not in ('', None) 182 else None 183 ) 184 end_da = ( 185 dateadd_str( 186 flavor=self.flavor, 187 datepart='minute', 188 number=0, 189 begin=end, 190 db_type=db_dt_typ, 191 ) 192 if end is not None 193 else None 194 ) 195 196 definition_name = sql_item_name('definition', self.flavor, None) 197 meta_def = ( 198 _simple_fetch_query(pipe, self.flavor) if ( 199 (not (pipe.columns or {}).get('id', None)) 200 or (not get_config('system', 'experimental', 'join_fetch')) 201 ) else _join_fetch_query(pipe, self.flavor, debug=debug, **kw) 202 ) 203 204 has_where = 'where' in meta_def.lower()[meta_def.lower().rfind('definition'):] 205 if dt_name and (begin_da or end_da): 206 definition_dt_name = f"{definition_name}.{dt_name}" 207 meta_def += "\n" + ("AND" if has_where else "WHERE") + " " 208 has_where = True 209 if begin_da: 210 meta_def += f"\n {definition_dt_name}\n >=\n {begin_da}\n" 211 if begin_da and end_da: 212 meta_def += " AND" 213 if end_da: 214 meta_def += f"\n {definition_dt_name}\n <\n {end_da}\n" 215 216 if params is not None: 217 params_where = build_where(params, self, with_where=False) 218 meta_def += "\n " + ("AND" if has_where else "WHERE") + " " 219 has_where = True 220 meta_def += params_where 221 222 return meta_def.rstrip()
Return a pipe's meta definition fetch query.
Parameters
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the `WHERE` clause. See `meerschaum.utils.sql.build_where`.
- begin (Union[datetime, int, str, None], default None): Most recent datetime to search for data. If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
- end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If `end` is `None`, do not bound the search.
- check_existing (bool, default True): If `True`, apply the backtrack interval.
- debug (bool, default False): Verbosity toggle.
Returns
- A pipe's meta definition fetch query string.
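To inspect the bounded query without executing it (a sketch, assuming `pipe` is defined as in the `fetch` example above):
```
>>> print(conn.get_pipe_metadef(pipe, begin='2024-01-01'))
```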
39def cli( 40 self, 41 debug: bool = False, 42) -> SuccessTuple: 43 """ 44 Launch a subprocess for an interactive CLI. 45 """ 46 from meerschaum.utils.warnings import dprint 47 from meerschaum.utils.venv import venv_exec 48 49 ### Initialize the engine so that dependencies are resolved. 50 _ = self.engine 51 52 env = copy.deepcopy(dict(os.environ)) 53 env_key = f"MRSM_SQL_{self.label.upper()}" 54 env_val = json.dumps(self.meta) 55 env[env_key] = env_val 56 cli_code = ( 57 "import sys\n" 58 "import meerschaum as mrsm\n" 59 "import os\n" 60 f"conn = mrsm.get_connector('sql:{self.label}')\n" 61 "success, msg = conn._cli_exit()\n" 62 "mrsm.pprint((success, msg))\n" 63 "if not success:\n" 64 " raise Exception(msg)" 65 ) 66 if debug: 67 dprint(cli_code) 68 try: 69 _ = venv_exec(cli_code, venv=None, env=env, debug=debug, capture_output=False) 70 except Exception as e: 71 return False, f"[{self}] Failed to start CLI:\n{e}" 72 return True, "Success"
Launch a subprocess for an interactive CLI.
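For example (this launches the flavor's interactive client in a subprocess, so it assumes a usable terminal):
```
>>> success, msg = conn.cli()
```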
144def fetch_pipes_keys( 145 self, 146 connector_keys: Optional[List[str]] = None, 147 metric_keys: Optional[List[str]] = None, 148 location_keys: Optional[List[str]] = None, 149 tags: Optional[List[str]] = None, 150 params: Optional[Dict[str, Any]] = None, 151 debug: bool = False, 152) -> List[ 153 Tuple[str, str, Union[str, None], Dict[str, Any]] 154 ]: 155 """ 156 Return a list of tuples corresponding to the parameters provided. 157 158 Parameters 159 ---------- 160 connector_keys: Optional[List[str]], default None 161 List of connector_keys to search by. 162 163 metric_keys: Optional[List[str]], default None 164 List of metric_keys to search by. 165 166 location_keys: Optional[List[str]], default None 167 List of location_keys to search by. 168 169 tags: Optional[List[str]], default None 170 List of pipes to search by. 171 172 params: Optional[Dict[str, Any]], default None 173 Dictionary of additional parameters to search by. 174 E.g. `--params pipe_id:1` 175 176 debug: bool, default False 177 Verbosity toggle. 178 179 Returns 180 ------- 181 A list of tuples of pipes' keys and parameters (connector_keys, metric_key, location_key, parameters). 182 """ 183 from meerschaum.utils.packages import attempt_import 184 from meerschaum.utils.misc import separate_negation_values 185 from meerschaum.utils.sql import ( 186 OMIT_NULLSFIRST_FLAVORS, 187 table_exists, 188 json_flavors, 189 ) 190 from meerschaum._internal.static import STATIC_CONFIG 191 import json 192 from copy import deepcopy 193 sqlalchemy, sqlalchemy_sql_functions = attempt_import( 194 'sqlalchemy', 195 'sqlalchemy.sql.functions', lazy=False, 196 ) 197 coalesce = sqlalchemy_sql_functions.coalesce 198 199 if connector_keys is None: 200 connector_keys = [] 201 if metric_keys is None: 202 metric_keys = [] 203 if location_keys is None: 204 location_keys = [] 205 else: 206 location_keys = [ 207 ( 208 lk 209 if lk not in ('[None]', 'None', 'null') 210 else 'None' 211 ) 212 for lk in location_keys 213 ] 214 if tags is None: 215 tags = [] 216 217 if params is None: 218 params = {} 219 220 ### Add three primary keys to params dictionary 221 ### (separated for convenience of arguments). 222 cols = { 223 'connector_keys': [str(ck) for ck in connector_keys], 224 'metric_key': [str(mk) for mk in metric_keys], 225 'location_key': [str(lk) for lk in location_keys], 226 } 227 228 ### Make deep copy so we don't mutate this somewhere else. 229 parameters = deepcopy(params) 230 for col, vals in cols.items(): 231 if vals not in [[], ['*']]: 232 parameters[col] = vals 233 234 if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug): 235 return [] 236 237 from meerschaum.connectors.sql.tables import get_tables 238 pipes_tbl = get_tables(mrsm_instance=self, create=False, debug=debug)['pipes'] 239 240 _params = {} 241 for k, v in parameters.items(): 242 _v = json.dumps(v) if isinstance(v, dict) else v 243 _params[k] = _v 244 245 negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix'] 246 ### Parse regular params. 247 ### If a param begins with '_', negate it instead. 
248 _where = [ 249 ( 250 (coalesce(pipes_tbl.c[key], 'None') == val) 251 if not str(val).startswith(negation_prefix) 252 else (pipes_tbl.c[key] != key) 253 ) for key, val in _params.items() 254 if not isinstance(val, (list, tuple)) and key in pipes_tbl.c 255 ] 256 if self.flavor in json_flavors: 257 sqlalchemy_dialects = mrsm.attempt_import('sqlalchemy.dialects', lazy=False) 258 JSONB = sqlalchemy_dialects.postgresql.JSONB 259 else: 260 JSONB = sqlalchemy.String 261 262 select_cols = ( 263 [ 264 pipes_tbl.c.connector_keys, 265 pipes_tbl.c.metric_key, 266 pipes_tbl.c.location_key, 267 pipes_tbl.c.parameters, 268 ] 269 ) 270 271 q = sqlalchemy.select(*select_cols).where(sqlalchemy.and_(True, *_where)) 272 for c, vals in cols.items(): 273 if not isinstance(vals, (list, tuple)) or not vals or c not in pipes_tbl.c: 274 continue 275 _in_vals, _ex_vals = separate_negation_values(vals) 276 q = q.where(coalesce(pipes_tbl.c[c], 'None').in_(_in_vals)) if _in_vals else q 277 q = q.where(coalesce(pipes_tbl.c[c], 'None').not_in(_ex_vals)) if _ex_vals else q 278 279 ### Finally, parse tags. 280 tag_groups = [tag.split(',') for tag in tags] 281 in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups] 282 283 ors, nands = [], [] 284 if self.flavor in json_flavors: 285 tags_jsonb = pipes_tbl.c['parameters'].cast(JSONB).op('->')('tags').cast(JSONB) 286 for _in_tags, _ex_tags in in_ex_tag_groups: 287 if _in_tags: 288 ors.append( 289 sqlalchemy.and_( 290 tags_jsonb.contains(_in_tags) 291 ) 292 ) 293 for xt in _ex_tags: 294 nands.append( 295 sqlalchemy.not_( 296 sqlalchemy.and_( 297 tags_jsonb.contains([xt]) 298 ) 299 ) 300 ) 301 else: 302 for _in_tags, _ex_tags in in_ex_tag_groups: 303 sub_ands = [] 304 for nt in _in_tags: 305 sub_ands.append( 306 sqlalchemy.cast( 307 pipes_tbl.c['parameters'], 308 sqlalchemy.String, 309 ).like(f'%"tags":%"{nt}"%') 310 ) 311 if sub_ands: 312 ors.append(sqlalchemy.and_(*sub_ands)) 313 314 for xt in _ex_tags: 315 nands.append( 316 sqlalchemy.cast( 317 pipes_tbl.c['parameters'], 318 sqlalchemy.String, 319 ).not_like(f'%"tags":%"{xt}"%') 320 ) 321 322 q = q.where(sqlalchemy.and_(*nands)) if nands else q 323 q = q.where(sqlalchemy.or_(*ors)) if ors else q 324 loc_asc = sqlalchemy.asc(pipes_tbl.c['location_key']) 325 if self.flavor not in OMIT_NULLSFIRST_FLAVORS: 326 loc_asc = sqlalchemy.nullsfirst(loc_asc) 327 q = q.order_by( 328 sqlalchemy.asc(pipes_tbl.c['connector_keys']), 329 sqlalchemy.asc(pipes_tbl.c['metric_key']), 330 loc_asc, 331 ) 332 333 ### execute the query and return a list of tuples 334 if debug: 335 dprint(q) 336 try: 337 rows = ( 338 self.execute(q).fetchall() 339 if self.flavor != 'duckdb' 340 else [ 341 (row['connector_keys'], row['metric_key'], row['location_key']) 342 for row in self.read(q).to_dict(orient='records') 343 ] 344 ) 345 except Exception as e: 346 error(str(e)) 347 348 return rows
Return a list of tuples corresponding to the parameters provided.
Parameters
- connector_keys (Optional[List[str]], default None): List of connector_keys to search by.
- metric_keys (Optional[List[str]], default None): List of metric_keys to search by.
- location_keys (Optional[List[str]], default None): List of location_keys to search by.
- tags (Optional[List[str]], default None): List of tags to search by.
- params (Optional[Dict[str, Any]], default None): Dictionary of additional parameters to search by, e.g. `--params pipe_id:1`.
- debug (bool, default False): Verbosity toggle.
Returns
- A list of tuples of pipes' keys and parameters: `(connector_keys, metric_key, location_key, parameters)`.
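For example, to list registered pipes by tag (a sketch; the tag is hypothetical):
```
>>> for ck, mk, lk, params in conn.fetch_pipes_keys(tags=['production']):
...     print(ck, mk, lk)
```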
369def create_indices( 370 self, 371 pipe: mrsm.Pipe, 372 columns: Optional[List[str]] = None, 373 indices: Optional[List[str]] = None, 374 debug: bool = False 375) -> bool: 376 """ 377 Create a pipe's indices. 378 """ 379 if pipe.__dict__.get('_skip_check_indices', False): 380 return True 381 382 if debug: 383 dprint(f"Creating indices for {pipe}...") 384 385 if not pipe.indices: 386 warn(f"{pipe} has no index columns; skipping index creation.", stack=False) 387 return True 388 389 cols_to_include = set((columns or []) + (indices or [])) or None 390 391 pipe._clear_cache_key('_columns_indices', debug=debug) 392 ix_queries = { 393 col: queries 394 for col, queries in self.get_create_index_queries(pipe, debug=debug).items() 395 if cols_to_include is None or col in cols_to_include 396 } 397 success = True 398 for col, queries in ix_queries.items(): 399 ix_success = all(self.exec_queries(queries, debug=debug, silent=False)) 400 success = success and ix_success 401 if not ix_success: 402 warn(f"Failed to create index on column: {col}") 403 404 return success
Create a pipe's indices.
425def drop_indices( 426 self, 427 pipe: mrsm.Pipe, 428 columns: Optional[List[str]] = None, 429 indices: Optional[List[str]] = None, 430 debug: bool = False 431) -> bool: 432 """ 433 Drop a pipe's indices. 434 """ 435 if debug: 436 dprint(f"Dropping indices for {pipe}...") 437 438 if not pipe.indices: 439 warn(f"No indices to drop for {pipe}.", stack=False) 440 return False 441 442 cols_to_include = set((columns or []) + (indices or [])) or None 443 444 ix_queries = { 445 col: queries 446 for col, queries in self.get_drop_index_queries(pipe, debug=debug).items() 447 if cols_to_include is None or col in cols_to_include 448 } 449 success = True 450 for col, queries in ix_queries.items(): 451 ix_success = all(self.exec_queries(queries, debug=debug, silent=(not debug))) 452 if not ix_success: 453 success = False 454 if debug: 455 dprint(f"Failed to drop index on column: {col}") 456 return success
Drop a pipe's indices.
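A sketch of rebuilding a pipe's indices with these two methods (assuming `pipe` already exists on this instance):
```
>>> dropped = conn.drop_indices(pipe)
>>> created = conn.create_indices(pipe)
```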
512def get_create_index_queries( 513 self, 514 pipe: mrsm.Pipe, 515 debug: bool = False, 516) -> Dict[str, List[str]]: 517 """ 518 Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query. 519 520 Parameters 521 ---------- 522 pipe: mrsm.Pipe 523 The pipe to which the queries will correspond. 524 525 Returns 526 ------- 527 A dictionary of index names mapping to lists of queries. 528 """ 529 ### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly. 530 if self.flavor == 'duckdb': 531 return {} 532 from meerschaum.utils.sql import ( 533 sql_item_name, 534 get_distinct_col_count, 535 UPDATE_QUERIES, 536 get_null_replacement, 537 get_create_table_queries, 538 get_rename_table_queries, 539 COALESCE_UNIQUE_INDEX_FLAVORS, 540 ) 541 from meerschaum.utils.dtypes import are_dtypes_equal 542 from meerschaum.utils.dtypes.sql import ( 543 get_db_type_from_pd_type, 544 get_pd_type_from_db_type, 545 AUTO_INCREMENT_COLUMN_FLAVORS, 546 ) 547 from meerschaum.config import get_config 548 index_queries = {} 549 550 upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES 551 static = pipe.parameters.get('static', False) 552 null_indices = pipe.parameters.get('null_indices', True) 553 index_names = pipe.get_indices() 554 unique_index_name_unquoted = index_names.get('unique', None) or f'IX_{pipe.target}_unique' 555 if upsert: 556 _ = index_names.pop('unique', None) 557 indices = pipe.indices 558 existing_cols_types = pipe.get_columns_types(debug=debug) 559 existing_cols_pd_types = { 560 col: get_pd_type_from_db_type(typ) 561 for col, typ in existing_cols_types.items() 562 } 563 existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug) 564 existing_ix_names = set() 565 existing_primary_keys = [] 566 existing_clustered_primary_keys = [] 567 for col, col_indices in existing_cols_indices.items(): 568 for col_ix_doc in col_indices: 569 existing_ix_names.add(col_ix_doc.get('name', '').lower()) 570 if col_ix_doc.get('type', None) == 'PRIMARY KEY': 571 existing_primary_keys.append(col.lower()) 572 if col_ix_doc.get('clustered', True): 573 existing_clustered_primary_keys.append(col.lower()) 574 575 _datetime = pipe.get_columns('datetime', error=False) 576 _datetime_name = ( 577 sql_item_name(_datetime, self.flavor, None) 578 if _datetime is not None else None 579 ) 580 _datetime_index_name = ( 581 sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None) 582 if index_names.get('datetime', None) 583 else None 584 ) 585 _id = pipe.get_columns('id', error=False) 586 _id_name = ( 587 sql_item_name(_id, self.flavor, None) 588 if _id is not None 589 else None 590 ) 591 primary_key = pipe.columns.get('primary', None) 592 primary_key_name = ( 593 sql_item_name(primary_key, flavor=self.flavor, schema=None) 594 if primary_key 595 else None 596 ) 597 autoincrement = ( 598 pipe.parameters.get('autoincrement', False) 599 or ( 600 primary_key is not None 601 and primary_key not in existing_cols_pd_types 602 ) 603 ) 604 primary_key_db_type = ( 605 get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int') or 'int', self.flavor) 606 if primary_key 607 else None 608 ) 609 primary_key_constraint_name = ( 610 sql_item_name(f'PK_{pipe.target}', self.flavor, None) 611 if primary_key is not None 612 else None 613 ) 614 primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED" 615 datetime_clustered = ( 616 "CLUSTERED" 617 if not existing_clustered_primary_keys and _datetime is not None 618 else "NONCLUSTERED" 
619 ) 620 include_columns_str = "\n ,".join( 621 [ 622 sql_item_name(col, flavor=self.flavor) for col in existing_cols_types 623 if col != _datetime 624 ] 625 ).rstrip(',') 626 include_clause = ( 627 ( 628 f"\nINCLUDE (\n {include_columns_str}\n)" 629 ) 630 if datetime_clustered == 'NONCLUSTERED' 631 else '' 632 ) 633 634 _id_index_name = ( 635 sql_item_name(index_names['id'], self.flavor, None) 636 if index_names.get('id', None) 637 else None 638 ) 639 _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 640 _create_space_partition = get_config('system', 'experimental', 'space') 641 642 ### create datetime index 643 dt_query = None 644 if _datetime is not None: 645 if ( 646 self.flavor in ('timescaledb', 'timescaledb-ha') 647 and pipe.parameters.get('hypertable', True) 648 ): 649 _id_count = ( 650 get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self) 651 if (_id is not None and _create_space_partition) else None 652 ) 653 654 chunk_interval = pipe.get_chunk_interval(debug=debug) 655 chunk_interval_minutes = ( 656 chunk_interval 657 if isinstance(chunk_interval, int) 658 else int(chunk_interval.total_seconds() / 60) 659 ) 660 chunk_time_interval = ( 661 f"INTERVAL '{chunk_interval_minutes} MINUTES'" 662 if isinstance(chunk_interval, timedelta) 663 else f'{chunk_interval_minutes}' 664 ) 665 666 dt_query = ( 667 f"SELECT public.create_hypertable('{_pipe_name}', " + 668 f"'{_datetime}', " 669 + ( 670 f"'{_id}', {_id_count}, " if (_id is not None and _create_space_partition) 671 else '' 672 ) 673 + f'chunk_time_interval => {chunk_time_interval}, ' 674 + 'if_not_exists => true, ' 675 + "migrate_data => true);" 676 ) 677 elif _datetime_index_name and _datetime != primary_key: 678 if self.flavor == 'mssql': 679 dt_query = ( 680 f"CREATE {datetime_clustered} INDEX {_datetime_index_name} " 681 f"\nON {_pipe_name} ({_datetime_name}){include_clause}" 682 ) 683 else: 684 dt_query = ( 685 f"CREATE INDEX {_datetime_index_name} " 686 + f"ON {_pipe_name} ({_datetime_name})" 687 ) 688 689 if dt_query: 690 index_queries[_datetime] = [dt_query] 691 692 primary_queries = [] 693 if ( 694 primary_key is not None 695 and primary_key.lower() not in existing_primary_keys 696 and not static 697 ): 698 if autoincrement and primary_key not in existing_cols_pd_types: 699 autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get( 700 self.flavor, 701 AUTO_INCREMENT_COLUMN_FLAVORS['default'] 702 ) 703 primary_queries.extend([ 704 ( 705 f"ALTER TABLE {_pipe_name}\n" 706 f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}" 707 ), 708 ]) 709 elif not autoincrement and primary_key in existing_cols_pd_types: 710 if self.flavor in ('sqlite', 'geopackage'): 711 new_table_name = sql_item_name( 712 f'_new_{pipe.target}', 713 self.flavor, 714 self.get_pipe_schema(pipe) 715 ) 716 select_cols_str = ', '.join( 717 [ 718 sql_item_name(col, self.flavor, None) 719 for col in existing_cols_types 720 ] 721 ) 722 primary_queries.extend( 723 get_create_table_queries( 724 existing_cols_pd_types, 725 f'_new_{pipe.target}', 726 self.flavor, 727 schema=self.get_pipe_schema(pipe), 728 primary_key=primary_key, 729 ) + [ 730 ( 731 f"INSERT INTO {new_table_name} ({select_cols_str})\n" 732 f"SELECT {select_cols_str}\nFROM {_pipe_name}" 733 ), 734 f"DROP TABLE {_pipe_name}", 735 ] + get_rename_table_queries( 736 f'_new_{pipe.target}', 737 pipe.target, 738 self.flavor, 739 schema=self.get_pipe_schema(pipe), 740 ) 741 ) 742 elif self.flavor == 'oracle': 743 primary_queries.extend([ 744 ( 745 
f"ALTER TABLE {_pipe_name}\n" 746 f"MODIFY {primary_key_name} NOT NULL" 747 ), 748 ( 749 f"ALTER TABLE {_pipe_name}\n" 750 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 751 ) 752 ]) 753 elif self.flavor in ('mysql', 'mariadb'): 754 primary_queries.extend([ 755 ( 756 f"ALTER TABLE {_pipe_name}\n" 757 f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL" 758 ), 759 ( 760 f"ALTER TABLE {_pipe_name}\n" 761 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 762 ) 763 ]) 764 elif self.flavor in ('timescaledb', 'timescaledb-ha'): 765 primary_queries.extend([ 766 ( 767 f"ALTER TABLE {_pipe_name}\n" 768 f"ALTER COLUMN {primary_key_name} SET NOT NULL" 769 ), 770 ( 771 f"ALTER TABLE {_pipe_name}\n" 772 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + ( 773 f"{_datetime_name}, " if _datetime_name else "" 774 ) + f"{primary_key_name})" 775 ), 776 ]) 777 elif self.flavor in ('citus', 'postgresql', 'duckdb', 'postgis'): 778 primary_queries.extend([ 779 ( 780 f"ALTER TABLE {_pipe_name}\n" 781 f"ALTER COLUMN {primary_key_name} SET NOT NULL" 782 ), 783 ( 784 f"ALTER TABLE {_pipe_name}\n" 785 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})" 786 ), 787 ]) 788 else: 789 primary_queries.extend([ 790 ( 791 f"ALTER TABLE {_pipe_name}\n" 792 f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL" 793 ), 794 ( 795 f"ALTER TABLE {_pipe_name}\n" 796 f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})" 797 ), 798 ]) 799 index_queries[primary_key] = primary_queries 800 801 ### create id index 802 if _id_name is not None: 803 if self.flavor in ('timescaledb', 'timescaledb-ha'): 804 ### Already created indices via create_hypertable. 805 id_query = ( 806 None if (_id is not None and _create_space_partition) 807 else ( 808 f"CREATE INDEX IF NOT EXISTS {_id_index_name} ON {_pipe_name} ({_id_name})" 809 if _id is not None 810 else None 811 ) 812 ) 813 pass 814 else: ### mssql, sqlite, etc. 815 id_query = f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})" 816 817 if id_query is not None: 818 index_queries[_id] = id_query if isinstance(id_query, list) else [id_query] 819 820 ### Create indices for other labels in `pipe.columns`. 
821 other_index_names = { 822 ix_key: ix_unquoted 823 for ix_key, ix_unquoted in index_names.items() 824 if ( 825 ix_key not in ('datetime', 'id', 'primary') 826 and ix_unquoted.lower() not in existing_ix_names 827 ) 828 } 829 for ix_key, ix_unquoted in other_index_names.items(): 830 ix_name = sql_item_name(ix_unquoted, self.flavor, None) 831 cols = indices[ix_key] 832 if not isinstance(cols, (list, tuple)): 833 cols = [cols] 834 if ix_key == 'unique' and upsert: 835 continue 836 cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col] 837 if not cols_names: 838 continue 839 840 cols_names_str = ", ".join(cols_names) 841 index_query_params_clause = f" ({cols_names_str})" 842 if self.flavor in ('postgis', 'timescaledb-ha'): 843 for col in cols: 844 col_typ = existing_cols_pd_types.get(cols[0], 'object') 845 if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'): 846 index_query_params_clause = f" USING GIST ({cols_names_str})" 847 break 848 849 index_queries[ix_key] = [ 850 f"CREATE INDEX {ix_name} ON {_pipe_name}{index_query_params_clause}" 851 ] 852 853 indices_cols_str = ', '.join( 854 list({ 855 sql_item_name(ix, self.flavor) 856 for ix_key, ix in pipe.columns.items() 857 if ix and ix in existing_cols_types 858 }) 859 ) 860 coalesce_indices_cols_str = ', '.join( 861 [ 862 ( 863 ( 864 "COALESCE(" 865 + sql_item_name(ix, self.flavor) 866 + ", " 867 + get_null_replacement(existing_cols_types[ix], self.flavor) 868 + ") " 869 ) 870 if ix_key != 'datetime' and null_indices 871 else sql_item_name(ix, self.flavor) 872 ) 873 for ix_key, ix in pipe.columns.items() 874 if ix and ix in existing_cols_types 875 ] 876 ) 877 unique_index_name = sql_item_name(unique_index_name_unquoted, self.flavor) 878 constraint_name_unquoted = unique_index_name_unquoted.replace('IX_', 'UQ_') 879 constraint_name = sql_item_name(constraint_name_unquoted, self.flavor) 880 add_constraint_query = ( 881 f"ALTER TABLE {_pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})" 882 ) 883 unique_index_cols_str = ( 884 indices_cols_str 885 if self.flavor not in COALESCE_UNIQUE_INDEX_FLAVORS or not null_indices 886 else coalesce_indices_cols_str 887 ) 888 create_unique_index_query = ( 889 f"CREATE UNIQUE INDEX {unique_index_name} ON {_pipe_name} ({unique_index_cols_str})" 890 ) 891 constraint_queries = [create_unique_index_query] 892 if self.flavor not in ('sqlite', 'geopackage'): 893 constraint_queries.append(add_constraint_query) 894 if upsert and indices_cols_str: 895 index_queries[unique_index_name] = constraint_queries 896 return index_queries
Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.
Parameters
- pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
- A dictionary of index names mapping to lists of queries.
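To preview the generated queries before executing them (e.g. via `exec_queries`):
```
>>> for ix_name, queries in conn.get_create_index_queries(pipe).items():
...     for query in queries:
...         print(query)
```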
899def get_drop_index_queries( 900 self, 901 pipe: mrsm.Pipe, 902 debug: bool = False, 903) -> Dict[str, List[str]]: 904 """ 905 Return a dictionary mapping columns to a `DROP INDEX` or equivalent query. 906 907 Parameters 908 ---------- 909 pipe: mrsm.Pipe 910 The pipe to which the queries will correspond. 911 912 Returns 913 ------- 914 A dictionary of column names mapping to lists of queries. 915 """ 916 ### NOTE: Due to breaking changes within DuckDB, indices must be skipped. 917 if self.flavor == 'duckdb': 918 return {} 919 if not pipe.exists(debug=debug): 920 return {} 921 922 from collections import defaultdict 923 from meerschaum.utils.sql import ( 924 sql_item_name, 925 table_exists, 926 hypertable_queries, 927 DROP_INDEX_IF_EXISTS_FLAVORS, 928 ) 929 drop_queries = defaultdict(lambda: []) 930 schema = self.get_pipe_schema(pipe) 931 index_schema = schema if self.flavor != 'mssql' else None 932 indices = { 933 ix_key: ix 934 for ix_key, ix in pipe.get_indices().items() 935 } 936 cols_indices = pipe.get_columns_indices(debug=debug) 937 existing_indices = set() 938 clustered_ix = None 939 for col, ix_metas in cols_indices.items(): 940 for ix_meta in ix_metas: 941 ix_name = ix_meta.get('name', None) 942 if ix_meta.get('clustered', False): 943 clustered_ix = ix_name 944 existing_indices.add(ix_name.lower()) 945 pipe_name = sql_item_name(pipe.target, self.flavor, schema) 946 pipe_name_no_schema = sql_item_name(pipe.target, self.flavor, None) 947 upsert = pipe.upsert 948 949 if self.flavor not in hypertable_queries: 950 is_hypertable = False 951 else: 952 is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name) 953 is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None 954 955 if_exists_str = "IF EXISTS " if self.flavor in DROP_INDEX_IF_EXISTS_FLAVORS else "" 956 if is_hypertable: 957 nuke_queries = [] 958 temp_table = '_' + pipe.target + '_temp_migration' 959 temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe)) 960 961 if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug): 962 nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}") 963 nuke_queries += [ 964 f"SELECT * INTO {temp_table_name} FROM {pipe_name}", 965 f"DROP TABLE {if_exists_str}{pipe_name}", 966 f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}", 967 ] 968 nuke_ix_keys = ('datetime', 'id') 969 nuked = False 970 for ix_key in nuke_ix_keys: 971 if ix_key in indices and not nuked: 972 drop_queries[ix_key].extend(nuke_queries) 973 nuked = True 974 975 for ix_key, ix_unquoted in indices.items(): 976 if ix_key in drop_queries: 977 continue 978 if ix_unquoted.lower() not in existing_indices: 979 continue 980 981 if ( 982 ix_key == 'unique' 983 and upsert 984 and self.flavor not in ('sqlite', 'geopackage') 985 and not is_hypertable 986 ): 987 constraint_name_unquoted = ix_unquoted.replace('IX_', 'UQ_') 988 constraint_name = sql_item_name(constraint_name_unquoted, self.flavor) 989 constraint_or_index = ( 990 "CONSTRAINT" 991 if self.flavor not in ('mysql', 'mariadb') 992 else 'INDEX' 993 ) 994 drop_queries[ix_key].append( 995 f"ALTER TABLE {pipe_name}\n" 996 f"DROP {constraint_or_index} {constraint_name}" 997 ) 998 999 query = ( 1000 ( 1001 f"ALTER TABLE {pipe_name}\n" 1002 if self.flavor in ('mysql', 'mariadb') 1003 else '' 1004 ) 1005 + f"DROP INDEX {if_exists_str}" 1006 + sql_item_name(ix_unquoted, self.flavor, index_schema) 1007 ) 1008 if self.flavor == 'mssql': 1009 query += f"\nON 
{pipe_name}" 1010 if ix_unquoted == clustered_ix: 1011 query += "\nWITH (ONLINE = ON, MAXDOP = 4)" 1012 drop_queries[ix_key].append(query) 1013 1014 1015 return drop_queries
Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.
Parameters
- pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
- A dictionary of column names mapping to lists of queries.
3163def get_add_columns_queries( 3164 self, 3165 pipe: mrsm.Pipe, 3166 df: Union[pd.DataFrame, Dict[str, str]], 3167 _is_db_types: bool = False, 3168 debug: bool = False, 3169) -> List[str]: 3170 """ 3171 Add new null columns of the correct type to a table from a dataframe. 3172 3173 Parameters 3174 ---------- 3175 pipe: mrsm.Pipe 3176 The pipe to be altered. 3177 3178 df: Union[pd.DataFrame, Dict[str, str]] 3179 The pandas DataFrame which contains new columns. 3180 If a dictionary is provided, assume it maps columns to Pandas data types. 3181 3182 _is_db_types: bool, default False 3183 If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes. 3184 3185 Returns 3186 ------- 3187 A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector. 3188 """ 3189 if not pipe.exists(debug=debug): 3190 return [] 3191 3192 if pipe.parameters.get('static', False): 3193 return [] 3194 3195 from decimal import Decimal 3196 import copy 3197 from meerschaum.utils.sql import ( 3198 sql_item_name, 3199 SINGLE_ALTER_TABLE_FLAVORS, 3200 get_table_cols_types, 3201 ) 3202 from meerschaum.utils.dtypes.sql import ( 3203 get_pd_type_from_db_type, 3204 get_db_type_from_pd_type, 3205 ) 3206 from meerschaum.utils.misc import flatten_list 3207 is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False 3208 if is_dask: 3209 df = df.partitions[0].compute() 3210 df_cols_types = ( 3211 { 3212 col: str(typ) 3213 for col, typ in df.dtypes.items() 3214 } 3215 if not isinstance(df, dict) 3216 else copy.deepcopy(df) 3217 ) 3218 if not isinstance(df, dict) and len(df.index) > 0: 3219 for col, typ in list(df_cols_types.items()): 3220 if typ != 'object': 3221 continue 3222 val = df.iloc[0][col] 3223 if isinstance(val, (dict, list)): 3224 df_cols_types[col] = 'json' 3225 elif isinstance(val, Decimal): 3226 df_cols_types[col] = 'numeric' 3227 elif isinstance(val, str): 3228 df_cols_types[col] = 'str' 3229 db_cols_types = { 3230 col: get_pd_type_from_db_type(typ) 3231 for col, typ in get_table_cols_types( 3232 pipe.target, 3233 self, 3234 schema=self.get_pipe_schema(pipe), 3235 debug=debug, 3236 ).items() 3237 } 3238 new_cols = set(df_cols_types) - set(db_cols_types) 3239 if not new_cols: 3240 return [] 3241 3242 new_cols_types = { 3243 col: get_db_type_from_pd_type( 3244 df_cols_types[col], 3245 self.flavor 3246 ) 3247 for col in new_cols 3248 if col and df_cols_types.get(col, None) 3249 } 3250 3251 alter_table_query = "ALTER TABLE " + sql_item_name( 3252 pipe.target, self.flavor, self.get_pipe_schema(pipe) 3253 ) 3254 queries = [] 3255 for col, typ in new_cols_types.items(): 3256 add_col_query = ( 3257 "\nADD " 3258 + sql_item_name(col, self.flavor, None) 3259 + " " + typ + "," 3260 ) 3261 3262 if self.flavor in SINGLE_ALTER_TABLE_FLAVORS: 3263 queries.append(alter_table_query + add_col_query[:-1]) 3264 else: 3265 alter_table_query += add_col_query 3266 3267 ### For most flavors, only one query is required. 3268 ### This covers SQLite which requires one query per column. 3269 if not queries: 3270 queries.append(alter_table_query[:-1]) 3271 3272 if self.flavor != 'duckdb': 3273 return queries 3274 3275 ### NOTE: For DuckDB, we must drop and rebuild the indices. 
3276 drop_index_queries = list(flatten_list( 3277 [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()] 3278 )) 3279 create_index_queries = list(flatten_list( 3280 [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()] 3281 )) 3282 3283 return drop_index_queries + queries + create_index_queries
Add new null columns of the correct type to a table from a dataframe.
Parameters
- pipe (mrsm.Pipe): The pipe to be altered.
- df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which contains new columns. If a dictionary is provided, assume it maps columns to Pandas data types.
- _is_db_types (bool, default False): If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.
Returns
- A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
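A sketch of generating and applying the queries (the column name and dtype are hypothetical):
```
>>> queries = conn.get_add_columns_queries(pipe, {'new_col': 'float64'})
>>> results = conn.exec_queries(queries, break_on_error=True)
```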
3286def get_alter_columns_queries( 3287 self, 3288 pipe: mrsm.Pipe, 3289 df: Union[pd.DataFrame, Dict[str, str]], 3290 debug: bool = False, 3291) -> List[str]: 3292 """ 3293 If we encounter a column of a different type, set the entire column to text. 3294 If the altered columns are numeric, alter to numeric instead. 3295 3296 Parameters 3297 ---------- 3298 pipe: mrsm.Pipe 3299 The pipe to be altered. 3300 3301 df: Union[pd.DataFrame, Dict[str, str]] 3302 The pandas DataFrame which may contain altered columns. 3303 If a dict is provided, assume it maps columns to Pandas data types. 3304 3305 Returns 3306 ------- 3307 A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector. 3308 """ 3309 if not pipe.exists(debug=debug) or pipe.static: 3310 return [] 3311 3312 from meerschaum.utils.sql import ( 3313 sql_item_name, 3314 get_table_cols_types, 3315 DROP_IF_EXISTS_FLAVORS, 3316 SINGLE_ALTER_TABLE_FLAVORS, 3317 ) 3318 from meerschaum.utils.dataframe import get_numeric_cols 3319 from meerschaum.utils.dtypes import are_dtypes_equal 3320 from meerschaum.utils.dtypes.sql import ( 3321 get_pd_type_from_db_type, 3322 get_db_type_from_pd_type, 3323 ) 3324 from meerschaum.utils.misc import flatten_list, generate_password, items_str 3325 target = pipe.target 3326 session_id = generate_password(3) 3327 numeric_cols = ( 3328 get_numeric_cols(df) 3329 if not isinstance(df, dict) 3330 else [ 3331 col 3332 for col, typ in df.items() 3333 if typ.startswith('numeric') 3334 ] 3335 ) 3336 df_cols_types = ( 3337 { 3338 col: str(typ) 3339 for col, typ in df.dtypes.items() 3340 } 3341 if not isinstance(df, dict) 3342 else df 3343 ) 3344 db_cols_types = { 3345 col: get_pd_type_from_db_type(typ) 3346 for col, typ in get_table_cols_types( 3347 pipe.target, 3348 self, 3349 schema=self.get_pipe_schema(pipe), 3350 debug=debug, 3351 ).items() 3352 } 3353 pipe_dtypes = pipe.get_dtypes(debug=debug) 3354 pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')] 3355 pd_db_df_aliases = { 3356 'int': 'bool', 3357 'float': 'bool', 3358 'numeric': 'bool', 3359 'guid': 'object', 3360 } 3361 if self.flavor == 'oracle': 3362 pd_db_df_aliases.update({ 3363 'int': 'numeric', 3364 'date': 'datetime', 3365 'numeric': 'int', 3366 }) 3367 elif self.flavor == 'geopackage': 3368 pd_db_df_aliases.update({ 3369 'geometry': 'bytes', 3370 'bytes': 'geometry', 3371 }) 3372 3373 altered_cols = { 3374 col: (db_cols_types.get(col, 'object'), typ) 3375 for col, typ in df_cols_types.items() 3376 if not are_dtypes_equal(typ, db_cols_types.get(col, 'object').lower()) 3377 and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string') 3378 } 3379 3380 if debug and altered_cols: 3381 dprint("Columns to be altered:") 3382 mrsm.pprint(altered_cols) 3383 3384 ### NOTE: Special columns (numerics, bools, etc.) are captured and cached upon detection. 3385 new_special_cols = pipe._get_cached_value('new_special_cols', debug=debug) or {} 3386 new_special_db_cols_types = { 3387 col: (db_cols_types.get(col, 'object'), typ) 3388 for col, typ in new_special_cols.items() 3389 } 3390 if debug: 3391 dprint("Cached new special columns:") 3392 mrsm.pprint(new_special_cols) 3393 dprint("New special columns db types:") 3394 mrsm.pprint(new_special_db_cols_types) 3395 3396 altered_cols.update(new_special_db_cols_types) 3397 3398 ### NOTE: Sometimes bools are coerced into ints or floats. 
3399 altered_cols_to_ignore = set() 3400 for col, (db_typ, df_typ) in altered_cols.items(): 3401 for db_alias, df_alias in pd_db_df_aliases.items(): 3402 if ( 3403 db_alias in db_typ.lower() 3404 and df_alias in df_typ.lower() 3405 and col not in new_special_cols 3406 ): 3407 altered_cols_to_ignore.add(col) 3408 3409 ### Oracle's bool handling sometimes mixes NUMBER and INT. 3410 for bool_col in pipe_bool_cols: 3411 if bool_col not in altered_cols: 3412 continue 3413 db_is_bool_compatible = ( 3414 are_dtypes_equal('int', altered_cols[bool_col][0]) 3415 or are_dtypes_equal('float', altered_cols[bool_col][0]) 3416 or are_dtypes_equal('numeric', altered_cols[bool_col][0]) 3417 or are_dtypes_equal('bool', altered_cols[bool_col][0]) 3418 ) 3419 df_is_bool_compatible = ( 3420 are_dtypes_equal('int', altered_cols[bool_col][1]) 3421 or are_dtypes_equal('float', altered_cols[bool_col][1]) 3422 or are_dtypes_equal('numeric', altered_cols[bool_col][1]) 3423 or are_dtypes_equal('bool', altered_cols[bool_col][1]) 3424 ) 3425 if db_is_bool_compatible and df_is_bool_compatible: 3426 altered_cols_to_ignore.add(bool_col) 3427 3428 if debug and altered_cols_to_ignore: 3429 dprint("Ignoring the following altered columns (false positives).") 3430 mrsm.pprint(altered_cols_to_ignore) 3431 3432 for col in altered_cols_to_ignore: 3433 _ = altered_cols.pop(col, None) 3434 3435 if not altered_cols: 3436 return [] 3437 3438 if numeric_cols: 3439 explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) 3440 explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols}) 3441 pipe.dtypes = explicit_pipe_dtypes 3442 if not pipe.temporary: 3443 edit_success, edit_msg = pipe.edit(debug=debug) 3444 if not edit_success: 3445 warn( 3446 f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n" 3447 + f"{edit_msg}" 3448 ) 3449 else: 3450 numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')]) 3451 3452 numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False) 3453 text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False) 3454 altered_cols_types = { 3455 col: ( 3456 numeric_type 3457 if col in numeric_cols 3458 else text_type 3459 ) 3460 for col, (db_typ, typ) in altered_cols.items() 3461 } 3462 3463 if self.flavor in ('sqlite', 'geopackage'): 3464 temp_table_name = '-' + session_id + '_' + target 3465 rename_query = ( 3466 "ALTER TABLE " 3467 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3468 + " RENAME TO " 3469 + sql_item_name(temp_table_name, self.flavor, None) 3470 ) 3471 create_query = ( 3472 "CREATE TABLE " 3473 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3474 + " (\n" 3475 ) 3476 for col_name, col_typ in db_cols_types.items(): 3477 create_query += ( 3478 sql_item_name(col_name, self.flavor, None) 3479 + " " 3480 + ( 3481 col_typ 3482 if col_name not in altered_cols 3483 else altered_cols_types[col_name] 3484 ) 3485 + ",\n" 3486 ) 3487 create_query = create_query[:-2] + "\n)" 3488 3489 insert_query = ( 3490 "INSERT INTO " 3491 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3492 + ' (' 3493 + ', '.join([ 3494 sql_item_name(col_name, self.flavor, None) 3495 for col_name in db_cols_types 3496 ]) 3497 + ')' 3498 + "\nSELECT\n" 3499 ) 3500 for col_name in db_cols_types: 3501 new_col_str = ( 3502 sql_item_name(col_name, self.flavor, None) 3503 if col_name not in altered_cols 3504 else ( 3505 "CAST(" 3506 + sql_item_name(col_name, self.flavor, None) 3507 + 
" AS " 3508 + altered_cols_types[col_name] 3509 + ")" 3510 ) 3511 ) 3512 insert_query += new_col_str + ",\n" 3513 3514 insert_query = insert_query[:-2] + ( 3515 f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}" 3516 ) 3517 3518 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 3519 3520 drop_query = f"DROP TABLE {if_exists_str}" + sql_item_name( 3521 temp_table_name, self.flavor, self.get_pipe_schema(pipe) 3522 ) 3523 return [ 3524 rename_query, 3525 create_query, 3526 insert_query, 3527 drop_query, 3528 ] 3529 3530 queries = [] 3531 if self.flavor == 'oracle': 3532 for col, typ in altered_cols_types.items(): 3533 add_query = ( 3534 "ALTER TABLE " 3535 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3536 + "\nADD " + sql_item_name(col + '_temp', self.flavor, None) 3537 + " " + typ 3538 ) 3539 queries.append(add_query) 3540 3541 for col, typ in altered_cols_types.items(): 3542 populate_temp_query = ( 3543 "UPDATE " 3544 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3545 + "\nSET " + sql_item_name(col + '_temp', self.flavor, None) 3546 + ' = ' + sql_item_name(col, self.flavor, None) 3547 ) 3548 queries.append(populate_temp_query) 3549 3550 for col, typ in altered_cols_types.items(): 3551 set_old_cols_to_null_query = ( 3552 "UPDATE " 3553 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3554 + "\nSET " + sql_item_name(col, self.flavor, None) 3555 + ' = NULL' 3556 ) 3557 queries.append(set_old_cols_to_null_query) 3558 3559 for col, typ in altered_cols_types.items(): 3560 alter_type_query = ( 3561 "ALTER TABLE " 3562 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3563 + "\nMODIFY " + sql_item_name(col, self.flavor, None) + ' ' 3564 + typ 3565 ) 3566 queries.append(alter_type_query) 3567 3568 for col, typ in altered_cols_types.items(): 3569 set_old_to_temp_query = ( 3570 "UPDATE " 3571 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3572 + "\nSET " + sql_item_name(col, self.flavor, None) 3573 + ' = ' + sql_item_name(col + '_temp', self.flavor, None) 3574 ) 3575 queries.append(set_old_to_temp_query) 3576 3577 for col, typ in altered_cols_types.items(): 3578 drop_temp_query = ( 3579 "ALTER TABLE " 3580 + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3581 + "\nDROP COLUMN " + sql_item_name(col + '_temp', self.flavor, None) 3582 ) 3583 queries.append(drop_temp_query) 3584 3585 return queries 3586 3587 query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)) 3588 for col, typ in altered_cols_types.items(): 3589 alter_col_prefix = ( 3590 'ALTER' if self.flavor not in ('mysql', 'mariadb', 'oracle') 3591 else 'MODIFY' 3592 ) 3593 type_prefix = ( 3594 '' if self.flavor in ('mssql', 'mariadb', 'mysql') 3595 else 'TYPE ' 3596 ) 3597 column_str = 'COLUMN' if self.flavor != 'oracle' else '' 3598 query_suffix = ( 3599 f"\n{alter_col_prefix} {column_str} " 3600 + sql_item_name(col, self.flavor, None) 3601 + " " + type_prefix + typ + "," 3602 ) 3603 if self.flavor not in SINGLE_ALTER_TABLE_FLAVORS: 3604 query += query_suffix 3605 else: 3606 queries.append(query + query_suffix[:-1]) 3607 3608 if self.flavor not in SINGLE_ALTER_TABLE_FLAVORS: 3609 queries.append(query[:-1]) 3610 3611 if self.flavor != 'duckdb': 3612 return queries 3613 3614 drop_index_queries = list(flatten_list( 3615 [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()] 3616 )) 3617 create_index_queries = list(flatten_list( 3618 [q for 
ix, q in self.get_create_index_queries(pipe, debug=debug).items()] 3619 )) 3620 3621 return drop_index_queries + queries + create_index_queries
If we encounter a column of a different type, set the entire column to text. If the altered columns are numeric, alter to numeric instead.
Parameters
- pipe (mrsm.Pipe): The pipe to be altered.
- df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which may contain altered columns. If a dict is provided, assume it maps columns to Pandas data types.
Returns
- A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
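As a usage sketch (the connector keys, pipe keys, and column names below are hypothetical), the generated queries can be inspected before they are executed:

```
import meerschaum as mrsm
import pandas as pd

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe('demo', 'prices', instance=conn)

# A DataFrame whose 'price' column no longer matches the stored column type.
df = pd.DataFrame({'id': [1], 'price': ['4.99']})

# Build (but do not execute) the ALTER TABLE / rebuild queries.
for query in conn.get_alter_columns_queries(pipe, df):
    print(query)
```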
1018def delete_pipe( 1019 self, 1020 pipe: mrsm.Pipe, 1021 debug: bool = False, 1022) -> SuccessTuple: 1023 """ 1024 Delete a Pipe's registration. 1025 """ 1026 from meerschaum.utils.packages import attempt_import 1027 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 1028 1029 if not pipe.id: 1030 return False, f"{pipe} is not registered." 1031 1032 ### ensure pipes table exists 1033 from meerschaum.connectors.sql.tables import get_tables 1034 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1035 1036 q = sqlalchemy.delete(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id) 1037 if not self.exec(q, debug=debug): 1038 return False, f"Failed to delete registration for {pipe}." 1039 1040 return True, "Success"
Delete a Pipe's registration.
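A minimal sketch, assuming the pipe below is already registered on a configured `sql:main` instance:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe('demo', 'prices', instance=conn)

# Remove the pipe's row from the instance's pipes table.
success, msg = conn.delete_pipe(pipe)
print(success, msg)
```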
1043def get_pipe_data( 1044 self, 1045 pipe: mrsm.Pipe, 1046 select_columns: Optional[List[str]] = None, 1047 omit_columns: Optional[List[str]] = None, 1048 begin: Union[datetime, str, None] = None, 1049 end: Union[datetime, str, None] = None, 1050 params: Optional[Dict[str, Any]] = None, 1051 order: str = 'asc', 1052 limit: Optional[int] = None, 1053 begin_add_minutes: int = 0, 1054 end_add_minutes: int = 0, 1055 chunksize: Optional[int] = -1, 1056 as_iterator: bool = False, 1057 debug: bool = False, 1058 **kw: Any 1059) -> Union[pd.DataFrame, None]: 1060 """ 1061 Access a pipe's data from the SQL instance. 1062 1063 Parameters 1064 ---------- 1065 pipe: mrsm.Pipe: 1066 The pipe to get data from. 1067 1068 select_columns: Optional[List[str]], default None 1069 If provided, only select these given columns. 1070 Otherwise select all available columns (i.e. `SELECT *`). 1071 1072 omit_columns: Optional[List[str]], default None 1073 If provided, remove these columns from the selection. 1074 1075 begin: Union[datetime, str, None], default None 1076 If provided, get rows newer than or equal to this value. 1077 1078 end: Union[datetime, str, None], default None 1079 If provided, get rows older than or equal to this value. 1080 1081 params: Optional[Dict[str, Any]], default None 1082 Additional parameters to filter by. 1083 See `meerschaum.connectors.sql.build_where`. 1084 1085 order: Optional[str], default 'asc' 1086 The selection order for all of the indices in the query. 1087 If `None`, omit the `ORDER BY` clause. 1088 1089 limit: Optional[int], default None 1090 If specified, limit the number of rows retrieved to this value. 1091 1092 begin_add_minutes: int, default 0 1093 The number of minutes to add to the `begin` datetime (i.e. `DATEADD`). 1094 1095 end_add_minutes: int, default 0 1096 The number of minutes to add to the `end` datetime (i.e. `DATEADD`). 1097 1098 chunksize: Optional[int], default -1 1099 The size of dataframe chunks to load into memory. 1100 1101 as_iterator: bool, default False 1102 If `True`, return the chunks iterator directly. 1103 1104 debug: bool, default False 1105 Verbosity toggle. 1106 1107 Returns 1108 ------- 1109 A `pd.DataFrame` of the pipe's data. 
1110 1111 """ 1112 import functools 1113 from meerschaum.utils.packages import import_pandas 1114 from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal 1115 from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type 1116 pd = import_pandas() 1117 is_dask = 'dask' in pd.__name__ 1118 1119 cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {} 1120 pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {} 1121 1122 remote_pandas_types = { 1123 col: to_pandas_dtype(get_pd_type_from_db_type(typ)) 1124 for col, typ in cols_types.items() 1125 } 1126 remote_dt_cols_types = { 1127 col: typ 1128 for col, typ in remote_pandas_types.items() 1129 if are_dtypes_equal(typ, 'datetime') 1130 } 1131 remote_dt_tz_aware_cols_types = { 1132 col: typ 1133 for col, typ in remote_dt_cols_types.items() 1134 if ',' in typ or typ == 'datetime' 1135 } 1136 remote_dt_tz_naive_cols_types = { 1137 col: typ 1138 for col, typ in remote_dt_cols_types.items() 1139 if col not in remote_dt_tz_aware_cols_types 1140 } 1141 1142 configured_pandas_types = { 1143 col: to_pandas_dtype(typ) 1144 for col, typ in pipe_dtypes.items() 1145 } 1146 configured_lower_precision_dt_cols_types = { 1147 col: typ 1148 for col, typ in pipe_dtypes.items() 1149 if ( 1150 are_dtypes_equal('datetime', typ) 1151 and '[' in typ 1152 and 'ns' not in typ 1153 ) 1154 1155 } 1156 1157 dtypes = { 1158 **remote_pandas_types, 1159 **configured_pandas_types, 1160 **remote_dt_tz_aware_cols_types, 1161 **remote_dt_tz_naive_cols_types, 1162 **configured_lower_precision_dt_cols_types 1163 } if pipe.enforce else {} 1164 1165 existing_cols = cols_types.keys() 1166 select_columns = ( 1167 [ 1168 col 1169 for col in existing_cols 1170 if col not in (omit_columns or []) 1171 ] 1172 if not select_columns 1173 else [ 1174 col 1175 for col in select_columns 1176 if col in existing_cols 1177 and col not in (omit_columns or []) 1178 ] 1179 ) if pipe.enforce else select_columns 1180 1181 if select_columns: 1182 dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns} 1183 1184 dtypes = { 1185 col: typ 1186 for col, typ in dtypes.items() 1187 if col in (select_columns or [col]) and col not in (omit_columns or []) 1188 } if pipe.enforce else {} 1189 1190 if debug: 1191 dprint(f"[{self}] `read()` dtypes:") 1192 mrsm.pprint(dtypes) 1193 1194 query = self.get_pipe_data_query( 1195 pipe, 1196 select_columns=select_columns, 1197 omit_columns=omit_columns, 1198 begin=begin, 1199 end=end, 1200 params=params, 1201 order=order, 1202 limit=limit, 1203 begin_add_minutes=begin_add_minutes, 1204 end_add_minutes=end_add_minutes, 1205 debug=debug, 1206 **kw 1207 ) 1208 1209 read_kwargs = {} 1210 if is_dask: 1211 index_col = pipe.columns.get('datetime', None) 1212 read_kwargs['index_col'] = index_col 1213 1214 chunks = self.read( 1215 query, 1216 chunksize=chunksize, 1217 as_iterator=True, 1218 coerce_float=False, 1219 dtype=dtypes, 1220 debug=debug, 1221 **read_kwargs 1222 ) 1223 1224 if as_iterator: 1225 return chunks 1226 1227 return pd.concat(chunks)
Access a pipe's data from the SQL instance.
Parameters
- pipe (mrsm.Pipe): The pipe to get data from.
- select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. `SELECT *`).
- omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, str, None], default None): If provided, get rows newer than or equal to this value.
- end (Union[datetime, str, None], default None): If provided, get rows older than or equal to this value.
- params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See `meerschaum.connectors.sql.build_where`.
- order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If `None`, omit the `ORDER BY` clause.
- limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
- begin_add_minutes (int, default 0): The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
- end_add_minutes (int, default 0): The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
- chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
- as_iterator (bool, default False): If `True`, return the chunks iterator directly.
- debug (bool, default False): Verbosity toggle.
Returns
- A `pd.DataFrame` of the pipe's data.
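For example, a bounded, filtered read might look like the following sketch (keys, column names, and values are illustrative, not part of the API):

```
from datetime import datetime
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe('demo', 'temperature', instance=conn)

df = conn.get_pipe_data(
    pipe,
    select_columns=['timestamp', 'station', 'reading'],
    begin=datetime(2024, 1, 1),
    end=datetime(2024, 2, 1),
    params={'station': 'KATL'},
    limit=1000,
)
```

Passing `as_iterator=True` instead returns the underlying chunk generator, which avoids concatenating large results in memory.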
1230def get_pipe_data_query( 1231 self, 1232 pipe: mrsm.Pipe, 1233 select_columns: Optional[List[str]] = None, 1234 omit_columns: Optional[List[str]] = None, 1235 begin: Union[datetime, int, str, None] = None, 1236 end: Union[datetime, int, str, None] = None, 1237 params: Optional[Dict[str, Any]] = None, 1238 order: Optional[str] = 'asc', 1239 sort_datetimes: bool = False, 1240 limit: Optional[int] = None, 1241 begin_add_minutes: int = 0, 1242 end_add_minutes: int = 0, 1243 replace_nulls: Optional[str] = None, 1244 skip_existing_cols_check: bool = False, 1245 debug: bool = False, 1246 **kw: Any 1247) -> Union[str, None]: 1248 """ 1249 Return the `SELECT` query for retrieving a pipe's data from its instance. 1250 1251 Parameters 1252 ---------- 1253 pipe: mrsm.Pipe: 1254 The pipe to get data from. 1255 1256 select_columns: Optional[List[str]], default None 1257 If provided, only select these given columns. 1258 Otherwise select all available columns (i.e. `SELECT *`). 1259 1260 omit_columns: Optional[List[str]], default None 1261 If provided, remove these columns from the selection. 1262 1263 begin: Union[datetime, int, str, None], default None 1264 If provided, get rows newer than or equal to this value. 1265 1266 end: Union[datetime, str, None], default None 1267 If provided, get rows older than or equal to this value. 1268 1269 params: Optional[Dict[str, Any]], default None 1270 Additional parameters to filter by. 1271 See `meerschaum.connectors.sql.build_where`. 1272 1273 order: Optional[str], default None 1274 The selection order for all of the indices in the query. 1275 If `None`, omit the `ORDER BY` clause. 1276 1277 sort_datetimes: bool, default False 1278 Alias for `order='desc'`. 1279 1280 limit: Optional[int], default None 1281 If specified, limit the number of rows retrieved to this value. 1282 1283 begin_add_minutes: int, default 0 1284 The number of minutes to add to the `begin` datetime (i.e. `DATEADD`). 1285 1286 end_add_minutes: int, default 0 1287 The number of minutes to add to the `end` datetime (i.e. `DATEADD`). 1288 1289 chunksize: Optional[int], default -1 1290 The size of dataframe chunks to load into memory. 1291 1292 replace_nulls: Optional[str], default None 1293 If provided, replace null values with this value. 1294 1295 skip_existing_cols_check: bool, default False 1296 If `True`, do not verify that querying columns are actually on the table. 1297 1298 debug: bool, default False 1299 Verbosity toggle. 1300 1301 Returns 1302 ------- 1303 A `SELECT` query to retrieve a pipe's data. 
1304 """ 1305 from meerschaum.utils.misc import items_str 1306 from meerschaum.utils.sql import sql_item_name, dateadd_str 1307 from meerschaum.utils.dtypes import coerce_timezone 1308 from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type 1309 1310 dt_col = pipe.columns.get('datetime', None) 1311 existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else [] 1312 skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce 1313 dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None 1314 dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None 1315 select_columns = ( 1316 [col for col in existing_cols] 1317 if not select_columns 1318 else [col for col in select_columns if skip_existing_cols_check or col in existing_cols] 1319 ) 1320 if omit_columns: 1321 select_columns = [col for col in select_columns if col not in omit_columns] 1322 1323 if order is None and sort_datetimes: 1324 order = 'desc' 1325 1326 if begin == '': 1327 begin = pipe.get_sync_time(debug=debug) 1328 backtrack_interval = pipe.get_backtrack_interval(debug=debug) 1329 if begin is not None: 1330 begin -= backtrack_interval 1331 1332 begin, end = pipe.parse_date_bounds(begin, end) 1333 if isinstance(begin, datetime) and dt_typ: 1334 begin = coerce_timezone(begin, strip_utc=('utc' not in dt_typ.lower())) 1335 if isinstance(end, datetime) and dt_typ: 1336 end = coerce_timezone(end, strip_utc=('utc' not in dt_typ.lower())) 1337 1338 cols_names = [ 1339 sql_item_name(col, self.flavor, None) 1340 for col in select_columns 1341 ] 1342 select_cols_str = ( 1343 'SELECT\n ' 1344 + ',\n '.join( 1345 [ 1346 ( 1347 col_name 1348 if not replace_nulls 1349 else f"COALESCE(col_name, '{replace_nulls}') AS {col_name}" 1350 ) 1351 for col_name in cols_names 1352 ] 1353 ) 1354 ) if cols_names else 'SELECT *' 1355 pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 1356 query = f"{select_cols_str}\nFROM {pipe_table_name}" 1357 where = "" 1358 1359 if order is not None: 1360 default_order = 'asc' 1361 if order not in ('asc', 'desc'): 1362 warn(f"Ignoring unsupported order '{order}'. Falling back to '{default_order}'.") 1363 order = default_order 1364 order = order.upper() 1365 1366 if not pipe.columns.get('datetime', None): 1367 _dt = pipe.guess_datetime() 1368 dt = sql_item_name(_dt, self.flavor, None) if _dt else None 1369 is_guess = True 1370 else: 1371 _dt = pipe.get_columns('datetime') 1372 dt = sql_item_name(_dt, self.flavor, None) 1373 is_guess = False 1374 1375 quoted_indices = { 1376 key: sql_item_name(val, self.flavor, None) 1377 for key, val in pipe.columns.items() 1378 if val in existing_cols or skip_existing_cols_check 1379 } 1380 1381 if begin is not None or end is not None: 1382 if is_guess: 1383 if _dt is None: 1384 warn( 1385 f"No datetime could be determined for {pipe}." 
1386 + "\n Ignoring begin and end...", 1387 stack=False, 1388 ) 1389 begin, end = None, None 1390 else: 1391 warn( 1392 f"A datetime wasn't specified for {pipe}.\n" 1393 + f" Using column \"{_dt}\" for datetime bounds...", 1394 stack=False, 1395 ) 1396 1397 is_dt_bound = False 1398 if begin is not None and (_dt in existing_cols or skip_existing_cols_check): 1399 begin_da = dateadd_str( 1400 flavor=self.flavor, 1401 datepart='minute', 1402 number=begin_add_minutes, 1403 begin=begin, 1404 db_type=dt_db_type, 1405 ) 1406 where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "") 1407 is_dt_bound = True 1408 1409 if end is not None and (_dt in existing_cols or skip_existing_cols_check): 1410 if 'int' in str(type(end)).lower() and end == begin: 1411 end += 1 1412 end_da = dateadd_str( 1413 flavor=self.flavor, 1414 datepart='minute', 1415 number=end_add_minutes, 1416 begin=end, 1417 db_type=dt_db_type, 1418 ) 1419 where += f"{dt} < {end_da}" 1420 is_dt_bound = True 1421 1422 if params is not None: 1423 from meerschaum.utils.sql import build_where 1424 valid_params = { 1425 k: v 1426 for k, v in params.items() 1427 if k in existing_cols or skip_existing_cols_check 1428 } 1429 if valid_params: 1430 where += ' ' + build_where(valid_params, self).lstrip().replace( 1431 'WHERE', (' AND' if is_dt_bound else " ") 1432 ) 1433 1434 if len(where) > 0: 1435 query += "\nWHERE " + where 1436 1437 if order is not None: 1438 ### Sort by indices, starting with datetime. 1439 order_by = "" 1440 if quoted_indices: 1441 order_by += "\nORDER BY " 1442 if _dt and (_dt in existing_cols or skip_existing_cols_check): 1443 order_by += dt + ' ' + order + ',' 1444 for key, quoted_col_name in quoted_indices.items(): 1445 if dt == quoted_col_name: 1446 continue 1447 order_by += ' ' + quoted_col_name + ' ' + order + ',' 1448 order_by = order_by[:-1] 1449 1450 query += order_by 1451 1452 if isinstance(limit, int): 1453 if self.flavor == 'mssql': 1454 query = f'SELECT TOP {limit}\n' + query[len("SELECT "):] 1455 elif self.flavor == 'oracle': 1456 query = ( 1457 f"SELECT * FROM (\n {query}\n)\n" 1458 + f"WHERE ROWNUM IN ({', '.join([str(i) for i in range(1, limit+1)])})" 1459 ) 1460 else: 1461 query += f"\nLIMIT {limit}" 1462 1463 if debug: 1464 to_print = ( 1465 [] 1466 + ([f"begin='{begin}'"] if begin else []) 1467 + ([f"end='{end}'"] if end else []) 1468 + ([f"params={params}"] if params else []) 1469 ) 1470 dprint("Getting pipe data with constraints: " + items_str(to_print, quotes=False)) 1471 1472 return query
Return the `SELECT` query for retrieving a pipe's data from its instance.
Parameters
- pipe (mrsm.Pipe): The pipe to get data from.
- select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. `SELECT *`).
- omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
- begin (Union[datetime, int, str, None], default None): If provided, get rows newer than or equal to this value.
- end (Union[datetime, int, str, None], default None): If provided, get rows older than or equal to this value.
- params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See `meerschaum.connectors.sql.build_where`.
- order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If `None`, omit the `ORDER BY` clause.
- sort_datetimes (bool, default False): Alias for `order='desc'`.
- limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
- begin_add_minutes (int, default 0): The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
- end_add_minutes (int, default 0): The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
- replace_nulls (Optional[str], default None): If provided, replace null values with this value.
- skip_existing_cols_check (bool, default False): If `True`, do not verify that querying columns are actually on the table.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SELECT` query to retrieve a pipe's data.
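Because this method only builds the query string, it is handy for checking the SQL a read would run. A small sketch with hypothetical keys:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe('demo', 'temperature', instance=conn)

# Print the SELECT statement for the 10 most recent rows without executing it.
query = conn.get_pipe_data_query(pipe, order='desc', limit=10)
print(query)
```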
21def register_pipe( 22 self, 23 pipe: mrsm.Pipe, 24 debug: bool = False, 25) -> SuccessTuple: 26 """ 27 Register a new pipe. 28 A pipe's attributes must be set before registering. 29 """ 30 from meerschaum.utils.packages import attempt_import 31 from meerschaum.utils.sql import json_flavors 32 33 ### ensure pipes table exists 34 from meerschaum.connectors.sql.tables import get_tables 35 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 36 37 if pipe.get_id(debug=debug) is not None: 38 return False, f"{pipe} is already registered." 39 40 ### NOTE: if `parameters` is supplied in the Pipe constructor, 41 ### then `pipe.parameters` will exist and not be fetched from the database. 42 43 ### 1. Prioritize the Pipe object's `parameters` first. 44 ### E.g. if the user manually sets the `parameters` property 45 ### or if the Pipe already exists 46 ### (which shouldn't be able to be registered anyway but that's an issue for later). 47 parameters = None 48 try: 49 parameters = pipe.get_parameters(apply_symlinks=False) 50 except Exception as e: 51 if debug: 52 dprint(str(e)) 53 parameters = None 54 55 ### ensure `parameters` is a dictionary 56 if parameters is None: 57 parameters = {} 58 59 import json 60 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 61 values = { 62 'connector_keys' : pipe.connector_keys, 63 'metric_key' : pipe.metric_key, 64 'location_key' : pipe.location_key, 65 'parameters' : ( 66 json.dumps(parameters) 67 if self.flavor not in json_flavors 68 else parameters 69 ), 70 } 71 query = sqlalchemy.insert(pipes_tbl).values(**values) 72 result = self.exec(query, debug=debug) 73 if result is None: 74 return False, f"Failed to register {pipe}." 75 return True, f"Successfully registered {pipe}."
Register a new pipe. A pipe's attributes must be set before registering.
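A minimal registration sketch (keys and column names are hypothetical); note that attributes such as `columns` must be set on the pipe before registering:

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe(
    'demo', 'temperature',
    instance=conn,
    columns={'datetime': 'timestamp'},
)

success, msg = conn.register_pipe(pipe)
print(success, msg)
```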
78def edit_pipe( 79 self, 80 pipe: mrsm.Pipe, 81 patch: bool = False, 82 debug: bool = False, 83 **kw : Any 84) -> SuccessTuple: 85 """ 86 Persist a Pipe's parameters to its database. 87 88 Parameters 89 ---------- 90 pipe: mrsm.Pipe, default None 91 The pipe to be edited. 92 patch: bool, default False 93 If patch is `True`, update the existing parameters by cascading. 94 Otherwise overwrite the parameters (default). 95 debug: bool, default False 96 Verbosity toggle. 97 """ 98 99 if pipe.id is None: 100 return False, f"{pipe} is not registered and cannot be edited." 101 102 from meerschaum.utils.packages import attempt_import 103 from meerschaum.utils.sql import json_flavors 104 if not patch: 105 parameters = pipe.__dict__.get('_attributes', {}).get('parameters', {}) 106 else: 107 from meerschaum import Pipe 108 from meerschaum.config._patch import apply_patch_to_config 109 original_parameters = Pipe( 110 pipe.connector_keys, pipe.metric_key, pipe.location_key, 111 mrsm_instance=pipe.instance_keys 112 ).get_parameters(apply_symlinks=False) 113 parameters = apply_patch_to_config( 114 original_parameters, 115 pipe._attributes['parameters'] 116 ) 117 118 ### ensure pipes table exists 119 from meerschaum.connectors.sql.tables import get_tables 120 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 121 122 import json 123 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 124 125 values = { 126 'parameters': ( 127 json.dumps(parameters) 128 if self.flavor not in json_flavors 129 else parameters 130 ), 131 } 132 q = sqlalchemy.update(pipes_tbl).values(**values).where( 133 pipes_tbl.c.pipe_id == pipe.id 134 ) 135 136 result = self.exec(q, debug=debug) 137 message = ( 138 f"Successfully edited {pipe}." 139 if result is not None else f"Failed to edit {pipe}." 140 ) 141 return (result is not None), message
Persist a Pipe's parameters to its database.
Parameters
- pipe (mrsm.Pipe): The pipe to be edited.
- patch (bool, default False): If patch is `True`, update the existing parameters by cascading. Otherwise overwrite the parameters (default).
- debug (bool, default False): Verbosity toggle.
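A sketch of a cascading edit, assuming the pipe is already registered (mutating `pipe.parameters` directly is shown only for illustration):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe('demo', 'temperature', instance=conn)

# Merge a new key into the stored parameters instead of overwriting them.
pipe.parameters['tags'] = ['production']
success, msg = conn.edit_pipe(pipe, patch=True)
print(success, msg)
```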
1475def get_pipe_id( 1476 self, 1477 pipe: mrsm.Pipe, 1478 debug: bool = False, 1479) -> Any: 1480 """ 1481 Get a Pipe's ID from the pipes table. 1482 """ 1483 if pipe.temporary: 1484 return None 1485 from meerschaum.utils.packages import attempt_import 1486 sqlalchemy = attempt_import('sqlalchemy') 1487 from meerschaum.connectors.sql.tables import get_tables 1488 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1489 1490 query = sqlalchemy.select(pipes_tbl.c.pipe_id).where( 1491 pipes_tbl.c.connector_keys == pipe.connector_keys 1492 ).where( 1493 pipes_tbl.c.metric_key == pipe.metric_key 1494 ).where( 1495 (pipes_tbl.c.location_key == pipe.location_key) if pipe.location_key is not None 1496 else pipes_tbl.c.location_key.is_(None) 1497 ) 1498 _id = self.value(query, debug=debug, silent=pipe.temporary) 1499 if _id is not None: 1500 _id = int(_id) 1501 return _id
Get a Pipe's ID from the pipes table.
1504def get_pipe_attributes( 1505 self, 1506 pipe: mrsm.Pipe, 1507 debug: bool = False, 1508) -> Dict[str, Any]: 1509 """ 1510 Get a Pipe's attributes dictionary. 1511 """ 1512 from meerschaum.connectors.sql.tables import get_tables 1513 from meerschaum.utils.packages import attempt_import 1514 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 1515 1516 if pipe.get_id(debug=debug) is None: 1517 return {} 1518 1519 pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes'] 1520 1521 try: 1522 q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id) 1523 if debug: 1524 dprint(q) 1525 rows = ( 1526 self.exec(q, silent=True, debug=debug).mappings().all() 1527 if self.flavor != 'duckdb' 1528 else self.read(q, debug=debug).to_dict(orient='records') 1529 ) 1530 if not rows: 1531 return {} 1532 attributes = dict(rows[0]) 1533 except Exception: 1534 warn(traceback.format_exc()) 1535 return {} 1536 1537 ### handle non-PostgreSQL databases (text vs JSON) 1538 if not isinstance(attributes.get('parameters', None), dict): 1539 try: 1540 import json 1541 parameters = json.loads(attributes['parameters']) 1542 if isinstance(parameters, str) and parameters[0] == '{': 1543 parameters = json.loads(parameters) 1544 attributes['parameters'] = parameters 1545 except Exception: 1546 attributes['parameters'] = {} 1547 1548 return attributes
Get a Pipe's attributes dictionary.
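Both lookups read from the pipes table, as in this sketch (a registered pipe is assumed):

```
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe('demo', 'temperature', instance=conn)

pipe_id = conn.get_pipe_id(pipe)  # e.g. 1, or None if unregistered
attributes = conn.get_pipe_attributes(pipe)
print(attributes.get('parameters', {}))
```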
1634def sync_pipe( 1635 self, 1636 pipe: mrsm.Pipe, 1637 df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None, 1638 begin: Union[datetime, int, None] = None, 1639 end: Union[datetime, int, None] = None, 1640 chunksize: Optional[int] = -1, 1641 check_existing: bool = True, 1642 blocking: bool = True, 1643 debug: bool = False, 1644 _check_temporary_tables: bool = True, 1645 **kw: Any 1646) -> SuccessTuple: 1647 """ 1648 Sync a pipe using a database connection. 1649 1650 Parameters 1651 ---------- 1652 pipe: mrsm.Pipe 1653 The Meerschaum Pipe instance into which to sync the data. 1654 1655 df: Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]] 1656 An optional DataFrame or equivalent to sync into the pipe. 1657 Defaults to `None`. 1658 1659 begin: Union[datetime, int, None], default None 1660 Optionally specify the earliest datetime to search for data. 1661 Defaults to `None`. 1662 1663 end: Union[datetime, int, None], default None 1664 Optionally specify the latest datetime to search for data. 1665 Defaults to `None`. 1666 1667 chunksize: Optional[int], default -1 1668 Specify the number of rows to sync per chunk. 1669 If `-1`, resort to system configuration (default is `900`). 1670 A `chunksize` of `None` will sync all rows in one transaction. 1671 Defaults to `-1`. 1672 1673 check_existing: bool, default True 1674 If `True`, pull and diff with existing data from the pipe. Defaults to `True`. 1675 1676 blocking: bool, default True 1677 If `True`, wait for sync to finish and return its result, otherwise asyncronously sync. 1678 Defaults to `True`. 1679 1680 debug: bool, default False 1681 Verbosity toggle. Defaults to False. 1682 1683 kw: Any 1684 Catch-all for keyword arguments. 1685 1686 Returns 1687 ------- 1688 A `SuccessTuple` of success (`bool`) and message (`str`). 1689 """ 1690 from meerschaum.utils.packages import import_pandas 1691 from meerschaum.utils.sql import ( 1692 get_update_queries, 1693 sql_item_name, 1694 UPDATE_QUERIES, 1695 get_reset_autoincrement_queries, 1696 ) 1697 from meerschaum.utils.dtypes import get_current_timestamp 1698 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 1699 from meerschaum.utils.dataframe import get_special_cols 1700 from meerschaum import Pipe 1701 import time 1702 import copy 1703 pd = import_pandas() 1704 if df is None: 1705 msg = f"DataFrame is None. Cannot sync {pipe}." 1706 warn(msg) 1707 return False, msg 1708 1709 start = time.perf_counter() 1710 pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)) 1711 dtypes = pipe.get_dtypes(debug=debug) 1712 1713 if not pipe.temporary and not pipe.get_id(debug=debug): 1714 register_tuple = pipe.register(debug=debug) 1715 if not register_tuple[0]: 1716 return register_tuple 1717 1718 ### df is the dataframe returned from the remote source 1719 ### via the connector 1720 if debug: 1721 dprint("Fetched data:\n" + str(df)) 1722 1723 if not isinstance(df, pd.DataFrame): 1724 df = pipe.enforce_dtypes( 1725 df, 1726 chunksize=chunksize, 1727 safe_copy=kw.get('safe_copy', False), 1728 dtypes=dtypes, 1729 debug=debug, 1730 ) 1731 1732 ### if table does not exist, create it with indices 1733 is_new = False 1734 if not pipe.exists(debug=debug): 1735 check_existing = False 1736 is_new = True 1737 else: 1738 ### Check for new columns. 
1739 add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug) 1740 if add_cols_queries: 1741 pipe._clear_cache_key('_columns_types', debug=debug) 1742 pipe._clear_cache_key('_columns_indices', debug=debug) 1743 if not self.exec_queries(add_cols_queries, debug=debug): 1744 warn(f"Failed to add new columns to {pipe}.") 1745 1746 alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug) 1747 if alter_cols_queries: 1748 pipe._clear_cache_key('_columns_types', debug=debug) 1749 pipe._clear_cache_key('_columns_types', debug=debug) 1750 if not self.exec_queries(alter_cols_queries, debug=debug): 1751 warn(f"Failed to alter columns for {pipe}.") 1752 1753 upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES 1754 if upsert: 1755 check_existing = False 1756 kw['safe_copy'] = kw.get('safe_copy', False) 1757 1758 unseen_df, update_df, delta_df = ( 1759 pipe.filter_existing( 1760 df, 1761 chunksize=chunksize, 1762 debug=debug, 1763 **kw 1764 ) if check_existing else (df, None, df) 1765 ) 1766 if upsert: 1767 unseen_df, update_df, delta_df = (df.head(0), df, df) 1768 1769 if debug: 1770 dprint("Delta data:\n" + str(delta_df)) 1771 dprint("Unseen data:\n" + str(unseen_df)) 1772 if update_df is not None: 1773 dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df)) 1774 1775 if_exists = kw.get('if_exists', 'append') 1776 if 'if_exists' in kw: 1777 kw.pop('if_exists') 1778 if 'name' in kw: 1779 kw.pop('name') 1780 1781 ### Insert new data into the target table. 1782 unseen_kw = copy.deepcopy(kw) 1783 unseen_kw.update({ 1784 'name': pipe.target, 1785 'if_exists': if_exists, 1786 'debug': debug, 1787 'as_dict': True, 1788 'safe_copy': kw.get('safe_copy', False), 1789 'chunksize': chunksize, 1790 'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True), 1791 'schema': self.get_pipe_schema(pipe), 1792 }) 1793 1794 dt_col = pipe.columns.get('datetime', None) 1795 primary_key = pipe.columns.get('primary', None) 1796 autoincrement = ( 1797 pipe.parameters.get('autoincrement', False) 1798 or ( 1799 is_new 1800 and primary_key 1801 and primary_key 1802 not in dtypes 1803 and primary_key not in unseen_df.columns 1804 ) 1805 ) 1806 if autoincrement and autoincrement not in pipe.parameters: 1807 update_success, update_msg = pipe.update_parameters( 1808 {'autoincrement': autoincrement}, 1809 debug=debug, 1810 ) 1811 if not update_success: 1812 return update_success, update_msg 1813 1814 def _check_pk(_df_to_clear): 1815 if _df_to_clear is None: 1816 return 1817 if primary_key not in _df_to_clear.columns: 1818 return 1819 if not _df_to_clear[primary_key].notnull().any(): 1820 del _df_to_clear[primary_key] 1821 1822 autoincrement_needs_reset = bool( 1823 autoincrement 1824 and primary_key 1825 and primary_key in unseen_df.columns 1826 and unseen_df[primary_key].notnull().any() 1827 ) 1828 if autoincrement and primary_key: 1829 for _df_to_clear in (unseen_df, update_df, delta_df): 1830 _check_pk(_df_to_clear) 1831 1832 if is_new: 1833 create_success, create_msg = self.create_pipe_table_from_df( 1834 pipe, 1835 unseen_df, 1836 debug=debug, 1837 ) 1838 if not create_success: 1839 return create_success, create_msg 1840 1841 do_identity_insert = bool( 1842 self.flavor in ('mssql',) 1843 and primary_key 1844 and primary_key in unseen_df.columns 1845 and autoincrement 1846 ) 1847 stats = {'success': True, 'msg': ''} 1848 if len(unseen_df) > 0: 1849 with self.engine.connect() as connection: 1850 with connection.begin(): 1851 if 
do_identity_insert: 1852 identity_on_result = self.exec( 1853 f"SET IDENTITY_INSERT {pipe_name} ON", 1854 commit=False, 1855 _connection=connection, 1856 close=False, 1857 debug=debug, 1858 ) 1859 if identity_on_result is None: 1860 return False, f"Could not enable identity inserts on {pipe}." 1861 1862 stats = self.to_sql( 1863 unseen_df, 1864 _connection=connection, 1865 **unseen_kw 1866 ) 1867 1868 if do_identity_insert: 1869 identity_off_result = self.exec( 1870 f"SET IDENTITY_INSERT {pipe_name} OFF", 1871 commit=False, 1872 _connection=connection, 1873 close=False, 1874 debug=debug, 1875 ) 1876 if identity_off_result is None: 1877 return False, f"Could not disable identity inserts on {pipe}." 1878 1879 if is_new: 1880 if not self.create_indices(pipe, debug=debug): 1881 warn(f"Failed to create indices for {pipe}. Continuing...") 1882 1883 if autoincrement_needs_reset: 1884 reset_autoincrement_queries = get_reset_autoincrement_queries( 1885 pipe.target, 1886 primary_key, 1887 self, 1888 schema=self.get_pipe_schema(pipe), 1889 debug=debug, 1890 ) 1891 results = self.exec_queries(reset_autoincrement_queries, debug=debug) 1892 for result in results: 1893 if result is None: 1894 warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False) 1895 1896 if update_df is not None and len(update_df) > 0: 1897 temp_target = self.get_temporary_target( 1898 pipe.target, 1899 label=('update' if not upsert else 'upsert'), 1900 ) 1901 self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug) 1902 update_dtypes = { 1903 **{ 1904 col: str(typ) 1905 for col, typ in update_df.dtypes.items() 1906 }, 1907 **get_special_cols(update_df) 1908 } 1909 1910 temp_pipe = Pipe( 1911 pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key, 1912 instance=pipe.instance_keys, 1913 columns={ 1914 (ix_key if ix_key != 'primary' else 'primary_'): ix 1915 for ix_key, ix in pipe.columns.items() 1916 if ix and ix in update_df.columns 1917 }, 1918 dtypes=update_dtypes, 1919 target=temp_target, 1920 temporary=True, 1921 enforce=False, 1922 static=True, 1923 autoincrement=False, 1924 cache=False, 1925 parameters={ 1926 'schema': self.internal_schema, 1927 'hypertable': False, 1928 }, 1929 ) 1930 _temp_columns_types = { 1931 col: get_db_type_from_pd_type(typ, self.flavor) 1932 for col, typ in update_dtypes.items() 1933 } 1934 temp_pipe._cache_value('_columns_types', _temp_columns_types, memory_only=True, debug=debug) 1935 temp_pipe._cache_value('_skip_check_indices', True, memory_only=True, debug=debug) 1936 now_ts = get_current_timestamp('ms', as_int=True) / 1000 1937 temp_pipe._cache_value('_columns_types_timestamp', now_ts, memory_only=True, debug=debug) 1938 temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug) 1939 if not temp_success: 1940 return temp_success, temp_msg 1941 1942 existing_cols = pipe.get_columns_types(debug=debug) 1943 join_cols = [ 1944 col 1945 for col_key, col in pipe.columns.items() 1946 if col and col in existing_cols 1947 ] if not primary_key or self.flavor == 'oracle' else ( 1948 [dt_col, primary_key] 1949 if ( 1950 self.flavor in ('timescaledb', 'timescaledb-ha') 1951 and dt_col 1952 and dt_col in update_df.columns 1953 ) 1954 else [primary_key] 1955 ) 1956 update_queries = get_update_queries( 1957 pipe.target, 1958 temp_target, 1959 self, 1960 join_cols, 1961 upsert=upsert, 1962 schema=self.get_pipe_schema(pipe), 1963 patch_schema=self.internal_schema, 1964 
target_cols_types=pipe.get_columns_types(debug=debug), 1965 patch_cols_types=_temp_columns_types, 1966 datetime_col=(dt_col if dt_col in update_df.columns else None), 1967 identity_insert=(autoincrement and primary_key in update_df.columns), 1968 null_indices=pipe.null_indices, 1969 cast_columns=pipe.enforce, 1970 debug=debug, 1971 ) 1972 update_results = self.exec_queries( 1973 update_queries, 1974 break_on_error=True, 1975 rollback=True, 1976 debug=debug, 1977 ) 1978 update_success = all(update_results) 1979 self._log_temporary_tables_creation( 1980 temp_target, 1981 ready_to_drop=True, 1982 create=(not pipe.temporary), 1983 debug=debug, 1984 ) 1985 if not update_success: 1986 warn(f"Failed to apply update to {pipe}.") 1987 stats['success'] = stats['success'] and update_success 1988 stats['msg'] = ( 1989 (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip() 1990 if not update_success 1991 else stats.get('msg', '') 1992 ) 1993 1994 stop = time.perf_counter() 1995 success = stats['success'] 1996 if not success: 1997 return success, stats['msg'] or str(stats) 1998 1999 unseen_count = len(unseen_df.index) if unseen_df is not None else 0 2000 update_count = len(update_df.index) if update_df is not None else 0 2001 msg = ( 2002 ( 2003 f"Inserted {unseen_count:,}, " 2004 + f"updated {update_count:,} rows." 2005 ) 2006 if not upsert 2007 else ( 2008 f"Upserted {update_count:,} row" 2009 + ('s' if update_count != 1 else '') 2010 + "." 2011 ) 2012 ) 2013 if debug: 2014 msg = msg[:-1] + ( 2015 f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n" 2016 + f"in {round(stop - start, 2)} seconds." 2017 ) 2018 2019 if _check_temporary_tables: 2020 drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables( 2021 refresh=False, debug=debug 2022 ) 2023 if not drop_stale_success: 2024 warn(drop_stale_msg) 2025 2026 return success, msg
Sync a pipe using a database connection.
Parameters
- pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
- df (Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]): An optional DataFrame or equivalent to sync into the pipe. Defaults to `None`.
- begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to `None`.
- end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to `None`.
- chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If `-1`, resort to system configuration (default is `900`). A `chunksize` of `None` will sync all rows in one transaction. Defaults to `-1`.
- check_existing (bool, default True): If `True`, pull and diff with existing data from the pipe. Defaults to `True`.
- blocking (bool, default True): If `True`, wait for sync to finish and return its result, otherwise asynchronously sync. Defaults to `True`.
- debug (bool, default False): Verbosity toggle. Defaults to `False`.
- kw (Any): Catch-all for keyword arguments.
Returns
- A `SuccessTuple` of success (`bool`) and message (`str`).
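A minimal end-to-end sketch (connector keys, index columns, and data are hypothetical):

```
import pandas as pd
import meerschaum as mrsm

conn = mrsm.get_connector('sql:main')
pipe = mrsm.Pipe(
    'demo', 'temperature',
    instance=conn,
    columns={'datetime': 'timestamp', 'id': 'station'},
)

df = pd.DataFrame({
    'timestamp': pd.to_datetime(['2024-01-01 00:00', '2024-01-01 01:00']),
    'station': ['KATL', 'KATL'],
    'reading': [10.1, 10.4],
})

# The first sync creates the table and indices; later syncs insert and update.
success, msg = conn.sync_pipe(pipe, df)
print(success, msg)
```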
2029def sync_pipe_inplace( 2030 self, 2031 pipe: 'mrsm.Pipe', 2032 params: Optional[Dict[str, Any]] = None, 2033 begin: Union[datetime, int, None] = None, 2034 end: Union[datetime, int, None] = None, 2035 chunksize: Optional[int] = -1, 2036 check_existing: bool = True, 2037 debug: bool = False, 2038 **kw: Any 2039) -> SuccessTuple: 2040 """ 2041 If a pipe's connector is the same as its instance connector, 2042 it's more efficient to sync the pipe in-place rather than reading data into Pandas. 2043 2044 Parameters 2045 ---------- 2046 pipe: mrsm.Pipe 2047 The pipe whose connector is the same as its instance. 2048 2049 params: Optional[Dict[str, Any]], default None 2050 Optional params dictionary to build the `WHERE` clause. 2051 See `meerschaum.utils.sql.build_where`. 2052 2053 begin: Union[datetime, int, None], default None 2054 Optionally specify the earliest datetime to search for data. 2055 Defaults to `None`. 2056 2057 end: Union[datetime, int, None], default None 2058 Optionally specify the latest datetime to search for data. 2059 Defaults to `None`. 2060 2061 chunksize: Optional[int], default -1 2062 Specify the number of rows to sync per chunk. 2063 If `-1`, resort to system configuration (default is `900`). 2064 A `chunksize` of `None` will sync all rows in one transaction. 2065 Defaults to `-1`. 2066 2067 check_existing: bool, default True 2068 If `True`, pull and diff with existing data from the pipe. 2069 2070 debug: bool, default False 2071 Verbosity toggle. 2072 2073 Returns 2074 ------- 2075 A SuccessTuple. 2076 """ 2077 if self.flavor == 'duckdb': 2078 return pipe.sync( 2079 params=params, 2080 begin=begin, 2081 end=end, 2082 chunksize=chunksize, 2083 check_existing=check_existing, 2084 debug=debug, 2085 _inplace=False, 2086 **kw 2087 ) 2088 from meerschaum.utils.sql import ( 2089 sql_item_name, 2090 get_update_queries, 2091 get_null_replacement, 2092 get_create_table_queries, 2093 get_create_schema_if_not_exists_queries, 2094 get_table_cols_types, 2095 session_execute, 2096 dateadd_str, 2097 UPDATE_QUERIES, 2098 ) 2099 from meerschaum.utils.dtypes.sql import ( 2100 get_pd_type_from_db_type, 2101 get_db_type_from_pd_type, 2102 ) 2103 from meerschaum.utils.misc import generate_password 2104 2105 transaction_id_length = ( 2106 mrsm.get_config( 2107 'system', 'connectors', 'sql', 'instance', 'temporary_target', 'transaction_id_length' 2108 ) 2109 ) 2110 transact_id = generate_password(transaction_id_length) 2111 2112 internal_schema = self.internal_schema 2113 target = pipe.target 2114 temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update'] 2115 temp_tables = { 2116 table_root: self.get_temporary_target(target, transact_id=transact_id, label=table_root) 2117 for table_root in temp_table_roots 2118 } 2119 temp_table_names = { 2120 table_root: sql_item_name(table_name_raw, self.flavor, internal_schema) 2121 for table_root, table_name_raw in temp_tables.items() 2122 } 2123 temp_table_aliases = { 2124 table_root: sql_item_name(table_root, self.flavor) 2125 for table_root in temp_table_roots 2126 } 2127 table_alias_as = " AS" if self.flavor != 'oracle' else '' 2128 metadef = self.get_pipe_metadef( 2129 pipe, 2130 params=params, 2131 begin=begin, 2132 end=end, 2133 check_existing=check_existing, 2134 debug=debug, 2135 ) 2136 pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 2137 upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in UPDATE_QUERIES 2138 static = pipe.parameters.get('static', False) 2139 
database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None)) 2140 primary_key = pipe.columns.get('primary', None) 2141 primary_key_typ = pipe.dtypes.get(primary_key, None) if primary_key else None 2142 primary_key_db_type = ( 2143 get_db_type_from_pd_type(primary_key_typ, self.flavor) 2144 if primary_key_typ 2145 else None 2146 ) 2147 if not {col_key: col for col_key, col in pipe.columns.items() if col_key and col}: 2148 return False, "Cannot sync in-place without index columns." 2149 2150 autoincrement = pipe.parameters.get('autoincrement', False) 2151 dt_col = pipe.columns.get('datetime', None) 2152 dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None 2153 dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None 2154 dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None 2155 2156 def clean_up_temp_tables(ready_to_drop: bool = False): 2157 log_success, log_msg = self._log_temporary_tables_creation( 2158 [ 2159 table 2160 for table in temp_tables.values() 2161 ] if not upsert else [temp_tables['update']], 2162 ready_to_drop=ready_to_drop, 2163 create=(not pipe.temporary), 2164 debug=debug, 2165 ) 2166 if not log_success: 2167 warn(log_msg) 2168 drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables( 2169 refresh=False, 2170 debug=debug, 2171 ) 2172 if not drop_stale_success: 2173 warn(drop_stale_msg) 2174 return drop_stale_success, drop_stale_msg 2175 2176 sqlalchemy, sqlalchemy_orm = mrsm.attempt_import( 2177 'sqlalchemy', 2178 'sqlalchemy.orm', 2179 ) 2180 if not pipe.exists(debug=debug): 2181 schema = self.get_pipe_schema(pipe) 2182 create_pipe_queries = get_create_table_queries( 2183 metadef, 2184 pipe.target, 2185 self.flavor, 2186 schema=schema, 2187 primary_key=primary_key, 2188 primary_key_db_type=primary_key_db_type, 2189 autoincrement=autoincrement, 2190 datetime_column=dt_col, 2191 ) 2192 if schema: 2193 create_pipe_queries = ( 2194 get_create_schema_if_not_exists_queries(schema, self.flavor) 2195 + create_pipe_queries 2196 ) 2197 2198 results = self.exec_queries(create_pipe_queries, debug=debug) 2199 if not all(results): 2200 _ = clean_up_temp_tables() 2201 return False, f"Could not insert new data into {pipe} from its SQL query definition." 2202 2203 if not self.create_indices(pipe, debug=debug): 2204 warn(f"Failed to create indices for {pipe}. Continuing...") 2205 2206 rowcount = pipe.get_rowcount(debug=debug) 2207 _ = clean_up_temp_tables() 2208 return True, f"Inserted {rowcount:,}, updated 0 rows." 
2209 2210 session = sqlalchemy_orm.Session(self.engine) 2211 connectable = session if self.flavor != 'duckdb' else self 2212 2213 create_new_query = get_create_table_queries( 2214 metadef, 2215 temp_tables[('new') if not upsert else 'update'], 2216 self.flavor, 2217 schema=internal_schema, 2218 )[0] 2219 (create_new_success, create_new_msg), create_new_results = session_execute( 2220 session, 2221 create_new_query, 2222 with_results=True, 2223 debug=debug, 2224 ) 2225 if not create_new_success: 2226 _ = clean_up_temp_tables() 2227 return create_new_success, create_new_msg 2228 new_count = create_new_results[0].rowcount if create_new_results else 0 2229 2230 new_cols_types = get_table_cols_types( 2231 temp_tables[('new' if not upsert else 'update')], 2232 connectable=connectable, 2233 flavor=self.flavor, 2234 schema=internal_schema, 2235 database=database, 2236 debug=debug, 2237 ) if not static else pipe.get_columns_types(debug=debug) 2238 if not new_cols_types: 2239 return False, f"Failed to get new columns for {pipe}." 2240 2241 new_cols = { 2242 str(col_name): get_pd_type_from_db_type(str(col_type)) 2243 for col_name, col_type in new_cols_types.items() 2244 } 2245 new_cols_str = '\n ' + ',\n '.join([ 2246 sql_item_name(col, self.flavor) 2247 for col in new_cols 2248 ]) 2249 def get_col_typ(col: str, cols_types: Dict[str, str]) -> str: 2250 if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char': 2251 return new_cols_types[col] 2252 return cols_types[col] 2253 2254 add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug) 2255 if add_cols_queries: 2256 pipe._clear_cache_key('_columns_types', debug=debug) 2257 pipe._clear_cache_key('_columns_indices', debug=debug) 2258 self.exec_queries(add_cols_queries, debug=debug) 2259 2260 alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug) 2261 if alter_cols_queries: 2262 pipe._clear_cache_key('_columns_types', debug=debug) 2263 self.exec_queries(alter_cols_queries, debug=debug) 2264 2265 insert_queries = [ 2266 ( 2267 f"INSERT INTO {pipe_name} ({new_cols_str})\n" 2268 f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}{table_alias_as}" 2269 f" {temp_table_aliases['new']}" 2270 ) 2271 ] if not check_existing and not upsert else [] 2272 2273 new_queries = insert_queries 2274 new_success, new_msg = ( 2275 session_execute(session, new_queries, debug=debug) 2276 if new_queries 2277 else (True, "Success") 2278 ) 2279 if not new_success: 2280 _ = clean_up_temp_tables() 2281 return new_success, new_msg 2282 2283 if not check_existing: 2284 session.commit() 2285 _ = clean_up_temp_tables() 2286 return True, f"Inserted {new_count}, updated 0 rows." 
2287 2288 min_dt_col_name_da = dateadd_str( 2289 flavor=self.flavor, begin=f"MIN({dt_col_name})", db_type=dt_db_type, 2290 ) 2291 max_dt_col_name_da = dateadd_str( 2292 flavor=self.flavor, begin=f"MAX({dt_col_name})", db_type=dt_db_type, 2293 ) 2294 2295 (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute( 2296 session, 2297 [ 2298 "SELECT\n" 2299 f" {min_dt_col_name_da} AS {sql_item_name('min_dt', self.flavor)},\n" 2300 f" {max_dt_col_name_da} AS {sql_item_name('max_dt', self.flavor)}\n" 2301 f"FROM {temp_table_names['new' if not upsert else 'update']}\n" 2302 f"WHERE {dt_col_name} IS NOT NULL" 2303 ], 2304 with_results=True, 2305 debug=debug, 2306 ) if dt_col and not upsert else ((True, "Success"), None) 2307 if not new_dt_bounds_success: 2308 return ( 2309 new_dt_bounds_success, 2310 f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}" 2311 ) 2312 2313 if dt_col and not upsert: 2314 begin, end = new_dt_bounds_results[0].fetchone() 2315 2316 backtrack_def = self.get_pipe_data_query( 2317 pipe, 2318 begin=begin, 2319 end=end, 2320 begin_add_minutes=0, 2321 end_add_minutes=1, 2322 params=params, 2323 debug=debug, 2324 order=None, 2325 ) 2326 create_backtrack_query = get_create_table_queries( 2327 backtrack_def, 2328 temp_tables['backtrack'], 2329 self.flavor, 2330 schema=internal_schema, 2331 )[0] 2332 (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute( 2333 session, 2334 create_backtrack_query, 2335 with_results=True, 2336 debug=debug, 2337 ) if not upsert else ((True, "Success"), None) 2338 2339 if not create_backtrack_success: 2340 _ = clean_up_temp_tables() 2341 return create_backtrack_success, create_backtrack_msg 2342 2343 backtrack_cols_types = get_table_cols_types( 2344 temp_tables['backtrack'], 2345 connectable=connectable, 2346 flavor=self.flavor, 2347 schema=internal_schema, 2348 database=database, 2349 debug=debug, 2350 ) if not (upsert or static) else new_cols_types 2351 2352 common_cols = [col for col in new_cols if col in backtrack_cols_types] 2353 primary_key = pipe.columns.get('primary', None) 2354 on_cols = { 2355 col: new_cols.get(col) 2356 for col_key, col in pipe.columns.items() 2357 if ( 2358 col 2359 and 2360 col_key != 'value' 2361 and col in backtrack_cols_types 2362 and col in new_cols 2363 ) 2364 } if not primary_key else {primary_key: new_cols.get(primary_key)} 2365 if not on_cols: 2366 raise ValueError("Cannot sync without common index columns.") 2367 2368 null_replace_new_cols_str = ( 2369 '\n ' + ',\n '.join([ 2370 f"COALESCE({temp_table_aliases['new']}.{sql_item_name(col, self.flavor)}, " 2371 + get_null_replacement(get_col_typ(col, new_cols_types), self.flavor) 2372 + ") AS " 2373 + sql_item_name(col, self.flavor, None) 2374 for col, typ in new_cols.items() 2375 ]) 2376 ) 2377 2378 select_delta_query = ( 2379 "SELECT" 2380 + null_replace_new_cols_str 2381 + f"\nFROM {temp_table_names['new']}{table_alias_as} {temp_table_aliases['new']}\n" 2382 + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as} {temp_table_aliases['backtrack']}" 2383 + "\n ON\n " 2384 + '\n AND\n '.join([ 2385 ( 2386 f" COALESCE({temp_table_aliases['new']}." 2387 + sql_item_name(c, self.flavor, None) 2388 + ", " 2389 + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) 2390 + ")" 2391 + '\n =\n ' 2392 + f" COALESCE({temp_table_aliases['backtrack']}." 
2393 + sql_item_name(c, self.flavor, None) 2394 + ", " 2395 + get_null_replacement(get_col_typ(c, backtrack_cols_types), self.flavor) 2396 + ") " 2397 ) for c in common_cols 2398 ]) 2399 + "\nWHERE\n " 2400 + '\n AND\n '.join([ 2401 ( 2402 f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor) + ' IS NULL' 2403 ) for c in common_cols 2404 ]) 2405 ) 2406 create_delta_query = get_create_table_queries( 2407 select_delta_query, 2408 temp_tables['delta'], 2409 self.flavor, 2410 schema=internal_schema, 2411 )[0] 2412 create_delta_success, create_delta_msg = session_execute( 2413 session, 2414 create_delta_query, 2415 debug=debug, 2416 ) if not upsert else (True, "Success") 2417 if not create_delta_success: 2418 _ = clean_up_temp_tables() 2419 return create_delta_success, create_delta_msg 2420 2421 delta_cols_types = get_table_cols_types( 2422 temp_tables['delta'], 2423 connectable=connectable, 2424 flavor=self.flavor, 2425 schema=internal_schema, 2426 database=database, 2427 debug=debug, 2428 ) if not (upsert or static) else new_cols_types 2429 2430 ### This is a weird bug on SQLite. 2431 ### Sometimes the backtrack dtypes are all empty strings. 2432 if not all(delta_cols_types.values()): 2433 delta_cols_types = new_cols_types 2434 2435 delta_cols = { 2436 col: get_pd_type_from_db_type(typ) 2437 for col, typ in delta_cols_types.items() 2438 } 2439 delta_cols_str = ', '.join([ 2440 sql_item_name(col, self.flavor) 2441 for col in delta_cols 2442 ]) 2443 2444 select_joined_query = ( 2445 "SELECT\n " 2446 + (',\n '.join([ 2447 ( 2448 f"{temp_table_aliases['delta']}." + sql_item_name(c, self.flavor, None) 2449 + " AS " + sql_item_name(c + '_delta', self.flavor, None) 2450 ) for c in delta_cols 2451 ])) 2452 + ",\n " 2453 + (',\n '.join([ 2454 ( 2455 f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor, None) 2456 + " AS " + sql_item_name(c + '_backtrack', self.flavor, None) 2457 ) for c in backtrack_cols_types 2458 ])) 2459 + f"\nFROM {temp_table_names['delta']}{table_alias_as} {temp_table_aliases['delta']}\n" 2460 + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as}" 2461 + f" {temp_table_aliases['backtrack']}" 2462 + "\n ON\n " 2463 + '\n AND\n '.join([ 2464 ( 2465 f" COALESCE({temp_table_aliases['delta']}." + sql_item_name(c, self.flavor) 2466 + ", " 2467 + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")" 2468 + '\n =\n ' 2469 + f" COALESCE({temp_table_aliases['backtrack']}." 
+ sql_item_name(c, self.flavor) 2470 + ", " 2471 + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")" 2472 ) for c, typ in on_cols.items() 2473 ]) 2474 ) 2475 2476 create_joined_query = get_create_table_queries( 2477 select_joined_query, 2478 temp_tables['joined'], 2479 self.flavor, 2480 schema=internal_schema, 2481 )[0] 2482 create_joined_success, create_joined_msg = session_execute( 2483 session, 2484 create_joined_query, 2485 debug=debug, 2486 ) if on_cols and not upsert else (True, "Success") 2487 if not create_joined_success: 2488 _ = clean_up_temp_tables() 2489 return create_joined_success, create_joined_msg 2490 2491 select_unseen_query = ( 2492 "SELECT\n " 2493 + (',\n '.join([ 2494 ( 2495 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None) 2496 + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor) 2497 + " THEN " + sql_item_name(c + '_delta', self.flavor, None) 2498 + "\n ELSE NULL\n END" 2499 + " AS " + sql_item_name(c, self.flavor, None) 2500 ) for c, typ in delta_cols.items() 2501 ])) 2502 + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n" 2503 + "WHERE\n " 2504 + '\n AND\n '.join([ 2505 ( 2506 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL' 2507 ) for c in delta_cols 2508 ]) 2509 ) 2510 create_unseen_query = get_create_table_queries( 2511 select_unseen_query, 2512 temp_tables['unseen'], 2513 self.flavor, 2514 internal_schema, 2515 )[0] 2516 (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute( 2517 session, 2518 create_unseen_query, 2519 with_results=True, 2520 debug=debug 2521 ) if not upsert else ((True, "Success"), None) 2522 if not create_unseen_success: 2523 _ = clean_up_temp_tables() 2524 return create_unseen_success, create_unseen_msg 2525 2526 select_update_query = ( 2527 "SELECT\n " 2528 + (',\n '.join([ 2529 ( 2530 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None) 2531 + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor) 2532 + " THEN " + sql_item_name(c + '_delta', self.flavor, None) 2533 + "\n ELSE NULL\n END" 2534 + " AS " + sql_item_name(c, self.flavor, None) 2535 ) for c, typ in delta_cols.items() 2536 ])) 2537 + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n" 2538 + "WHERE\n " 2539 + '\n OR\n '.join([ 2540 ( 2541 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL' 2542 ) for c in delta_cols 2543 ]) 2544 ) 2545 2546 create_update_query = get_create_table_queries( 2547 select_update_query, 2548 temp_tables['update'], 2549 self.flavor, 2550 internal_schema, 2551 )[0] 2552 (create_update_success, create_update_msg), create_update_results = session_execute( 2553 session, 2554 create_update_query, 2555 with_results=True, 2556 debug=debug, 2557 ) if on_cols and not upsert else ((True, "Success"), []) 2558 apply_update_queries = ( 2559 get_update_queries( 2560 pipe.target, 2561 temp_tables['update'], 2562 session, 2563 on_cols, 2564 upsert=upsert, 2565 schema=self.get_pipe_schema(pipe), 2566 patch_schema=internal_schema, 2567 target_cols_types=pipe.get_columns_types(debug=debug), 2568 patch_cols_types=delta_cols_types, 2569 datetime_col=pipe.columns.get('datetime', None), 2570 flavor=self.flavor, 2571 null_indices=pipe.null_indices, 2572 cast_columns=pipe.enforce, 2573 debug=debug, 2574 ) 2575 if on_cols else [] 2576 ) 2577 2578 apply_unseen_queries = [ 2579 ( 2580 f"INSERT INTO {pipe_name} ({delta_cols_str})\n" 2581 + f"SELECT 
{delta_cols_str}\nFROM " 2582 + ( 2583 temp_table_names['unseen'] 2584 if on_cols 2585 else temp_table_names['delta'] 2586 ) 2587 ), 2588 ] 2589 2590 (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute( 2591 session, 2592 apply_unseen_queries, 2593 with_results=True, 2594 debug=debug, 2595 ) if not upsert else ((True, "Success"), None) 2596 if not apply_unseen_success: 2597 _ = clean_up_temp_tables() 2598 return apply_unseen_success, apply_unseen_msg 2599 unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0 2600 2601 (apply_update_success, apply_update_msg), apply_update_results = session_execute( 2602 session, 2603 apply_update_queries, 2604 with_results=True, 2605 debug=debug, 2606 ) 2607 if not apply_update_success: 2608 _ = clean_up_temp_tables() 2609 return apply_update_success, apply_update_msg 2610 update_count = apply_update_results[0].rowcount if apply_update_results else 0 2611 2612 session.commit() 2613 2614 msg = ( 2615 f"Inserted {unseen_count:,}, updated {update_count:,} rows." 2616 if not upsert 2617 else f"Upserted {update_count:,} row" + ('s' if update_count != 1 else '') + "." 2618 ) 2619 _ = clean_up_temp_tables(ready_to_drop=True) 2620 2621 return True, msg
If a pipe's connector is the same as its instance connector, it's more efficient to sync the pipe in-place rather than reading data into Pandas.
Parameters
- pipe (mrsm.Pipe): The pipe whose connector is the same as its instance.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the `WHERE` clause. See `meerschaum.utils.sql.build_where`.
- begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to `None`.
- end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to `None`.
- chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If `-1`, resort to system configuration (default is `900`). A `chunksize` of `None` will sync all rows in one transaction. Defaults to `-1`.
- check_existing (bool, default True): If `True`, pull and diff with existing data from the pipe.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple`.
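For illustration, a minimal sketch of a pipe that qualifies for an in-place sync: its connector keys match its instance keys, and its fetch definition is a SQL query against the same database. The keys, source table, and file path below are hypothetical.
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe(
...     'sql:demo', 'metrics',
...     instance='sql:demo',
...     columns={'datetime': 'dt'},
...     parameters={'fetch': {'definition': 'SELECT * FROM metrics_src'}},
... )
>>> pipe.sync()  # may run entirely in the database instead of pulling rows into Pandas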
2624def get_sync_time( 2625 self, 2626 pipe: 'mrsm.Pipe', 2627 params: Optional[Dict[str, Any]] = None, 2628 newest: bool = True, 2629 remote: bool = False, 2630 debug: bool = False, 2631) -> Union[datetime, int, None]: 2632 """Get a Pipe's most recent datetime value. 2633 2634 Parameters 2635 ---------- 2636 pipe: mrsm.Pipe 2637 The pipe to get the sync time for. 2638 2639 params: Optional[Dict[str, Any]], default None 2640 Optional params dictionary to build the `WHERE` clause. 2641 See `meerschaum.utils.sql.build_where`. 2642 2643 newest: bool, default True 2644 If `True`, get the most recent datetime (honoring `params`). 2645 If `False`, get the oldest datetime (ASC instead of DESC). 2646 2647 remote: bool, default False 2648 If `True`, return the sync time for the remote fetch definition. 2649 2650 Returns 2651 ------- 2652 A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`. 2653 """ 2654 from meerschaum.utils.sql import sql_item_name, build_where, wrap_query_with_cte 2655 src_name = sql_item_name('src', self.flavor) 2656 table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 2657 2658 dt_col = pipe.columns.get('datetime', None) 2659 if dt_col is None: 2660 return None 2661 dt_col_name = sql_item_name(dt_col, self.flavor, None) 2662 2663 if remote and pipe.connector.type != 'sql': 2664 warn(f"Cannot get the remote sync time for {pipe}.") 2665 return None 2666 2667 ASC_or_DESC = "DESC" if newest else "ASC" 2668 existing_cols = pipe.get_columns_types(debug=debug) 2669 valid_params = {} 2670 if params is not None: 2671 valid_params = {k: v for k, v in params.items() if k in existing_cols} 2672 flavor = self.flavor if not remote else pipe.connector.flavor 2673 2674 ### If no bounds are provided for the datetime column, 2675 ### add IS NOT NULL to the WHERE clause. 2676 if dt_col not in valid_params: 2677 valid_params[dt_col] = '_None' 2678 where = "" if not valid_params else build_where(valid_params, self) 2679 src_query = ( 2680 f"SELECT {dt_col_name}\nFROM {table_name}{where}" 2681 if not remote 2682 else self.get_pipe_metadef(pipe, params=params, begin=None, end=None) 2683 ) 2684 2685 base_query = ( 2686 f"SELECT {dt_col_name}\n" 2687 f"FROM {src_name}{where}\n" 2688 f"ORDER BY {dt_col_name} {ASC_or_DESC}\n" 2689 f"LIMIT 1" 2690 ) 2691 if self.flavor == 'mssql': 2692 base_query = ( 2693 f"SELECT TOP 1 {dt_col_name}\n" 2694 f"FROM {src_name}{where}\n" 2695 f"ORDER BY {dt_col_name} {ASC_or_DESC}" 2696 ) 2697 elif self.flavor == 'oracle': 2698 base_query = ( 2699 "SELECT * FROM (\n" 2700 f" SELECT {dt_col_name}\n" 2701 f" FROM {src_name}{where}\n" 2702 f" ORDER BY {dt_col_name} {ASC_or_DESC}\n" 2703 ") WHERE ROWNUM = 1" 2704 ) 2705 2706 query = wrap_query_with_cte(src_query, base_query, flavor) 2707 2708 try: 2709 db_time = self.value(query, silent=True, debug=debug) 2710 2711 ### No datetime could be found. 2712 if db_time is None: 2713 return None 2714 ### sqlite returns str. 2715 if isinstance(db_time, str): 2716 dateutil_parser = mrsm.attempt_import('dateutil.parser') 2717 st = dateutil_parser.parse(db_time) 2718 ### Do nothing if a datetime object is returned. 2719 elif isinstance(db_time, datetime): 2720 if hasattr(db_time, 'to_pydatetime'): 2721 st = db_time.to_pydatetime() 2722 else: 2723 st = db_time 2724 ### Sometimes the datetime is actually a date. 2725 elif isinstance(db_time, date): 2726 st = datetime.combine(db_time, datetime.min.time()) 2727 ### Adding support for an integer datetime axis. 
2728 elif 'int' in str(type(db_time)).lower(): 2729 st = int(db_time) 2730 ### Convert pandas timestamp to Python datetime. 2731 else: 2732 st = db_time.to_pydatetime() 2733 2734 sync_time = st 2735 2736 except Exception as e: 2737 sync_time = None 2738 warn(str(e)) 2739 2740 return sync_time
Get a Pipe's most recent datetime value.
Parameters
- pipe (mrsm.Pipe): The pipe to get the sync time for.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the `WHERE` clause. See `meerschaum.utils.sql.build_where`.
- newest (bool, default True): If `True`, get the most recent datetime (honoring `params`). If `False`, get the oldest datetime (ASC instead of DESC).
- remote (bool, default False): If `True`, return the sync time for the remote fetch definition.
Returns
- A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
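A quick sketch of usage (the connector label, pipe keys, and path are hypothetical):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, columns={'datetime': 'dt'})
>>> conn.get_sync_time(pipe)                # newest 'dt' value, or None if no rows exist
>>> conn.get_sync_time(pipe, newest=False)  # oldest 'dt' value instead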
2743def pipe_exists( 2744 self, 2745 pipe: mrsm.Pipe, 2746 debug: bool = False 2747) -> bool: 2748 """ 2749 Check that a Pipe's table exists. 2750 2751 Parameters 2752 ---------- 2753 pipe: mrsm.Pipe: 2754 The pipe to check. 2755 2756 debug: bool, default False 2757 Verbosity toggle. 2758 2759 Returns 2760 ------- 2761 A `bool` corresponding to whether a pipe's table exists. 2762 2763 """ 2764 from meerschaum.utils.sql import table_exists 2765 exists = table_exists( 2766 pipe.target, 2767 self, 2768 schema=self.get_pipe_schema(pipe), 2769 debug=debug, 2770 ) 2771 if debug: 2772 dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.')) 2773 return exists
Check that a Pipe's table exists.
Parameters
- pipe (mrsm.Pipe): The pipe to check.
- debug (bool, default False): Verbosity toggle.
Returns
- A `bool` corresponding to whether a pipe's table exists.
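For example (keys and path hypothetical):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn)
>>> conn.pipe_exists(pipe)  # True only once the pipe's target table has been created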
2776def get_pipe_rowcount( 2777 self, 2778 pipe: mrsm.Pipe, 2779 begin: Union[datetime, int, None] = None, 2780 end: Union[datetime, int, None] = None, 2781 params: Optional[Dict[str, Any]] = None, 2782 remote: bool = False, 2783 debug: bool = False 2784) -> Union[int, None]: 2785 """ 2786 Get the rowcount for a pipe in accordance with given parameters. 2787 2788 Parameters 2789 ---------- 2790 pipe: mrsm.Pipe 2791 The pipe to query with. 2792 2793 begin: Union[datetime, int, None], default None 2794 The begin datetime value. 2795 2796 end: Union[datetime, int, None], default None 2797 The end datetime value. 2798 2799 params: Optional[Dict[str, Any]], default None 2800 See `meerschaum.utils.sql.build_where`. 2801 2802 remote: bool, default False 2803 If `True`, get the rowcount for the remote table. 2804 2805 debug: bool, default False 2806 Verbosity toggle. 2807 2808 Returns 2809 ------- 2810 An `int` for the number of rows if the `pipe` exists, otherwise `None`. 2811 2812 """ 2813 from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte, build_where 2814 from meerschaum.connectors.sql._fetch import get_pipe_query 2815 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 2816 if remote: 2817 msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount." 2818 if 'fetch' not in pipe.parameters: 2819 error(msg) 2820 return None 2821 if 'definition' not in pipe.parameters['fetch']: 2822 error(msg) 2823 return None 2824 2825 flavor = self.flavor if not remote else pipe.connector.flavor 2826 conn = self if not remote else pipe.connector 2827 _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe)) 2828 dt_col = pipe.columns.get('datetime', None) 2829 dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None 2830 dt_db_type = get_db_type_from_pd_type(dt_typ, flavor) if dt_typ else None 2831 if not dt_col: 2832 dt_col = pipe.guess_datetime() 2833 dt_name = sql_item_name(dt_col, flavor, None) if dt_col else None 2834 is_guess = True 2835 else: 2836 dt_col = pipe.get_columns('datetime') 2837 dt_name = sql_item_name(dt_col, flavor, None) 2838 is_guess = False 2839 2840 if begin is not None or end is not None: 2841 if is_guess: 2842 if dt_col is None: 2843 warn( 2844 f"No datetime could be determined for {pipe}." 
2845 + "\n Ignoring begin and end...", 2846 stack=False, 2847 ) 2848 begin, end = None, None 2849 else: 2850 warn( 2851 f"A datetime wasn't specified for {pipe}.\n" 2852 + f" Using column \"{dt_col}\" for datetime bounds...", 2853 stack=False, 2854 ) 2855 2856 2857 _datetime_name = sql_item_name(dt_col, flavor) 2858 _cols_names = [ 2859 sql_item_name(col, flavor) 2860 for col in set( 2861 ( 2862 [dt_col] 2863 if dt_col 2864 else [] 2865 ) + ( 2866 [] 2867 if params is None 2868 else list(params.keys()) 2869 ) 2870 ) 2871 ] 2872 if not _cols_names: 2873 _cols_names = ['*'] 2874 2875 src = ( 2876 f"SELECT {', '.join(_cols_names)}\nFROM {_pipe_name}" 2877 if not remote 2878 else get_pipe_query(pipe) 2879 ) 2880 parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', flavor)}" 2881 query = wrap_query_with_cte(src, parent_query, flavor) 2882 if begin is not None or end is not None: 2883 query += "\nWHERE" 2884 if begin is not None: 2885 query += ( 2886 f"\n {dt_name} >= " 2887 + dateadd_str(flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type) 2888 ) 2889 if end is not None and begin is not None: 2890 query += "\n AND" 2891 if end is not None: 2892 query += ( 2893 f"\n {dt_name} < " 2894 + dateadd_str(flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type) 2895 ) 2896 if params is not None: 2897 existing_cols = pipe.get_columns_types(debug=debug) 2898 valid_params = {k: v for k, v in params.items() if k in existing_cols} 2899 if valid_params: 2900 query += build_where(valid_params, conn).replace('WHERE', ( 2901 'AND' if (begin is not None or end is not None) 2902 else 'WHERE' 2903 ) 2904 ) 2905 2906 result = conn.value(query, debug=debug, silent=True) 2907 try: 2908 return int(result) 2909 except Exception: 2910 return None
Get the rowcount for a pipe in accordance with given parameters.
Parameters
- pipe (mrsm.Pipe): The pipe to query with.
- begin (Union[datetime, int, None], default None): The begin datetime value.
- end (Union[datetime, int, None], default None): The end datetime value.
- params (Optional[Dict[str, Any]], default None): See `meerschaum.utils.sql.build_where`.
- remote (bool, default False): If `True`, get the rowcount for the remote table.
- debug (bool, default False): Verbosity toggle.
Returns
- An `int` for the number of rows if the `pipe` exists, otherwise `None`.
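A sketch of bounded and unbounded rowcounts (keys, dates, and path are hypothetical). Note that `begin` is inclusive and `end` is exclusive, matching the generated `>=` / `<` bounds in the source above.
>>> from datetime import datetime
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, columns={'datetime': 'dt'})
>>> conn.get_pipe_rowcount(pipe)  # all rows
>>> conn.get_pipe_rowcount(pipe, begin=datetime(2024, 1, 1), end=datetime(2024, 2, 1))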
2913def drop_pipe( 2914 self, 2915 pipe: mrsm.Pipe, 2916 debug: bool = False, 2917 **kw 2918) -> SuccessTuple: 2919 """ 2920 Drop a pipe's tables but maintain its registration. 2921 2922 Parameters 2923 ---------- 2924 pipe: mrsm.Pipe 2925 The pipe to drop. 2926 2927 Returns 2928 ------- 2929 A `SuccessTuple` indicated success. 2930 """ 2931 from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS 2932 success = True 2933 target = pipe.target 2934 schema = self.get_pipe_schema(pipe) 2935 target_name = ( 2936 sql_item_name(target, self.flavor, schema) 2937 ) 2938 if table_exists(target, self, schema=schema, debug=debug): 2939 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 2940 success = self.exec( 2941 f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug 2942 ) is not None 2943 2944 msg = "Success" if success else f"Failed to drop {pipe}." 2945 return success, msg
Drop a pipe's tables but maintain its registration.
Parameters
- pipe (mrsm.Pipe): The pipe to drop.
Returns
- A `SuccessTuple` indicating success.
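For example (keys and path hypothetical):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn)
>>> success, msg = conn.drop_pipe(pipe)  # drops the table; the pipe's registration remains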
2948def clear_pipe( 2949 self, 2950 pipe: mrsm.Pipe, 2951 begin: Union[datetime, int, None] = None, 2952 end: Union[datetime, int, None] = None, 2953 params: Optional[Dict[str, Any]] = None, 2954 debug: bool = False, 2955 **kw 2956) -> SuccessTuple: 2957 """ 2958 Delete a pipe's data within a bounded or unbounded interval without dropping the table. 2959 2960 Parameters 2961 ---------- 2962 pipe: mrsm.Pipe 2963 The pipe to clear. 2964 2965 begin: Union[datetime, int, None], default None 2966 Beginning datetime. Inclusive. 2967 2968 end: Union[datetime, int, None], default None 2969 Ending datetime. Exclusive. 2970 2971 params: Optional[Dict[str, Any]], default None 2972 See `meerschaum.utils.sql.build_where`. 2973 2974 """ 2975 if not pipe.exists(debug=debug): 2976 return True, f"{pipe} does not exist, so nothing was cleared." 2977 2978 from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str 2979 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 2980 pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 2981 2982 dt_col = pipe.columns.get('datetime', None) 2983 dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None 2984 dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None 2985 if not pipe.columns.get('datetime', None): 2986 dt_col = pipe.guess_datetime() 2987 dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None 2988 is_guess = True 2989 else: 2990 dt_col = pipe.get_columns('datetime') 2991 dt_name = sql_item_name(dt_col, self.flavor, None) 2992 is_guess = False 2993 2994 if begin is not None or end is not None: 2995 if is_guess: 2996 if dt_col is None: 2997 warn( 2998 f"No datetime could be determined for {pipe}." 2999 + "\n Ignoring datetime bounds...", 3000 stack=False, 3001 ) 3002 begin, end = None, None 3003 else: 3004 warn( 3005 f"A datetime wasn't specified for {pipe}.\n" 3006 + f" Using column \"{dt_col}\" for datetime bounds...", 3007 stack=False, 3008 ) 3009 3010 valid_params = {} 3011 if params is not None: 3012 existing_cols = pipe.get_columns_types(debug=debug) 3013 valid_params = {k: v for k, v in params.items() if k in existing_cols} 3014 clear_query = ( 3015 f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n" 3016 + ('\n AND ' + build_where(valid_params, self, with_where=False) if valid_params else '') 3017 + ( 3018 ( 3019 f'\n AND {dt_name} >= ' 3020 + dateadd_str(self.flavor, 'day', 0, begin, db_type=dt_db_type) 3021 ) 3022 if begin is not None 3023 else '' 3024 ) + ( 3025 ( 3026 f'\n AND {dt_name} < ' 3027 + dateadd_str(self.flavor, 'day', 0, end, db_type=dt_db_type) 3028 ) 3029 if end is not None 3030 else '' 3031 ) 3032 ) 3033 success = self.exec(clear_query, silent=True, debug=debug) is not None 3034 msg = "Success" if success else f"Failed to clear {pipe}." 3035 return success, msg
Delete a pipe's data within a bounded or unbounded interval without dropping the table.
Parameters
- pipe (mrsm.Pipe): The pipe to clear.
- begin (Union[datetime, int, None], default None): Beginning datetime. Inclusive.
- end (Union[datetime, int, None], default None): Ending datetime. Exclusive.
- params (Optional[Dict[str, Any]], default None): See `meerschaum.utils.sql.build_where`.
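For example, the following sketch deletes only January 2024 rows (`begin` inclusive, `end` exclusive) while leaving the table intact; keys, dates, and path are hypothetical:
>>> from datetime import datetime
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, columns={'datetime': 'dt'})
>>> conn.clear_pipe(pipe, begin=datetime(2024, 1, 1), end=datetime(2024, 2, 1))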
3678def deduplicate_pipe( 3679 self, 3680 pipe: mrsm.Pipe, 3681 begin: Union[datetime, int, None] = None, 3682 end: Union[datetime, int, None] = None, 3683 params: Optional[Dict[str, Any]] = None, 3684 debug: bool = False, 3685 **kwargs: Any 3686) -> SuccessTuple: 3687 """ 3688 Delete duplicate values within a pipe's table. 3689 3690 Parameters 3691 ---------- 3692 pipe: mrsm.Pipe 3693 The pipe whose table to deduplicate. 3694 3695 begin: Union[datetime, int, None], default None 3696 If provided, only deduplicate values greater than or equal to this value. 3697 3698 end: Union[datetime, int, None], default None 3699 If provided, only deduplicate values less than this value. 3700 3701 params: Optional[Dict[str, Any]], default None 3702 If provided, further limit deduplication to values which match this query dictionary. 3703 3704 debug: bool, default False 3705 Verbosity toggle. 3706 3707 Returns 3708 ------- 3709 A `SuccessTuple` indicating success. 3710 """ 3711 from meerschaum.utils.sql import ( 3712 sql_item_name, 3713 get_rename_table_queries, 3714 DROP_IF_EXISTS_FLAVORS, 3715 get_create_table_query, 3716 format_cte_subquery, 3717 get_null_replacement, 3718 ) 3719 from meerschaum.utils.misc import generate_password, flatten_list 3720 3721 pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe)) 3722 3723 if not pipe.exists(debug=debug): 3724 return False, f"Table {pipe_table_name} does not exist." 3725 3726 dt_col = pipe.columns.get('datetime', None) 3727 cols_types = pipe.get_columns_types(debug=debug) 3728 existing_cols = pipe.get_columns_types(debug=debug) 3729 3730 get_rowcount_query = f"SELECT COUNT(*) FROM {pipe_table_name}" 3731 old_rowcount = self.value(get_rowcount_query, debug=debug) 3732 if old_rowcount is None: 3733 return False, f"Failed to get rowcount for table {pipe_table_name}." 3734 3735 ### Non-datetime indices that in fact exist. 3736 indices = [ 3737 col 3738 for key, col in pipe.columns.items() 3739 if col and col != dt_col and col in cols_types 3740 ] 3741 indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices] 3742 existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols] 3743 duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None) 3744 previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None) 3745 3746 index_list_str = ( 3747 sql_item_name(dt_col, self.flavor, None) 3748 if dt_col 3749 else '' 3750 ) 3751 index_list_str_ordered = ( 3752 ( 3753 sql_item_name(dt_col, self.flavor, None) + " DESC" 3754 ) 3755 if dt_col 3756 else '' 3757 ) 3758 if indices: 3759 index_list_str += ', ' + ', '.join(indices_names) 3760 index_list_str_ordered += ', ' + ', '.join(indices_names) 3761 if index_list_str.startswith(','): 3762 index_list_str = index_list_str.lstrip(',').lstrip() 3763 if index_list_str_ordered.startswith(','): 3764 index_list_str_ordered = index_list_str_ordered.lstrip(',').lstrip() 3765 3766 cols_list_str = ', '.join(existing_cols_names) 3767 3768 try: 3769 ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()). 
3770 is_old_mysql = ( 3771 self.flavor in ('mysql', 'mariadb') 3772 and 3773 int(self.db_version.split('.')[0]) < 8 3774 ) 3775 except Exception: 3776 is_old_mysql = False 3777 3778 src_query = f""" 3779 SELECT 3780 {cols_list_str}, 3781 ROW_NUMBER() OVER ( 3782 PARTITION BY 3783 {index_list_str} 3784 ORDER BY {index_list_str_ordered} 3785 ) AS {duplicate_row_number_name} 3786 FROM {pipe_table_name} 3787 """ 3788 duplicates_cte_subquery = format_cte_subquery( 3789 src_query, 3790 self.flavor, 3791 sub_name = 'src', 3792 cols_to_select = cols_list_str, 3793 ) + f""" 3794 WHERE {duplicate_row_number_name} = 1 3795 """ 3796 old_mysql_query = ( 3797 f""" 3798 SELECT 3799 {index_list_str} 3800 FROM ( 3801 SELECT 3802 {index_list_str}, 3803 IF( 3804 @{previous_row_number_name} <> {index_list_str.replace(', ', ' + ')}, 3805 @{duplicate_row_number_name} := 0, 3806 @{duplicate_row_number_name} 3807 ), 3808 @{previous_row_number_name} := {index_list_str.replace(', ', ' + ')}, 3809 @{duplicate_row_number_name} := @{duplicate_row_number_name} + 1 AS """ 3810 + f"""{duplicate_row_number_name} 3811 FROM 3812 {pipe_table_name}, 3813 ( 3814 SELECT @{duplicate_row_number_name} := 0 3815 ) AS {duplicate_row_number_name}, 3816 ( 3817 SELECT @{previous_row_number_name} := '{get_null_replacement('str', 'mysql')}' 3818 ) AS {previous_row_number_name} 3819 ORDER BY {index_list_str_ordered} 3820 ) AS t 3821 WHERE {duplicate_row_number_name} = 1 3822 """ 3823 ) 3824 if is_old_mysql: 3825 duplicates_cte_subquery = old_mysql_query 3826 3827 session_id = generate_password(3) 3828 3829 dedup_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='dedup') 3830 temp_old_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='old') 3831 temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe)) 3832 3833 create_temporary_table_query = get_create_table_query( 3834 duplicates_cte_subquery, 3835 dedup_table, 3836 self.flavor, 3837 ) + f""" 3838 ORDER BY {index_list_str_ordered} 3839 """ 3840 if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else "" 3841 alter_queries = flatten_list([ 3842 get_rename_table_queries( 3843 pipe.target, 3844 temp_old_table, 3845 self.flavor, 3846 schema=self.get_pipe_schema(pipe), 3847 ), 3848 get_rename_table_queries( 3849 dedup_table, 3850 pipe.target, 3851 self.flavor, 3852 schema=self.get_pipe_schema(pipe), 3853 ), 3854 f"DROP TABLE {if_exists_str} {temp_old_table_name}", 3855 ]) 3856 3857 self._log_temporary_tables_creation(temp_old_table, create=(not pipe.temporary), debug=debug) 3858 create_temporary_result = self.execute(create_temporary_table_query, debug=debug) 3859 if create_temporary_result is None: 3860 return False, f"Failed to deduplicate table {pipe_table_name}." 3861 3862 results = self.exec_queries( 3863 alter_queries, 3864 break_on_error=True, 3865 rollback=True, 3866 debug=debug, 3867 ) 3868 3869 fail_query = None 3870 for result, query in zip(results, alter_queries): 3871 if result is None: 3872 fail_query = query 3873 break 3874 success = fail_query is None 3875 3876 new_rowcount = ( 3877 self.value(get_rowcount_query, debug=debug) 3878 if success 3879 else None 3880 ) 3881 3882 msg = ( 3883 ( 3884 f"Successfully deduplicated table {pipe_table_name}" 3885 + ( 3886 f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows" 3887 if old_rowcount != new_rowcount 3888 else '' 3889 ) + '.' 
3890 ) 3891 if success 3892 else f"Failed to execute query:\n{fail_query}" 3893 ) 3894 return success, msg
Delete duplicate values within a pipe's table.
Parameters
- pipe (mrsm.Pipe): The pipe whose table to deduplicate.
- begin (Union[datetime, int, None], default None): If provided, only deduplicate values greater than or equal to this value.
- end (Union[datetime, int, None], default None): If provided, only deduplicate values less than this value.
- params (Optional[Dict[str, Any]], default None): If provided, further limit deduplication to values which match this query dictionary.
- debug (bool, default False): Verbosity toggle.
Returns
- A `SuccessTuple` indicating success.
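For example (keys and path hypothetical; the message format follows the source above):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'demo', flavor='sqlite', database='/tmp/demo.db')
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, columns={'datetime': 'dt'})
>>> success, msg = conn.deduplicate_pipe(pipe)
>>> print(msg)  # e.g. 'Successfully deduplicated table "metrics" from 1,200 to 1,000 rows.'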
3038def get_pipe_table( 3039 self, 3040 pipe: mrsm.Pipe, 3041 debug: bool = False, 3042) -> Union['sqlalchemy.Table', None]: 3043 """ 3044 Return the `sqlalchemy.Table` object for a `mrsm.Pipe`. 3045 3046 Parameters 3047 ---------- 3048 pipe: mrsm.Pipe: 3049 The pipe in question. 3050 3051 Returns 3052 ------- 3053 A `sqlalchemy.Table` object. 3054 3055 """ 3056 from meerschaum.utils.sql import get_sqlalchemy_table 3057 if not pipe.exists(debug=debug): 3058 return None 3059 3060 return get_sqlalchemy_table( 3061 pipe.target, 3062 connector=self, 3063 schema=self.get_pipe_schema(pipe), 3064 debug=debug, 3065 refresh=True, 3066 )
Return the `sqlalchemy.Table` object for a `mrsm.Pipe`.
Parameters
- pipe (mrsm.Pipe): The pipe in question.
Returns
- A `sqlalchemy.Table` object.
3069def get_pipe_columns_types( 3070 self, 3071 pipe: mrsm.Pipe, 3072 debug: bool = False, 3073) -> Dict[str, str]: 3074 """ 3075 Get the pipe's columns and types. 3076 3077 Parameters 3078 ---------- 3079 pipe: mrsm.Pipe: 3080 The pipe to get the columns for. 3081 3082 Returns 3083 ------- 3084 A dictionary of columns names (`str`) and types (`str`). 3085 3086 Examples 3087 -------- 3088 >>> conn.get_pipe_columns_types(pipe) 3089 { 3090 'dt': 'TIMESTAMP WITHOUT TIMEZONE', 3091 'id': 'BIGINT', 3092 'val': 'DOUBLE PRECISION', 3093 } 3094 >>> 3095 """ 3096 from meerschaum.utils.sql import get_table_cols_types 3097 if not pipe.exists(debug=debug): 3098 return {} 3099 3100 if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite', 'geopackage'): 3101 return get_table_cols_types( 3102 pipe.target, 3103 self, 3104 flavor=self.flavor, 3105 schema=self.get_pipe_schema(pipe), 3106 debug=debug, 3107 ) 3108 3109 if debug: 3110 dprint(f"Fetching columns_types for {pipe} with via SQLAlchemy table.") 3111 3112 table_columns = {} 3113 try: 3114 pipe_table = self.get_pipe_table(pipe, debug=debug) 3115 if pipe_table is None: 3116 return {} 3117 3118 if debug: 3119 dprint("Found columns:") 3120 mrsm.pprint(dict(pipe_table.columns)) 3121 3122 for col in pipe_table.columns: 3123 table_columns[str(col.name)] = str(col.type) 3124 except Exception as e: 3125 traceback.print_exc() 3126 warn(e) 3127 table_columns = {} 3128 3129 return table_columns
Get the pipe's columns and types.
Parameters
- pipe (mrsm.Pipe): The pipe to get the columns for.
Returns
- A dictionary of column names (`str`) and types (`str`).
Examples
>>> conn.get_pipe_columns_types(pipe)
{
'dt': 'TIMESTAMP WITHOUT TIMEZONE',
'id': 'BIGINT',
'val': 'DOUBLE PRECISION',
}
>>>
3624def get_to_sql_dtype( 3625 self, 3626 pipe: 'mrsm.Pipe', 3627 df: 'pd.DataFrame', 3628 update_dtypes: bool = True, 3629) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']: 3630 """ 3631 Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`. 3632 3633 Parameters 3634 ---------- 3635 pipe: mrsm.Pipe 3636 The pipe which may contain a `dtypes` parameter. 3637 3638 df: pd.DataFrame 3639 The DataFrame to be pushed via `to_sql()`. 3640 3641 update_dtypes: bool, default True 3642 If `True`, patch the pipe's dtypes onto the DataFrame's dtypes. 3643 3644 Returns 3645 ------- 3646 A dictionary with `sqlalchemy` datatypes. 3647 3648 Examples 3649 -------- 3650 >>> import pandas as pd 3651 >>> import meerschaum as mrsm 3652 >>> 3653 >>> conn = mrsm.get_connector('sql:memory') 3654 >>> df = pd.DataFrame([{'a': {'b': 1}}]) 3655 >>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'}) 3656 >>> get_to_sql_dtype(pipe, df) 3657 {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>} 3658 """ 3659 from meerschaum.utils.dataframe import get_special_cols 3660 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 3661 df_dtypes = { 3662 col: str(typ) 3663 for col, typ in df.dtypes.items() 3664 } 3665 special_cols = get_special_cols(df) 3666 df_dtypes.update(special_cols) 3667 3668 if update_dtypes: 3669 df_dtypes.update(pipe.dtypes) 3670 3671 return { 3672 col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True) 3673 for col, typ in df_dtypes.items() 3674 if col and typ 3675 }
Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
Parameters
- pipe (mrsm.Pipe): The pipe which may contain a `dtypes` parameter.
- df (pd.DataFrame): The DataFrame to be pushed via `to_sql()`.
- update_dtypes (bool, default True): If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.
Returns
- A dictionary with `sqlalchemy` datatypes.
Examples
>>> import pandas as pd
>>> import meerschaum as mrsm
>>>
>>> conn = mrsm.get_connector('sql:memory')
>>> df = pd.DataFrame([{'a': {'b': 1}}])
>>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
>>> get_to_sql_dtype(pipe, df)
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3897def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]: 3898 """ 3899 Return the schema to use for this pipe. 3900 First check `pipe.parameters['schema']`, then check `self.schema`. 3901 3902 Parameters 3903 ---------- 3904 pipe: mrsm.Pipe 3905 The pipe which may contain a configured schema. 3906 3907 Returns 3908 ------- 3909 A schema string or `None` if nothing is configured. 3910 """ 3911 if self.flavor in ('sqlite', 'geopackage'): 3912 return self.schema 3913 return pipe.parameters.get('schema', self.schema)
Return the schema to use for this pipe.
First check `pipe.parameters['schema']`, then check `self.schema`.
Parameters
- pipe (mrsm.Pipe): The pipe which may contain a configured schema.
Returns
- A schema string or `None` if nothing is configured.
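For example (keys hypothetical; note that SQLite and GeoPackage connectors always return the connector's own schema):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:demo')  # assume a configured PostgreSQL connector
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, parameters={'schema': 'analytics'})
>>> conn.get_pipe_schema(pipe)
'analytics'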
1551def create_pipe_table_from_df( 1552 self, 1553 pipe: mrsm.Pipe, 1554 df: 'pd.DataFrame', 1555 debug: bool = False, 1556) -> mrsm.SuccessTuple: 1557 """ 1558 Create a pipe's table from its configured dtypes and an incoming dataframe. 1559 """ 1560 from meerschaum.utils.dataframe import get_special_cols 1561 from meerschaum.utils.sql import ( 1562 get_create_table_queries, 1563 sql_item_name, 1564 get_create_schema_if_not_exists_queries, 1565 ) 1566 from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type 1567 if self.flavor == 'geopackage': 1568 init_success, init_msg = self._init_geopackage_pipe(df, pipe, debug=debug) 1569 if not init_success: 1570 return init_success, init_msg 1571 1572 primary_key = pipe.columns.get('primary', None) 1573 primary_key_typ = ( 1574 pipe.dtypes.get(primary_key, str(df.dtypes.get(primary_key, 'int'))) 1575 if primary_key 1576 else None 1577 ) 1578 primary_key_db_type = ( 1579 get_db_type_from_pd_type(primary_key_typ, self.flavor) 1580 if primary_key 1581 else None 1582 ) 1583 dt_col = pipe.columns.get('datetime', None) 1584 new_dtypes = { 1585 **{ 1586 col: str(typ) 1587 for col, typ in df.dtypes.items() 1588 }, 1589 **{ 1590 col: str(df.dtypes.get(col, 'int')) 1591 for col_ix, col in pipe.columns.items() 1592 if col and col_ix != 'primary' 1593 }, 1594 **get_special_cols(df), 1595 **pipe.dtypes 1596 } 1597 autoincrement = ( 1598 pipe.parameters.get('autoincrement', False) 1599 or (primary_key and primary_key not in new_dtypes) 1600 ) 1601 if autoincrement: 1602 _ = new_dtypes.pop(primary_key, None) 1603 1604 schema = self.get_pipe_schema(pipe) 1605 create_table_queries = get_create_table_queries( 1606 new_dtypes, 1607 pipe.target, 1608 self.flavor, 1609 schema=schema, 1610 primary_key=primary_key, 1611 primary_key_db_type=primary_key_db_type, 1612 datetime_column=dt_col, 1613 ) 1614 if schema: 1615 create_table_queries = ( 1616 get_create_schema_if_not_exists_queries(schema, self.flavor) 1617 + create_table_queries 1618 ) 1619 success = all( 1620 self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug) 1621 ) 1622 target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor) 1623 msg = ( 1624 "Success" 1625 if success 1626 else f"Failed to create {target_name}." 1627 ) 1628 if success and self.flavor == 'geopackage': 1629 return self._init_geopackage_pipe(df, pipe, debug=debug) 1630 1631 return success, msg
Create a pipe's table from its configured dtypes and an incoming dataframe.
3132def get_pipe_columns_indices( 3133 self, 3134 pipe: mrsm.Pipe, 3135 debug: bool = False, 3136) -> Dict[str, List[Dict[str, str]]]: 3137 """ 3138 Return a dictionary mapping columns to the indices created on those columns. 3139 3140 Parameters 3141 ---------- 3142 pipe: mrsm.Pipe 3143 The pipe to be queried against. 3144 3145 Returns 3146 ------- 3147 A dictionary mapping columns names to lists of dictionaries. 3148 The dictionaries in the lists contain the name and type of the indices. 3149 """ 3150 if pipe.__dict__.get('_skip_check_indices', False): 3151 return {} 3152 3153 from meerschaum.utils.sql import get_table_cols_indices 3154 return get_table_cols_indices( 3155 pipe.target, 3156 self, 3157 flavor=self.flavor, 3158 schema=self.get_pipe_schema(pipe), 3159 debug=debug, 3160 )
Return a dictionary mapping columns to the indices created on those columns.
Parameters
- pipe (mrsm.Pipe): The pipe to be queried against.
Returns
- A dictionary mapping column names to lists of dictionaries; each inner dictionary contains the name and type of an index.
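A hedged sketch of the result's shape (the index name and type shown are illustrative):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:demo')  # assume a configured SQL connector
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, columns={'datetime': 'dt'})
>>> conn.get_pipe_columns_indices(pipe)
{'dt': [{'name': 'IX_demo_metrics_dt', 'type': 'INDEX'}]}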
3916@staticmethod 3917def get_temporary_target( 3918 target: str, 3919 transact_id: Optional[str] = None, 3920 label: Optional[str] = None, 3921 separator: Optional[str] = None, 3922) -> str: 3923 """ 3924 Return a unique(ish) temporary target for a pipe. 3925 """ 3926 from meerschaum.utils.misc import generate_password 3927 temp_target_cf = ( 3928 mrsm.get_config('system', 'connectors', 'sql', 'instance', 'temporary_target') or {} 3929 ) 3930 transaction_id_len = temp_target_cf.get('transaction_id_length', 3) 3931 transact_id = transact_id or generate_password(transaction_id_len) 3932 temp_prefix = temp_target_cf.get('prefix', '_') 3933 separator = separator or temp_target_cf.get('separator', '_') 3934 return ( 3935 temp_prefix 3936 + target 3937 + separator 3938 + transact_id 3939 + ((separator + label) if label else '') 3940 )
Return a unique(ish) temporary target for a pipe.
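For example, assuming the default prefix (`'_'`) and separator (`'_'`) from the `temporary_target` configuration (the second result's transaction ID is randomly generated and shown only for illustration):
>>> from meerschaum.connectors.sql import SQLConnector
>>> SQLConnector.get_temporary_target('weather', transact_id='abc', label='dedup')
'_weather_abc_dedup'
>>> SQLConnector.get_temporary_target('weather')
'_weather_x2f'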
351def create_pipe_indices( 352 self, 353 pipe: mrsm.Pipe, 354 columns: Optional[List[str]] = None, 355 debug: bool = False, 356) -> SuccessTuple: 357 """ 358 Create a pipe's indices. 359 """ 360 success = self.create_indices(pipe, columns=columns, debug=debug) 361 msg = ( 362 "Success" 363 if success 364 else f"Failed to create indices for {pipe}." 365 ) 366 return success, msg
Create a pipe's indices.
407def drop_pipe_indices( 408 self, 409 pipe: mrsm.Pipe, 410 columns: Optional[List[str]] = None, 411 debug: bool = False, 412) -> SuccessTuple: 413 """ 414 Drop a pipe's indices. 415 """ 416 success = self.drop_indices(pipe, columns=columns, debug=debug) 417 msg = ( 418 "Success" 419 if success 420 else f"Failed to drop indices for {pipe}." 421 ) 422 return success, msg
Drop a pipe's indices.
459def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]: 460 """ 461 Return a dictionary mapping index keys to their names on the database. 462 463 Returns 464 ------- 465 A dictionary of index keys to column names. 466 """ 467 from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS, truncate_item_name 468 _parameters = pipe.parameters 469 _index_template = _parameters.get('index_template', "IX_{schema_str}{target}_{column_names}") 470 _schema = self.get_pipe_schema(pipe) 471 if _schema is None: 472 _schema = ( 473 DEFAULT_SCHEMA_FLAVORS.get(self.flavor, None) 474 if self.flavor != 'mssql' 475 else None 476 ) 477 schema_str = '' if _schema is None else f'{_schema}_' 478 schema_str = '' 479 _indices = pipe.indices 480 _target = pipe.target 481 _column_names = { 482 ix: ( 483 '_'.join(cols) 484 if isinstance(cols, (list, tuple)) 485 else str(cols) 486 ) 487 for ix, cols in _indices.items() 488 if cols 489 } 490 _index_names = { 491 ix: _index_template.format( 492 target=_target, 493 column_names=column_names, 494 connector_keys=pipe.connector_keys, 495 metric_key=pipe.metric_key, 496 location_key=pipe.location_key, 497 schema_str=schema_str, 498 ) 499 for ix, column_names in _column_names.items() 500 } 501 ### NOTE: Skip any duplicate indices. 502 seen_index_names = {} 503 for ix, index_name in _index_names.items(): 504 if index_name in seen_index_names: 505 continue 506 seen_index_names[index_name] = ix 507 return { 508 ix: truncate_item_name(index_name, flavor=self.flavor) 509 for index_name, ix in seen_index_names.items() 510 }
Return a dictionary mapping index keys to their names on the database.
Returns
- A dictionary mapping index keys to their names on the database.
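A hedged sketch (the pipe's default target and the resulting index name are illustrative; the default template is `IX_{schema_str}{target}_{column_names}`):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:demo')  # assume a configured SQL connector
>>> pipe = mrsm.Pipe('demo', 'metrics', instance=conn, columns={'datetime': 'dt'})
>>> conn.get_pipe_index_names(pipe)
{'datetime': 'IX_demo_metrics_dt'}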
18def get_plugins_pipe(self) -> mrsm.Pipe: 19 """ 20 Return the internal metadata plugins pipe. 21 """ 22 users_pipe = self.get_users_pipe() 23 user_id_dtype = users_pipe.dtypes.get('user_id', 'int') 24 return mrsm.Pipe( 25 'mrsm', 'plugins', 26 instance=self, 27 temporary=True, 28 static=True, 29 null_indices=False, 30 columns={ 31 'primary': 'plugin_id', 32 'user_id': 'user_id', 33 }, 34 dtypes={ 35 'plugin_name': 'string', 36 'user_id': user_id_dtype, 37 'attributes': 'json', 38 'version': 'string', 39 }, 40 indices={ 41 'unique': 'plugin_name', 42 }, 43 )
Return the internal metadata plugins pipe.
46def register_plugin( 47 self, 48 plugin: 'mrsm.core.Plugin', 49 force: bool = False, 50 debug: bool = False, 51 **kw: Any 52) -> SuccessTuple: 53 """Register a new plugin to the plugins table.""" 54 from meerschaum.utils.packages import attempt_import 55 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 56 from meerschaum.utils.sql import json_flavors 57 from meerschaum.connectors.sql.tables import get_tables 58 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 59 60 old_id = self.get_plugin_id(plugin, debug=debug) 61 62 ### Check for version conflict. May be overridden with `--force`. 63 if old_id is not None and not force: 64 old_version = self.get_plugin_version(plugin, debug=debug) 65 new_version = plugin.version 66 if old_version is None: 67 old_version = '' 68 if new_version is None: 69 new_version = '' 70 71 ### verify that the new version is greater than the old 72 packaging_version = attempt_import('packaging.version') 73 if ( 74 old_version and new_version 75 and packaging_version.parse(old_version) >= packaging_version.parse(new_version) 76 ): 77 return False, ( 78 f"Version '{new_version}' of plugin '{plugin}' " + 79 f"must be greater than existing version '{old_version}'." 80 ) 81 82 bind_variables = { 83 'plugin_name': plugin.name, 84 'version': plugin.version, 85 'attributes': ( 86 json.dumps(plugin.attributes) if self.flavor not in json_flavors else plugin.attributes 87 ), 88 'user_id': plugin.user_id, 89 } 90 91 if old_id is None: 92 query = sqlalchemy.insert(plugins_tbl).values(**bind_variables) 93 else: 94 query = ( 95 sqlalchemy.update(plugins_tbl) 96 .values(**bind_variables) 97 .where(plugins_tbl.c.plugin_id == old_id) 98 ) 99 100 result = self.exec(query, debug=debug) 101 if result is None: 102 return False, f"Failed to register plugin '{plugin}'." 103 return True, f"Successfully registered plugin '{plugin}'."
Register a new plugin to the plugins table.
272def delete_plugin( 273 self, 274 plugin: 'mrsm.core.Plugin', 275 debug: bool = False, 276 **kw: Any 277) -> SuccessTuple: 278 """Delete a plugin from the plugins table.""" 279 from meerschaum.utils.packages import attempt_import 280 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 281 from meerschaum.connectors.sql.tables import get_tables 282 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 283 284 plugin_id = self.get_plugin_id(plugin, debug=debug) 285 if plugin_id is None: 286 return True, f"Plugin '{plugin}' was not registered." 287 288 query = sqlalchemy.delete(plugins_tbl).where(plugins_tbl.c.plugin_id == plugin_id) 289 result = self.exec(query, debug=debug) 290 if result is None: 291 return False, f"Failed to delete plugin '{plugin}'." 292 return True, f"Successfully deleted plugin '{plugin}'."
Delete a plugin from the plugins table.
105def get_plugin_id( 106 self, 107 plugin: 'mrsm.core.Plugin', 108 debug: bool = False 109) -> Optional[int]: 110 """ 111 Return a plugin's ID. 112 """ 113 ### ensure plugins table exists 114 from meerschaum.connectors.sql.tables import get_tables 115 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 116 from meerschaum.utils.packages import attempt_import 117 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 118 119 query = ( 120 sqlalchemy 121 .select(plugins_tbl.c.plugin_id) 122 .where(plugins_tbl.c.plugin_name == plugin.name) 123 ) 124 125 try: 126 return int(self.value(query, debug=debug)) 127 except Exception: 128 return None
Return a plugin's ID.
131def get_plugin_version( 132 self, 133 plugin: 'mrsm.core.Plugin', 134 debug: bool = False 135) -> Optional[str]: 136 """ 137 Return a plugin's version. 138 """ 139 ### ensure plugins table exists 140 from meerschaum.connectors.sql.tables import get_tables 141 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 142 from meerschaum.utils.packages import attempt_import 143 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 144 query = sqlalchemy.select(plugins_tbl.c.version).where(plugins_tbl.c.plugin_name == plugin.name) 145 return self.value(query, debug=debug)
Return a plugin's version.
225def get_plugins( 226 self, 227 user_id: Optional[int] = None, 228 search_term: Optional[str] = None, 229 debug: bool = False, 230 **kw: Any 231) -> List[str]: 232 """ 233 Return a list of all registered plugins. 234 235 Parameters 236 ---------- 237 user_id: Optional[int], default None 238 If specified, filter plugins by a specific `user_id`. 239 240 search_term: Optional[str], default None 241 If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins. 242 243 244 Returns 245 ------- 246 A list of plugin names. 247 """ 248 ### ensure plugins table exists 249 from meerschaum.connectors.sql.tables import get_tables 250 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 251 from meerschaum.utils.packages import attempt_import 252 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 253 254 query = sqlalchemy.select(plugins_tbl.c.plugin_name) 255 if user_id is not None: 256 query = query.where(plugins_tbl.c.user_id == user_id) 257 if search_term is not None: 258 query = query.where(plugins_tbl.c.plugin_name.like(search_term + '%')) 259 260 rows = ( 261 self.execute(query).fetchall() 262 if self.flavor != 'duckdb' 263 else [ 264 (row['plugin_name'],) 265 for row in self.read(query).to_dict(orient='records') 266 ] 267 ) 268 269 return [row[0] for row in rows]
Return a list of all registered plugins.
Parameters
- user_id (Optional[int], default None): If specified, filter plugins by a specific `user_id`.
- search_term (Optional[str], default None): If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins.
Returns
- A list of plugin names.
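For example (plugin names below are illustrative):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:demo')  # assume a configured instance connector
>>> conn.get_plugins()
['noaa', 'color']
>>> conn.get_plugins(search_term='no')
['noaa']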
147def get_plugin_user_id( 148 self, 149 plugin: 'mrsm.core.Plugin', 150 debug: bool = False 151) -> Optional[int]: 152 """ 153 Return a plugin's user ID. 154 """ 155 ### ensure plugins table exists 156 from meerschaum.connectors.sql.tables import get_tables 157 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 158 from meerschaum.utils.packages import attempt_import 159 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 160 161 query = ( 162 sqlalchemy 163 .select(plugins_tbl.c.user_id) 164 .where(plugins_tbl.c.plugin_name == plugin.name) 165 ) 166 167 try: 168 return int(self.value(query, debug=debug)) 169 except Exception: 170 return None
Return a plugin's user ID.
172def get_plugin_username( 173 self, 174 plugin: 'mrsm.core.Plugin', 175 debug: bool = False 176) -> Optional[str]: 177 """ 178 Return the username of a plugin's owner. 179 """ 180 ### ensure plugins table exists 181 from meerschaum.connectors.sql.tables import get_tables 182 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 183 users = get_tables(mrsm_instance=self, debug=debug)['users'] 184 from meerschaum.utils.packages import attempt_import 185 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 186 187 query = ( 188 sqlalchemy.select(users.c.username) 189 .where( 190 users.c.user_id == plugins_tbl.c.user_id 191 and plugins_tbl.c.plugin_name == plugin.name 192 ) 193 ) 194 195 return self.value(query, debug=debug)
Return the username of a plugin's owner.
198def get_plugin_attributes( 199 self, 200 plugin: 'mrsm.core.Plugin', 201 debug: bool = False 202) -> Dict[str, Any]: 203 """ 204 Return the attributes of a plugin. 205 """ 206 ### ensure plugins table exists 207 from meerschaum.connectors.sql.tables import get_tables 208 plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins'] 209 from meerschaum.utils.packages import attempt_import 210 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 211 212 query = ( 213 sqlalchemy 214 .select(plugins_tbl.c.attributes) 215 .where(plugins_tbl.c.plugin_name == plugin.name) 216 ) 217 218 _attr = self.value(query, debug=debug) 219 if isinstance(_attr, str): 220 _attr = json.loads(_attr) 221 elif _attr is None: 222 _attr = {} 223 return _attr
Return the attributes of a plugin.
16def get_users_pipe(self) -> mrsm.Pipe: 17 """ 18 Return the internal metadata pipe for users management. 19 """ 20 if '_users_pipe' in self.__dict__: 21 return self._users_pipe 22 23 cache_connector = self.__dict__.get('_cache_connector', None) 24 self._users_pipe = mrsm.Pipe( 25 'mrsm', 'users', 26 temporary=True, 27 cache=True, 28 cache_connector_keys=cache_connector, 29 static=True, 30 null_indices=False, 31 enforce=False, 32 autoincrement=True, 33 columns={ 34 'primary': 'user_id', 35 }, 36 dtypes={ 37 'user_id': 'int', 38 'username': 'string', 39 'attributes': 'json', 40 'user_type': 'string', 41 }, 42 indices={ 43 'unique': 'username', 44 }, 45 ) 46 return self._users_pipe
Return the internal metadata pipe for users management.
49def register_user( 50 self, 51 user: mrsm.core.User, 52 debug: bool = False, 53 **kw: Any 54) -> SuccessTuple: 55 """Register a new user.""" 56 from meerschaum.utils.packages import attempt_import 57 from meerschaum.utils.sql import json_flavors 58 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 59 60 valid_tuple = valid_username(user.username) 61 if not valid_tuple[0]: 62 return valid_tuple 63 64 old_id = self.get_user_id(user, debug=debug) 65 66 if old_id is not None: 67 return False, f"User '{user}' already exists." 68 69 ### ensure users table exists 70 from meerschaum.connectors.sql.tables import get_tables 71 tables = get_tables(mrsm_instance=self, debug=debug) 72 73 import json 74 bind_variables = { 75 'username': user.username, 76 'email': user.email, 77 'password_hash': user.password_hash, 78 'user_type': user.type, 79 'attributes': ( 80 json.dumps(user.attributes) 81 if self.flavor not in json_flavors 82 else user.attributes 83 ), 84 } 85 if old_id is not None: 86 return False, f"User '{user.username}' already exists." 87 if old_id is None: 88 query = ( 89 sqlalchemy.insert(tables['users']). 90 values(**bind_variables) 91 ) 92 93 result = self.exec(query, debug=debug) 94 if result is None: 95 return False, f"Failed to register user '{user}'." 96 return True, f"Successfully registered user '{user}'."
Register a new user.
188def get_user_id( 189 self, 190 user: 'mrsm.core.User', 191 debug: bool = False 192) -> Optional[int]: 193 """If a user is registered, return the `user_id`.""" 194 ### ensure users table exists 195 from meerschaum.utils.packages import attempt_import 196 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 197 from meerschaum.connectors.sql.tables import get_tables 198 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 199 200 query = ( 201 sqlalchemy.select(users_tbl.c.user_id) 202 .where(users_tbl.c.username == user.username) 203 ) 204 205 result = self.value(query, debug=debug) 206 if result is not None: 207 return int(result) 208 return None
If a user is registered, return the `user_id`.
282def get_users( 283 self, 284 debug: bool = False, 285 **kw: Any 286) -> List[str]: 287 """ 288 Get the registered usernames. 289 """ 290 ### ensure users table exists 291 from meerschaum.connectors.sql.tables import get_tables 292 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 293 from meerschaum.utils.packages import attempt_import 294 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 295 296 query = sqlalchemy.select(users_tbl.c.username) 297 298 return list(self.read(query, debug=debug)['username'])
Get the registered usernames.
133def edit_user( 134 self, 135 user: 'mrsm.core.User', 136 debug: bool = False, 137 **kw: Any 138) -> SuccessTuple: 139 """Update an existing user's metadata.""" 140 from meerschaum.utils.packages import attempt_import 141 from meerschaum.utils.sql import json_flavors 142 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 143 from meerschaum.connectors.sql.tables import get_tables 144 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 145 146 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 147 if user_id is None: 148 return False, ( 149 f"User '{user.username}' does not exist. " 150 f"Register user '{user.username}' before editing." 151 ) 152 user.user_id = user_id 153 154 import json 155 valid_tuple = valid_username(user.username) 156 if not valid_tuple[0]: 157 return valid_tuple 158 159 bind_variables = { 160 'user_id' : user_id, 161 'username' : user.username, 162 } 163 if user.password != '': 164 bind_variables['password_hash'] = user.password_hash 165 if user.email != '': 166 bind_variables['email'] = user.email 167 if user.attributes is not None and user.attributes != {}: 168 bind_variables['attributes'] = ( 169 json.dumps(user.attributes) if self.flavor not in json_flavors 170 else user.attributes 171 ) 172 if user.type != '': 173 bind_variables['user_type'] = user.type 174 175 query = ( 176 sqlalchemy 177 .update(users_tbl) 178 .values(**bind_variables) 179 .where(users_tbl.c.user_id == user_id) 180 ) 181 182 result = self.exec(query, debug=debug) 183 if result is None: 184 return False, f"Failed to edit user '{user}'." 185 return True, f"Successfully edited user '{user}'."
Update an existing user's metadata.
250def delete_user( 251 self, 252 user: 'mrsm.core.User', 253 debug: bool = False 254) -> SuccessTuple: 255 """Delete a user's record from the users table.""" 256 ### ensure users table exists 257 from meerschaum.connectors.sql.tables import get_tables 258 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 259 plugins = get_tables(mrsm_instance=self, debug=debug)['plugins'] 260 from meerschaum.utils.packages import attempt_import 261 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 262 263 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 264 265 if user_id is None: 266 return False, f"User '{user.username}' is not registered and cannot be deleted." 267 268 query = sqlalchemy.delete(users_tbl).where(users_tbl.c.user_id == user_id) 269 270 result = self.exec(query, debug=debug) 271 if result is None: 272 return False, f"Failed to delete user '{user}'." 273 274 query = sqlalchemy.delete(plugins).where(plugins.c.user_id == user_id) 275 result = self.exec(query, debug=debug) 276 if result is None: 277 return False, f"Failed to delete plugins of user '{user}'." 278 279 return True, f"Successfully deleted user '{user}'"
Delete a user's record from the users table.
301def get_user_password_hash( 302 self, 303 user: 'mrsm.core.User', 304 debug: bool = False, 305 **kw: Any 306) -> Optional[str]: 307 """ 308 Return the password has for a user. 309 **NOTE**: This may be dangerous and is only allowed if the security settings explicity allow it. 310 """ 311 from meerschaum.utils.debug import dprint 312 from meerschaum.connectors.sql.tables import get_tables 313 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 314 from meerschaum.utils.packages import attempt_import 315 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 316 317 if user.user_id is not None: 318 user_id = user.user_id 319 if debug: 320 dprint(f"Already given user_id: {user_id}") 321 else: 322 if debug: 323 dprint("Fetching user_id...") 324 user_id = self.get_user_id(user, debug=debug) 325 326 if user_id is None: 327 return None 328 329 query = sqlalchemy.select(users_tbl.c.password_hash).where(users_tbl.c.user_id == user_id) 330 331 return self.value(query, debug=debug)
Return the password hash for a user. NOTE: This may be dangerous and is only allowed if the security settings explicitly allow it.
334def get_user_type( 335 self, 336 user: 'mrsm.core.User', 337 debug: bool = False, 338 **kw: Any 339) -> Optional[str]: 340 """ 341 Return the user's type. 342 """ 343 from meerschaum.connectors.sql.tables import get_tables 344 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 345 from meerschaum.utils.packages import attempt_import 346 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 347 348 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 349 350 if user_id is None: 351 return None 352 353 query = sqlalchemy.select(users_tbl.c.user_type).where(users_tbl.c.user_id == user_id) 354 355 return self.value(query, debug=debug)
Return the user's type.
210def get_user_attributes( 211 self, 212 user: 'mrsm.core.User', 213 debug: bool = False 214) -> Union[Dict[str, Any], None]: 215 """ 216 Return the user's attributes. 217 """ 218 ### ensure users table exists 219 from meerschaum.utils.warnings import warn 220 from meerschaum.utils.packages import attempt_import 221 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 222 from meerschaum.connectors.sql.tables import get_tables 223 users_tbl = get_tables(mrsm_instance=self, debug=debug)['users'] 224 225 user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug) 226 227 query = ( 228 sqlalchemy.select(users_tbl.c.attributes) 229 .where(users_tbl.c.user_id == user_id) 230 ) 231 232 result = self.value(query, debug=debug) 233 if result is not None and not isinstance(result, dict): 234 try: 235 result = dict(result) 236 _parsed = True 237 except Exception: 238 _parsed = False 239 if not _parsed: 240 try: 241 import json 242 result = json.loads(result) 243 _parsed = True 244 except Exception: 245 _parsed = False 246 if not _parsed: 247 warn(f"Received unexpected type for attributes: {result}") 248 return result
Return the user's attributes.
15@classmethod 16def from_uri( 17 cls, 18 uri: str, 19 label: Optional[str] = None, 20 as_dict: bool = False, 21) -> Union[ 22 'meerschaum.connectors.SQLConnector', 23 Dict[str, Union[str, int]], 24]: 25 """ 26 Create a new SQLConnector from a URI string. 27 28 Parameters 29 ---------- 30 uri: str 31 The URI connection string. 32 33 label: Optional[str], default None 34 If provided, use this as the connector label. 35 Otherwise use the determined database name. 36 37 as_dict: bool, default False 38 If `True`, return a dictionary of the keyword arguments 39 necessary to create a new `SQLConnector`, otherwise create a new object. 40 41 Returns 42 ------- 43 A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`). 44 """ 45 46 params = cls.parse_uri(uri) 47 params['uri'] = uri 48 flavor = params.get('flavor', None) 49 if not flavor or flavor not in cls.flavor_configs: 50 error(f"Invalid flavor '{flavor}' detected from the provided URI.") 51 52 if 'database' not in params: 53 error("Unable to determine the database from the provided URI.") 54 55 if flavor in ('sqlite', 'duckdb', 'geopackage'): 56 if params['database'] == ':memory:': 57 params['label'] = label or f'memory_{flavor}' 58 else: 59 params['label'] = label or params['database'].split(os.path.sep)[-1].lower() 60 else: 61 params['label'] = label or ( 62 ( 63 (params['username'] + '@' if 'username' in params else '') 64 + params.get('host', '') 65 + ('/' if 'host' in params else '') 66 + params.get('database', '') 67 ).lower() 68 ) 69 70 return cls(**params) if not as_dict else params
Create a new SQLConnector from a URI string.
Parameters
- uri (str): The URI connection string.
- label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
- as_dict (bool, default False): If `True`, return a dictionary of the keyword arguments necessary to create a new `SQLConnector`, otherwise create a new object.
Returns
- A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`).
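For example, with `as_dict=True` no connector object is constructed and the parsed attributes are returned (shown for an illustrative SQLite URI; compare `parse_uri` below):
>>> from meerschaum.connectors.sql import SQLConnector
>>> SQLConnector.from_uri('sqlite:////tmp/demo.db', as_dict=True)
{'database': '/tmp/demo.db', 'flavor': 'sqlite', 'uri': 'sqlite:////tmp/demo.db', 'label': 'demo.db'}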
73@staticmethod 74def parse_uri(uri: str) -> Dict[str, Any]: 75 """ 76 Parse a URI string into a dictionary of parameters. 77 78 Parameters 79 ---------- 80 uri: str 81 The database connection URI. 82 83 Returns 84 ------- 85 A dictionary of attributes. 86 87 Examples 88 -------- 89 >>> parse_uri('sqlite:////home/foo/bar.db') 90 {'database': '/home/foo/bar.db', 'flavor': 'sqlite'} 91 >>> parse_uri( 92 ... 'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439' 93 ... + '/master?driver=ODBC+Driver+17+for+SQL+Server' 94 ... ) 95 {'host': 'localhost', 'database': 'master', 'username': 'sa', 96 'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql', 97 'driver': 'ODBC Driver 17 for SQL Server'} 98 >>> 99 """ 100 from urllib.parse import parse_qs, urlparse 101 sqlalchemy = attempt_import('sqlalchemy', lazy=False) 102 parser = sqlalchemy.engine.url.make_url 103 params = parser(uri).translate_connect_args() 104 params['flavor'] = uri.split(':')[0].split('+')[0] 105 if params['flavor'] == 'postgres': 106 params['flavor'] = 'postgresql' 107 if '?' in uri: 108 parsed_uri = urlparse(uri) 109 for key, value in parse_qs(parsed_uri.query).items(): 110 params.update({key: value[0]}) 111 112 if '--search_path' in params.get('options', ''): 113 params.update({'schema': params['options'].replace('--search_path=', '', 1)}) 114 return params
Parse a URI string into a dictionary of parameters.
Parameters
- uri (str): The database connection URI.
Returns
- A dictionary of attributes.
Examples
>>> parse_uri('sqlite:////home/foo/bar.db')
{'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
>>> parse_uri(
... 'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
... + '/master?driver=ODBC+Driver+17+for+SQL+Server'
... )
{'host': 'localhost', 'database': 'master', 'username': 'sa',
'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
'driver': 'ODBC Driver 17 for SQL Server'}
>>>
22class APIConnector(InstanceConnector): 23 """ 24 Connect to a Meerschaum API instance. 25 """ 26 27 IS_THREAD_SAFE: bool = False 28 OPTIONAL_ATTRIBUTES: List[str] = ['port', 'client_secret', 'client_id', 'api_key'] 29 30 from ._request import ( 31 make_request, 32 get, 33 post, 34 put, 35 patch, 36 delete, 37 wget, 38 ) 39 from ._actions import ( 40 get_actions, 41 do_action, 42 do_action_async, 43 do_action_legacy, 44 ) 45 from ._misc import get_mrsm_version, get_chaining_status 46 from ._pipes import ( 47 get_pipe_instance_keys, 48 register_pipe, 49 fetch_pipes_keys, 50 edit_pipe, 51 sync_pipe, 52 delete_pipe, 53 get_pipe_data, 54 get_pipe_id, 55 get_pipe_attributes, 56 get_sync_time, 57 pipe_exists, 58 create_metadata, 59 get_pipe_rowcount, 60 drop_pipe, 61 clear_pipe, 62 get_pipe_columns_types, 63 get_pipe_columns_indices, 64 ) 65 from ._fetch import fetch 66 from ._plugins import ( 67 register_plugin, 68 install_plugin, 69 delete_plugin, 70 get_plugins, 71 get_plugin_attributes, 72 ) 73 from ._login import login, test_connection 74 from ._users import ( 75 register_user, 76 get_user_id, 77 get_users, 78 edit_user, 79 delete_user, 80 get_user_password_hash, 81 get_user_type, 82 get_user_attributes, 83 ) 84 from ._tokens import ( 85 register_token, 86 get_token_model, 87 get_tokens, 88 edit_token, 89 invalidate_token, 90 get_token_scopes, 91 token_exists, 92 delete_token, 93 ) 94 from ._uri import from_uri 95 from ._jobs import ( 96 get_jobs, 97 get_job, 98 get_job_metadata, 99 get_job_properties, 100 get_job_exists, 101 delete_job, 102 start_job, 103 create_job, 104 stop_job, 105 pause_job, 106 get_logs, 107 get_job_stop_time, 108 monitor_logs, 109 monitor_logs_async, 110 get_job_is_blocking_on_stdin, 111 get_job_began, 112 get_job_ended, 113 get_job_paused, 114 get_job_status, 115 ) 116 117 def __init__( 118 self, 119 label: Optional[str] = None, 120 wait: bool = False, 121 debug: bool = False, 122 **kw 123 ): 124 if 'uri' in kw: 125 from_uri_params = self.from_uri(kw['uri'], as_dict=True) 126 label = label or from_uri_params.get('label', None) 127 _ = from_uri_params.pop('label', None) 128 kw.update(from_uri_params) 129 130 super().__init__('api', label=label, **kw) 131 if 'protocol' not in self.__dict__: 132 self.protocol = ( 133 'https' if self.__dict__.get('uri', '').startswith('https') 134 else 'http' 135 ) 136 137 if 'uri' not in self.__dict__: 138 self.verify_attributes(required_attributes) 139 else: 140 from meerschaum.connectors.sql import SQLConnector 141 conn_attrs = SQLConnector.parse_uri(self.__dict__['uri']) 142 if 'host' not in conn_attrs: 143 raise Exception(f"Invalid URI for '{self}'.") 144 self.__dict__.update(conn_attrs) 145 146 self.url = ( 147 self.protocol + '://' + 148 self.host 149 + ( 150 (':' + str(self.port)) 151 if self.__dict__.get('port', None) 152 else '' 153 ) 154 ) 155 self._token = None 156 self._expires = None 157 self._session = None 158 self._instance_keys = self.__dict__.get('instance_keys', None) 159 160 161 @property 162 def URI(self) -> str: 163 """ 164 Return the fully qualified URI. 165 """ 166 import urllib.parse 167 username = self.__dict__.get('username', None) 168 password = self.__dict__.get('password', None) 169 client_id = self.__dict__.get('client_id', None) 170 client_secret = self.__dict__.get('client_secret', None) 171 api_key = self.__dict__.get('api_key', None) 172 creds = (username + ':' + password + '@') if username and password else '' 173 params = {} 174 params_str = ('?' 
+ urllib.parse.urlencode(params)) if params else '' 175 return ( 176 self.protocol 177 + '://' 178 + creds 179 + self.host 180 + ( 181 (':' + str(self.port)) 182 if self.__dict__.get('port', None) 183 else '' 184 ) 185 + params_str 186 ) 187 188 @property 189 def session(self): 190 if self._session is None: 191 _ = attempt_import('certifi', lazy=False) 192 requests = attempt_import('requests', lazy=False) 193 if requests: 194 self._session = requests.Session() 195 if self._session is None: 196 error("Failed to import requests. Is requests installed?") 197 return self._session 198 199 @property 200 def token(self): 201 expired = ( 202 True if self._expires is None else ( 203 ( 204 self._expires 205 < 206 datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1) 207 ) 208 ) 209 ) 210 211 if self._token is None or expired: 212 success, msg = self.login() 213 if not success and not self.__dict__.get('_emitted_warning'): 214 warn(msg, stack=False) 215 self._emitted_warning = True 216 return self._token 217 218 @property 219 def instance_keys(self) -> Union[str, None]: 220 """ 221 Return the instance keys to be sent alongside pipe requests. 222 """ 223 return self._instance_keys 224 225 @property 226 def login_scheme(self) -> str: 227 """ 228 Return the login scheme to use based on the configured credentials. 229 """ 230 if 'username' in self.__dict__: 231 return 'password' 232 if 'client_id' in self.__dict__: 233 return 'client_credentials' 234 elif 'api_key' in self.__dict__: 235 return 'api_key' 236 237 return 'password'
Connect to a Meerschaum API instance.
117 def __init__( 118 self, 119 label: Optional[str] = None, 120 wait: bool = False, 121 debug: bool = False, 122 **kw 123 ): 124 if 'uri' in kw: 125 from_uri_params = self.from_uri(kw['uri'], as_dict=True) 126 label = label or from_uri_params.get('label', None) 127 _ = from_uri_params.pop('label', None) 128 kw.update(from_uri_params) 129 130 super().__init__('api', label=label, **kw) 131 if 'protocol' not in self.__dict__: 132 self.protocol = ( 133 'https' if self.__dict__.get('uri', '').startswith('https') 134 else 'http' 135 ) 136 137 if 'uri' not in self.__dict__: 138 self.verify_attributes(required_attributes) 139 else: 140 from meerschaum.connectors.sql import SQLConnector 141 conn_attrs = SQLConnector.parse_uri(self.__dict__['uri']) 142 if 'host' not in conn_attrs: 143 raise Exception(f"Invalid URI for '{self}'.") 144 self.__dict__.update(conn_attrs) 145 146 self.url = ( 147 self.protocol + '://' + 148 self.host 149 + ( 150 (':' + str(self.port)) 151 if self.__dict__.get('port', None) 152 else '' 153 ) 154 ) 155 self._token = None 156 self._expires = None 157 self._session = None 158 self._instance_keys = self.__dict__.get('instance_keys', None)
161 @property 162 def URI(self) -> str: 163 """ 164 Return the fully qualified URI. 165 """ 166 import urllib.parse 167 username = self.__dict__.get('username', None) 168 password = self.__dict__.get('password', None) 169 client_id = self.__dict__.get('client_id', None) 170 client_secret = self.__dict__.get('client_secret', None) 171 api_key = self.__dict__.get('api_key', None) 172 creds = (username + ':' + password + '@') if username and password else '' 173 params = {} 174 params_str = ('?' + urllib.parse.urlencode(params)) if params else '' 175 return ( 176 self.protocol 177 + '://' 178 + creds 179 + self.host 180 + ( 181 (':' + str(self.port)) 182 if self.__dict__.get('port', None) 183 else '' 184 ) 185 + params_str 186 )
Return the fully qualified URI.
188 @property 189 def session(self): 190 if self._session is None: 191 _ = attempt_import('certifi', lazy=False) 192 requests = attempt_import('requests', lazy=False) 193 if requests: 194 self._session = requests.Session() 195 if self._session is None: 196 error("Failed to import requests. Is requests installed?") 197 return self._session
199 @property 200 def token(self): 201 expired = ( 202 True if self._expires is None else ( 203 ( 204 self._expires 205 < 206 datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1) 207 ) 208 ) 209 ) 210 211 if self._token is None or expired: 212 success, msg = self.login() 213 if not success and not self.__dict__.get('_emitted_warning'): 214 warn(msg, stack=False) 215 self._emitted_warning = True 216 return self._token
218 @property 219 def instance_keys(self) -> Union[str, None]: 220 """ 221 Return the instance keys to be sent alongside pipe requests. 222 """ 223 return self._instance_keys
Return the instance keys to be sent alongside pipe requests.
225 @property 226 def login_scheme(self) -> str: 227 """ 228 Return the login scheme to use based on the configured credentials. 229 """ 230 if 'username' in self.__dict__: 231 return 'password' 232 if 'client_id' in self.__dict__: 233 return 'client_credentials' 234 elif 'api_key' in self.__dict__: 235 return 'api_key' 236 237 return 'password'
Return the login scheme to use based on the configured credentials.
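For example, a connector constructed with a username and password resolves to the password scheme (the attribute values below are placeholders):
>>> conn = mrsm.get_connector(
...     'api', 'example',
...     host='localhost', port=8000,
...     username='user', password='pass',
... )
>>> conn.login_scheme
'password'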
28def make_request( 29 self, 30 method: str, 31 r_url: str, 32 headers: Optional[Dict[str, Any]] = None, 33 use_token: bool = True, 34 debug: bool = False, 35 **kwargs: Any 36) -> 'requests.Response': 37 """ 38 Make a request to this APIConnector's endpoint using the in-memory session. 39 40 Parameters 41 ---------- 42 method: str 43 The kind of request to make. 44 Accepted values: 45 - `'GET'` 46 - `'OPTIONS'` 47 - `'HEAD'` 48 - `'POST'` 49 - `'PUT'` 50 - `'PATCH'` 51 - `'DELETE'` 52 53 r_url: str 54 The relative URL for the endpoint (e.g. `'/pipes'`). 55 56 headers: Optional[Dict[str, Any]], default None 57 The headers to use for the request. 58 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 59 60 use_token: bool, default True 61 If `True`, add the authorization token to the headers. 62 63 debug: bool, default False 64 Verbosity toggle. 65 66 kwargs: Any 67 All other keyword arguments are passed to `requests.request`. 68 69 Returns 70 ------- 71 A `requests.Reponse` object. 72 """ 73 if method.upper() not in METHODS: 74 raise ValueError(f"Method '{method}' is not supported.") 75 76 verify = self.__dict__.get('verify', None) 77 if 'verify' not in kwargs and isinstance(verify, bool): 78 kwargs['verify'] = verify 79 80 headers = ( 81 copy.deepcopy(headers) 82 if isinstance(headers, dict) 83 else {} 84 ) 85 86 if use_token: 87 headers.update({'Authorization': f'Bearer {self.token}'}) 88 89 if 'timeout' not in kwargs: 90 kwargs['timeout'] = STATIC_CONFIG['api']['default_timeout'] 91 92 request_url = urllib.parse.urljoin(self.url, r_url) 93 if debug: 94 dprint(f"[{self}] Sending a '{method.upper()}' request to {request_url}") 95 96 return self.session.request( 97 method.upper(), 98 request_url, 99 headers=headers, 100 **kwargs 101 )
Make a request to this APIConnector's endpoint using the in-memory session.
Parameters
- method (str): The kind of request to make. Accepted values: `'GET'`, `'OPTIONS'`, `'HEAD'`, `'POST'`, `'PUT'`, `'PATCH'`, `'DELETE'`.
- r_url (str): The relative URL for the endpoint (e.g. `'/pipes'`).
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If `use_token` is `True`, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If `True`, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `requests.request`.
Returns
- A `requests.Response` object.
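A usage sketch, assuming an `api:main` connector is configured and reachable (the endpoint path is illustrative):
>>> conn = mrsm.get_connector('api:main')
>>> response = conn.make_request('GET', '/version/mrsm', use_token=False)
>>> response.status_code
200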
104def get(self, r_url: str, **kwargs: Any) -> 'requests.Response': 105 """ 106 Wrapper for `requests.get`. 107 108 Parameters 109 ---------- 110 r_url: str 111 The relative URL for the endpoint (e.g. `'/pipes'`). 112 113 headers: Optional[Dict[str, Any]], default None 114 The headers to use for the request. 115 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 116 117 use_token: bool, default True 118 If `True`, add the authorization token to the headers. 119 120 debug: bool, default False 121 Verbosity toggle. 122 123 kwargs: Any 124 All other keyword arguments are passed to `requests.request`. 125 126 Returns 127 ------- 128 A `requests.Reponse` object. 129 130 """ 131 return self.make_request('GET', r_url, **kwargs)
Wrapper for requests.get.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. `'/pipes'`).
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If `use_token` is `True`, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If `True`, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `requests.request`.
Returns
- A `requests.Response` object.
134def post(self, r_url: str, **kwargs: Any) -> 'requests.Response': 135 """ 136 Wrapper for `requests.post`. 137 138 Parameters 139 ---------- 140 r_url: str 141 The relative URL for the endpoint (e.g. `'/pipes'`). 142 143 headers: Optional[Dict[str, Any]], default None 144 The headers to use for the request. 145 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 146 147 use_token: bool, default True 148 If `True`, add the authorization token to the headers. 149 150 debug: bool, default False 151 Verbosity toggle. 152 153 kwargs: Any 154 All other keyword arguments are passed to `requests.request`. 155 156 Returns 157 ------- 158 A `requests.Reponse` object. 159 160 """ 161 return self.make_request('POST', r_url, **kwargs)
Wrapper for requests.post.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. `'/pipes'`).
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If `use_token` is `True`, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If `True`, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `requests.request`.
Returns
- A `requests.Response` object.
193def put(self, r_url: str, **kwargs: Any) -> 'requests.Response': 194 """ 195 Wrapper for `requests.put`. 196 197 Parameters 198 ---------- 199 r_url: str 200 The relative URL for the endpoint (e.g. `'/pipes'`). 201 202 headers: Optional[Dict[str, Any]], default None 203 The headers to use for the request. 204 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 205 206 use_token: bool, default True 207 If `True`, add the authorization token to the headers. 208 209 debug: bool, default False 210 Verbosity toggle. 211 212 kwargs: Any 213 All other keyword arguments are passed to `requests.request`. 214 215 Returns 216 ------- 217 A `requests.Reponse` object. 218 """ 219 return self.make_request('PUT', r_url, **kwargs)
Wrapper for requests.put.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. `'/pipes'`).
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If `use_token` is `True`, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If `True`, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `requests.request`.
Returns
- A `requests.Response` object.
164def patch(self, r_url: str, **kwargs: Any) -> 'requests.Response': 165 """ 166 Wrapper for `requests.patch`. 167 168 Parameters 169 ---------- 170 r_url: str 171 The relative URL for the endpoint (e.g. `'/pipes'`). 172 173 headers: Optional[Dict[str, Any]], default None 174 The headers to use for the request. 175 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 176 177 use_token: bool, default True 178 If `True`, add the authorization token to the headers. 179 180 debug: bool, default False 181 Verbosity toggle. 182 183 kwargs: Any 184 All other keyword arguments are passed to `requests.request`. 185 186 Returns 187 ------- 188 A `requests.Reponse` object. 189 """ 190 return self.make_request('PATCH', r_url, **kwargs)
Wrapper for requests.patch.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. `'/pipes'`).
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If `use_token` is `True`, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If `True`, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `requests.request`.
Returns
- A `requests.Response` object.
222def delete(self, r_url: str, **kwargs: Any) -> 'requests.Response': 223 """ 224 Wrapper for `requests.delete`. 225 226 Parameters 227 ---------- 228 r_url: str 229 The relative URL for the endpoint (e.g. `'/pipes'`). 230 231 headers: Optional[Dict[str, Any]], default None 232 The headers to use for the request. 233 If `use_token` is `True`, the authorization token will be added to a copy of these headers. 234 235 use_token: bool, default True 236 If `True`, add the authorization token to the headers. 237 238 debug: bool, default False 239 Verbosity toggle. 240 241 kwargs: Any 242 All other keyword arguments are passed to `requests.request`. 243 244 Returns 245 ------- 246 A `requests.Reponse` object. 247 """ 248 return self.make_request('DELETE', r_url, **kwargs)
Wrapper for requests.delete.
Parameters
- r_url (str): The relative URL for the endpoint (e.g. `'/pipes'`).
- headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If `use_token` is `True`, the authorization token will be added to a copy of these headers.
- use_token (bool, default True): If `True`, add the authorization token to the headers.
- debug (bool, default False): Verbosity toggle.
- kwargs (Any): All other keyword arguments are passed to `requests.request`.
Returns
- A `requests.Response` object.
251def wget( 252 self, 253 r_url: str, 254 dest: Optional[Union[str, pathlib.Path]] = None, 255 headers: Optional[Dict[str, Any]] = None, 256 use_token: bool = True, 257 debug: bool = False, 258 **kw: Any 259) -> pathlib.Path: 260 """Mimic wget with requests.""" 261 from meerschaum.utils.misc import wget 262 if headers is None: 263 headers = {} 264 if use_token: 265 headers.update({'Authorization': f'Bearer {self.token}'}) 266 request_url = urllib.parse.urljoin(self.url, r_url) 267 if debug: 268 dprint( 269 f"[{self}] Downloading {request_url}" 270 + (f' to {dest}' if dest is not None else '') 271 + "..." 272 ) 273 return wget(request_url, dest=dest, headers=headers, **kw)
Mimic wget with requests.
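A usage sketch (the endpoint and destination path are illustrative):
>>> conn = mrsm.get_connector('api:main')
>>> conn.wget('/plugins/noaa', dest='/tmp/noaa.tar.gz')
PosixPath('/tmp/noaa.tar.gz')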
24def get_actions(self): 25 """Get available actions from the API instance.""" 26 return self.get(ACTIONS_ENDPOINT)
Get available actions from the API instance.
29def do_action(self, sysargs: List[str]) -> SuccessTuple: 30 """ 31 Execute a Meerschaum action remotely. 32 """ 33 return asyncio.run(self.do_action_async(sysargs))
Execute a Meerschaum action remotely.
36async def do_action_async( 37 self, 38 sysargs: List[str], 39 callback_function: Callable[[str], None] = partial(print, end=''), 40) -> SuccessTuple: 41 """ 42 Execute an action as a temporary remote job. 43 """ 44 from meerschaum._internal.arguments import remove_api_executor_keys 45 from meerschaum.utils.misc import generate_password 46 sysargs = remove_api_executor_keys(sysargs) 47 48 job_name = TEMP_PREFIX + generate_password(12) 49 job = mrsm.Job(job_name, sysargs, executor_keys=str(self)) 50 51 start_success, start_msg = job.start() 52 if not start_success: 53 return start_success, start_msg 54 55 await job.monitor_logs_async( 56 callback_function=callback_function, 57 stop_on_exit=True, 58 strip_timestamps=True, 59 ) 60 61 success, msg = job.result 62 job.delete() 63 return success, msg
Execute an action as a temporary remote job.
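Because this coroutine monitors a temporary remote job, it must be awaited or run in an event loop; a sketch (the action and output are illustrative):
>>> import asyncio
>>> conn = mrsm.get_connector('api:main')
>>> asyncio.run(conn.do_action_async(['show', 'pipes']))
(True, 'Success')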
66def do_action_legacy( 67 self, 68 action: Optional[List[str]] = None, 69 sysargs: Optional[List[str]] = None, 70 debug: bool = False, 71 **kw 72) -> SuccessTuple: 73 """ 74 NOTE: This method is deprecated. 75 Please use `do_action()` or `do_action_async()`. 76 77 Execute a Meerschaum action remotely. 78 79 If `sysargs` are provided, parse those instead. 80 Otherwise infer everything from keyword arguments. 81 82 Examples 83 -------- 84 >>> conn = mrsm.get_connector('api:main') 85 >>> conn.do_action(['show', 'pipes']) 86 (True, "Success") 87 >>> conn.do_action(['show', 'arguments'], name='test') 88 (True, "Success") 89 """ 90 import sys, json 91 from meerschaum.utils.debug import dprint 92 from meerschaum._internal.static import STATIC_CONFIG 93 from meerschaum.utils.misc import json_serialize_datetime 94 if action is None: 95 action = [] 96 97 if sysargs is not None and action and action[0] == '': 98 from meerschaum._internal.arguments import parse_arguments 99 if debug: 100 dprint(f"Parsing sysargs:\n{sysargs}") 101 json_dict = parse_arguments(sysargs) 102 else: 103 json_dict = kw 104 json_dict['action'] = action 105 if 'noask' not in kw: 106 json_dict['noask'] = True 107 if 'yes' not in kw: 108 json_dict['yes'] = True 109 if debug: 110 json_dict['debug'] = debug 111 112 root_action = json_dict['action'][0] 113 del json_dict['action'][0] 114 r_url = f"{STATIC_CONFIG['api']['endpoints']['actions']}/{root_action}" 115 116 if debug: 117 from meerschaum.utils.formatting import pprint 118 dprint(f"Sending data to '{self.url + r_url}':") 119 pprint(json_dict, stream=sys.stderr) 120 121 response = self.post( 122 r_url, 123 data = json.dumps(json_dict, default=json_serialize_datetime), 124 debug = debug, 125 ) 126 try: 127 response_list = json.loads(response.text) 128 if isinstance(response_list, dict) and 'detail' in response_list: 129 return False, response_list['detail'] 130 except Exception as e: 131 print(f"Invalid response: {response}") 132 print(e) 133 return False, response.text 134 if debug: 135 dprint(response) 136 try: 137 return response_list[0], response_list[1] 138 except Exception as e: 139 return False, f"Failed to parse result from action '{root_action}'"
NOTE: This method is deprecated. Please use `do_action()` or `do_action_async()`.
Execute a Meerschaum action remotely.
If `sysargs` are provided, parse those instead; otherwise infer everything from keyword arguments.
Examples
>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action(['show', 'pipes'])
(True, "Success")
>>> conn.do_action(['show', 'arguments'], name='test')
(True, "Success")
13def get_mrsm_version(self, **kw) -> Optional[str]: 14 """ 15 Return the Meerschaum version of the API instance. 16 """ 17 from meerschaum._internal.static import STATIC_CONFIG 18 try: 19 j = self.get( 20 STATIC_CONFIG['api']['endpoints']['version'] + '/mrsm', 21 use_token=False, 22 **kw 23 ).json() 24 except Exception: 25 return None 26 if isinstance(j, dict) and 'detail' in j: 27 return None 28 return j
Return the Meerschaum version of the API instance.
31def get_chaining_status(self, **kw) -> Optional[bool]: 32 """ 33 Fetch the chaining status of the API instance. 34 """ 35 from meerschaum._internal.static import STATIC_CONFIG 36 try: 37 response = self.get( 38 STATIC_CONFIG['api']['endpoints']['chaining'], 39 use_token = True, 40 **kw 41 ) 42 if not response: 43 return None 44 except Exception: 45 return None 46 47 return response.json()
Fetch the chaining status of the API instance.
35def get_pipe_instance_keys(self, pipe: mrsm.Pipe) -> Union[str, None]: 36 """ 37 Return the configured instance keys for a pipe if set, 38 else fall back to the default `instance_keys` for this `APIConnector`. 39 """ 40 return pipe.parameters.get('instance_keys', self.instance_keys)
Return the configured instance keys for a pipe if set,
else fall back to the default instance_keys for this APIConnector.
43def register_pipe( 44 self, 45 pipe: mrsm.Pipe, 46 debug: bool = False 47) -> SuccessTuple: 48 """Submit a POST to the API to register a new Pipe object. 49 Returns a tuple of (success_bool, response_dict). 50 """ 51 from meerschaum.utils.debug import dprint 52 r_url = pipe_r_url(pipe) 53 response = self.post( 54 r_url + '/register', 55 json=pipe._attributes.get('parameters', {}), 56 params={'instance_keys': self.get_pipe_instance_keys(pipe)}, 57 debug=debug, 58 ) 59 if debug: 60 dprint(response.text) 61 62 if not response: 63 return False, response.text 64 65 response_data = response.json() 66 if isinstance(response_data, list): 67 response_tuple = response_data[0], response_data[1] 68 elif 'detail' in response.json(): 69 response_tuple = response.__bool__(), response_data['detail'] 70 else: 71 response_tuple = response.__bool__(), response.text 72 return response_tuple
Submit a POST to the API to register a new Pipe object. Returns a tuple of (success_bool, response_dict).
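A usage sketch (the pipe's keys and the response message are illustrative):
>>> conn = mrsm.get_connector('api:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='api:main')
>>> conn.register_pipe(pipe)
(True, 'Success')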
108def fetch_pipes_keys( 109 self, 110 connector_keys: Optional[List[str]] = None, 111 metric_keys: Optional[List[str]] = None, 112 location_keys: Optional[List[str]] = None, 113 tags: Optional[List[str]] = None, 114 params: Optional[Dict[str, Any]] = None, 115 debug: bool = False 116) -> List[ 117 Union[ 118 Tuple[str, str, Union[str, None]], 119 Tuple[str, str, Union[str, None], List[str]], 120 Tuple[str, str, Union[str, None], Dict[str, Any]] 121 ] 122 ]: 123 """ 124 Fetch registered Pipes' keys from the API. 125 126 Parameters 127 ---------- 128 connector_keys: Optional[List[str]], default None 129 The connector keys for the query. 130 131 metric_keys: Optional[List[str]], default None 132 The metric keys for the query. 133 134 location_keys: Optional[List[str]], default None 135 The location keys for the query. 136 137 tags: Optional[List[str]], default None 138 A list of tags for the query. 139 140 params: Optional[Dict[str, Any]], default None 141 A parameters dictionary for filtering against the `pipes` table 142 (e.g. `{'connector_keys': 'plugin:foo'}`). 143 Not recommeded to be used. 144 145 debug: bool, default False 146 Verbosity toggle. 147 148 Returns 149 ------- 150 A list of tuples containing pipes' keys. 151 """ 152 from meerschaum._internal.static import STATIC_CONFIG 153 if connector_keys is None: 154 connector_keys = [] 155 if metric_keys is None: 156 metric_keys = [] 157 if location_keys is None: 158 location_keys = [] 159 if tags is None: 160 tags = [] 161 162 r_url = STATIC_CONFIG['api']['endpoints']['pipes'] + '/keys' 163 try: 164 j = self.get( 165 r_url, 166 params={ 167 'connector_keys': json.dumps(connector_keys), 168 'metric_keys': json.dumps(metric_keys), 169 'location_keys': json.dumps(location_keys), 170 'tags': json.dumps(tags), 171 'params': json.dumps(params), 172 'instance_keys': self.instance_keys, 173 }, 174 debug=debug 175 ).json() 176 except Exception as e: 177 import traceback 178 traceback.print_exc() 179 error(str(e)) 180 181 if 'detail' in j: 182 error(j['detail'], stack=False) 183 return [tuple(r) for r in j]
Fetch registered Pipes' keys from the API.
Parameters
- connector_keys (Optional[List[str]], default None): The connector keys for the query.
- metric_keys (Optional[List[str]], default None): The metric keys for the query.
- location_keys (Optional[List[str]], default None): The location keys for the query.
- tags (Optional[List[str]], default None): A list of tags for the query.
- params (Optional[Dict[str, Any]], default None): A parameters dictionary for filtering against the `pipes` table (e.g. `{'connector_keys': 'plugin:foo'}`). Not recommended.
- debug (bool, default False): Verbosity toggle.
Returns
- A list of tuples containing pipes' keys.
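A usage sketch (the keys and result are illustrative):
>>> conn = mrsm.get_connector('api:main')
>>> conn.fetch_pipes_keys(connector_keys=['plugin:noaa'])
[('plugin:noaa', 'weather', None)]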
75def edit_pipe( 76 self, 77 pipe: mrsm.Pipe, 78 patch: bool = False, 79 debug: bool = False, 80) -> SuccessTuple: 81 """Submit a PATCH to the API to edit an existing Pipe object. 82 Returns a tuple of (success_bool, response_dict). 83 """ 84 from meerschaum.utils.debug import dprint 85 ### NOTE: if `parameters` is supplied in the Pipe constructor, 86 ### then `pipe.parameters` will exist and not be fetched from the database. 87 r_url = pipe_r_url(pipe) 88 response = self.patch( 89 r_url + '/edit', 90 params={'patch': patch, 'instance_keys': self.get_pipe_instance_keys(pipe)}, 91 json=pipe.get_parameters(apply_symlinks=False), 92 debug=debug, 93 ) 94 if debug: 95 dprint(response.text) 96 97 response_data = response.json() 98 99 if isinstance(response.json(), list): 100 response_tuple = response_data[0], response_data[1] 101 elif 'detail' in response.json(): 102 response_tuple = response.__bool__(), response_data['detail'] 103 else: 104 response_tuple = response.__bool__(), response.text 105 return response_tuple
Submit a PATCH to the API to edit an existing Pipe object. Returns a tuple of (success_bool, response_dict).
186def sync_pipe( 187 self, 188 pipe: mrsm.Pipe, 189 df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]] = None, 190 chunksize: Optional[int] = -1, 191 debug: bool = False, 192 **kw: Any 193) -> SuccessTuple: 194 """Sync a DataFrame into a Pipe.""" 195 from decimal import Decimal 196 from meerschaum.utils.debug import dprint 197 from meerschaum.utils.dtypes import json_serialize_value 198 from meerschaum.utils.misc import items_str, interval_str 199 from meerschaum.config import get_config 200 from meerschaum.utils.packages import attempt_import 201 from meerschaum.utils.dataframe import get_special_cols, to_json 202 begin = time.perf_counter() 203 more_itertools = attempt_import('more_itertools') 204 if df is None: 205 msg = f"DataFrame is `None`. Cannot sync {pipe}." 206 return False, msg 207 208 def get_json_str(c): 209 if isinstance(c, str): 210 return c 211 if isinstance(c, (dict, list, tuple)): 212 return json.dumps(c, default=json_serialize_value) 213 return to_json(c, orient='columns', geometry_format='wkb_hex') 214 215 df = json.loads(df) if isinstance(df, str) else df 216 217 _chunksize: Optional[int] = (1 if chunksize is None else ( 218 get_config('system', 'connectors', 'sql', 'chunksize') if chunksize == -1 219 else chunksize 220 )) 221 keys: List[str] = list(df.columns) 222 chunks = [] 223 if hasattr(df, 'index'): 224 df = df.reset_index(drop=True) 225 is_dask = 'dask' in df.__module__ 226 chunks = ( 227 (df.iloc[i] for i in more_itertools.chunked(df.index, _chunksize)) 228 if not is_dask 229 else [partition.compute() for partition in df.partitions] 230 ) 231 232 elif isinstance(df, dict): 233 ### `_chunks` is a dict of lists of dicts. 234 ### e.g. {'a' : [ {'a':[1, 2]}, {'a':[3, 4]} ] } 235 _chunks = {k: [] for k in keys} 236 for k in keys: 237 chunk_iter = more_itertools.chunked(df[k], _chunksize) 238 for l in chunk_iter: 239 _chunks[k].append({k: l}) 240 241 ### `chunks` is a list of dicts (e.g. orient by rows in pandas JSON). 242 for k, l in _chunks.items(): 243 for i, c in enumerate(l): 244 try: 245 chunks[i].update(c) 246 except IndexError: 247 chunks.append(c) 248 elif isinstance(df, list): 249 chunks = (df[i] for i in more_itertools.chunked(df, _chunksize)) 250 251 ### Send columns in case the user has defined them locally. 
252 request_params = kw.copy() 253 if pipe.columns: 254 request_params['columns'] = json.dumps(pipe.columns) 255 request_params['instance_keys'] = self.get_pipe_instance_keys(pipe) 256 r_url = pipe_r_url(pipe) + '/data' 257 258 rowcount = 0 259 num_success_chunks = 0 260 for i, c in enumerate(chunks): 261 if debug: 262 dprint(f"[{self}] Posting chunk {i} to {r_url}...") 263 if len(c) == 0: 264 if debug: 265 dprint(f"[{self}] Skipping empty chunk...") 266 continue 267 json_str = get_json_str(c) 268 269 try: 270 response = self.post( 271 r_url, 272 params=request_params, 273 data=json_str, 274 debug=debug, 275 ) 276 except Exception as e: 277 msg = f"Failed to post a chunk to {pipe}:\n{e}" 278 warn(msg) 279 return False, msg 280 281 if not response: 282 return False, f"Failed to sync a chunk:\n{response.text}" 283 284 try: 285 j = json.loads(response.text) 286 except Exception as e: 287 return False, f"Failed to parse response from syncing {pipe}:\n{e}" 288 289 if isinstance(j, dict) and 'detail' in j: 290 return False, j['detail'] 291 292 try: 293 j = tuple(j) 294 except Exception: 295 return False, response.text 296 297 if debug: 298 dprint("Received response: " + str(j)) 299 if not j[0]: 300 return j 301 302 rowcount += len(c) 303 num_success_chunks += 1 304 305 success_tuple = True, ( 306 f"It took {interval_str(timedelta(seconds=(time.perf_counter() - begin)))} " 307 + f"to sync {rowcount:,} row" 308 + ('s' if rowcount != 1 else '') 309 + f" across {num_success_chunks:,} chunk" + ('s' if num_success_chunks != 1 else '') + 310 f" to {pipe}." 311 ) 312 return success_tuple
Sync a DataFrame into a Pipe.
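A usage sketch (the keys, data, and success message are illustrative):
>>> import pandas as pd
>>> conn = mrsm.get_connector('api:main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance='api:main')
>>> df = pd.DataFrame({'dt': ['2024-01-01 00:00:00'], 'temperature': [45.1]})
>>> conn.sync_pipe(pipe, df)
(True, "It took 0.1 seconds to sync 1 row across 1 chunk to Pipe('demo', 'temperature').")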
315def delete_pipe( 316 self, 317 pipe: Optional[mrsm.Pipe] = None, 318 debug: bool = False, 319) -> SuccessTuple: 320 """Delete a Pipe and drop its table.""" 321 if pipe is None: 322 error("Pipe cannot be None.") 323 r_url = pipe_r_url(pipe) 324 response = self.delete( 325 r_url + '/delete', 326 params={'instance_keys': self.get_pipe_instance_keys(pipe)}, 327 debug=debug, 328 ) 329 if debug: 330 dprint(response.text) 331 332 response_data = response.json() 333 if isinstance(response.json(), list): 334 response_tuple = response_data[0], response_data[1] 335 elif 'detail' in response.json(): 336 response_tuple = response.__bool__(), response_data['detail'] 337 else: 338 response_tuple = response.__bool__(), response.text 339 return response_tuple
Delete a Pipe and drop its table.
342def get_pipe_data( 343 self, 344 pipe: mrsm.Pipe, 345 select_columns: Optional[List[str]] = None, 346 omit_columns: Optional[List[str]] = None, 347 begin: Union[str, datetime, int, None] = None, 348 end: Union[str, datetime, int, None] = None, 349 params: Optional[Dict[str, Any]] = None, 350 as_chunks: bool = False, 351 debug: bool = False, 352 **kw: Any 353) -> Union[pandas.DataFrame, None]: 354 """Fetch data from the API.""" 355 r_url = pipe_r_url(pipe) 356 while True: 357 try: 358 response = self.get( 359 r_url + "/data", 360 params={ 361 'select_columns': json.dumps(select_columns), 362 'omit_columns': json.dumps(omit_columns), 363 'begin': begin, 364 'end': end, 365 'params': json.dumps(params, default=str), 366 'instance': self.get_pipe_instance_keys(pipe), 367 'as_chunks': as_chunks, 368 }, 369 debug=debug 370 ) 371 if not response.ok: 372 return None 373 j = response.json() 374 except Exception as e: 375 warn(f"Failed to get data for {pipe}:\n{e}") 376 return None 377 if isinstance(j, dict) and 'detail' in j: 378 return False, j['detail'] 379 break 380 381 from meerschaum.utils.dataframe import parse_df_datetimes, add_missing_cols_to_df 382 from meerschaum.utils.dtypes import are_dtypes_equal 383 try: 384 df = parse_df_datetimes( 385 j, 386 ignore_cols=[ 387 col 388 for col, dtype in pipe.dtypes.items() 389 if not are_dtypes_equal(str(dtype), 'datetime') 390 ], 391 strip_timezone=(pipe.tzinfo is None), 392 debug=debug, 393 ) 394 except Exception as e: 395 warn(f"Failed to parse response for {pipe}:\n{e}") 396 return None 397 398 if len(df.columns) == 0: 399 return add_missing_cols_to_df(df, pipe.dtypes) 400 401 return df
Fetch data from the API.
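A usage sketch (the keys and bounds are illustrative); the response JSON is parsed into a DataFrame with datetime columns restored:
>>> conn = mrsm.get_connector('api:main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance='api:main')
>>> df = conn.get_pipe_data(pipe, begin='2024-01-01', end='2024-02-01')
>>> list(df.columns)
['dt', 'temperature']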
404def get_pipe_id( 405 self, 406 pipe: mrsm.Pipe, 407 debug: bool = False, 408) -> Union[int, str, None]: 409 """Get a Pipe's ID from the API.""" 410 from meerschaum.utils.misc import is_int 411 r_url = pipe_r_url(pipe) 412 response = self.get( 413 r_url + '/id', 414 params={ 415 'instance': self.get_pipe_instance_keys(pipe), 416 }, 417 debug=debug, 418 ) 419 if debug: 420 dprint(f"Got pipe ID: {response.text}") 421 try: 422 if is_int(response.text): 423 return int(response.text) 424 if response.text and response.text[0] != '{': 425 return response.text 426 except Exception as e: 427 warn(f"Failed to get the ID for {pipe}:\n{e}") 428 return None
Get a Pipe's ID from the API.
431def get_pipe_attributes( 432 self, 433 pipe: mrsm.Pipe, 434 debug: bool = False, 435) -> Dict[str, Any]: 436 """Get a Pipe's attributes from the API 437 438 Parameters 439 ---------- 440 pipe: meerschaum.Pipe 441 The pipe whose attributes we are fetching. 442 443 Returns 444 ------- 445 A dictionary of a pipe's attributes. 446 If the pipe does not exist, return an empty dictionary. 447 """ 448 r_url = pipe_r_url(pipe) 449 response = self.get( 450 r_url + '/attributes', 451 params={ 452 'instance': self.get_pipe_instance_keys(pipe), 453 }, 454 debug=debug 455 ) 456 try: 457 return json.loads(response.text) 458 except Exception as e: 459 warn(f"Failed to get the attributes for {pipe}:\n{e}") 460 return {}
Get a Pipe's attributes from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe whose attributes we are fetching.
Returns
- A dictionary of a pipe's attributes. If the pipe does not exist, return an empty dictionary.
463def get_sync_time( 464 self, 465 pipe: mrsm.Pipe, 466 params: Optional[Dict[str, Any]] = None, 467 newest: bool = True, 468 debug: bool = False, 469) -> Union[datetime, int, None]: 470 """Get a Pipe's most recent datetime value from the API. 471 472 Parameters 473 ---------- 474 pipe: meerschaum.Pipe 475 The pipe to select from. 476 477 params: Optional[Dict[str, Any]], default None 478 Optional params dictionary to build the WHERE clause. 479 480 newest: bool, default True 481 If `True`, get the most recent datetime (honoring `params`). 482 If `False`, get the oldest datetime (ASC instead of DESC). 483 484 Returns 485 ------- 486 The most recent (or oldest if `newest` is `False`) datetime of a pipe, 487 rounded down to the closest minute. 488 """ 489 from meerschaum.utils.misc import is_int 490 from meerschaum.utils.warnings import warn 491 r_url = pipe_r_url(pipe) 492 response = self.get( 493 r_url + '/sync_time', 494 json=params, 495 params={ 496 'instance': self.get_pipe_instance_keys(pipe), 497 'newest': newest, 498 'debug': debug, 499 }, 500 debug=debug, 501 ) 502 if not response: 503 warn(f"Failed to get the sync time for {pipe}:\n" + response.text) 504 return None 505 506 j = response.json() 507 if j is None: 508 dt = None 509 else: 510 try: 511 dt = ( 512 datetime.fromisoformat(j) 513 if not is_int(j) 514 else int(j) 515 ) 516 except Exception as e: 517 warn(f"Failed to parse the sync time '{j}' for {pipe}:\n{e}") 518 dt = None 519 return dt
Get a Pipe's most recent datetime value from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe to select from.
- params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause.
- newest (bool, default True): If `True`, get the most recent datetime (honoring `params`). If `False`, get the oldest datetime (ASC instead of DESC).
Returns
- The most recent (or oldest if `newest` is `False`) datetime of a pipe, rounded down to the closest minute.
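A usage sketch (the keys and returned datetimes are illustrative):
>>> conn = mrsm.get_connector('api:main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance='api:main')
>>> conn.get_sync_time(pipe)
datetime.datetime(2024, 1, 31, 23, 59)
>>> conn.get_sync_time(pipe, newest=False)
datetime.datetime(2024, 1, 1, 0, 0)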
522def pipe_exists( 523 self, 524 pipe: mrsm.Pipe, 525 debug: bool = False 526) -> bool: 527 """Check the API to see if a Pipe exists. 528 529 Parameters 530 ---------- 531 pipe: 'meerschaum.Pipe' 532 The pipe which were are querying. 533 534 Returns 535 ------- 536 A bool indicating whether a pipe's underlying table exists. 537 """ 538 from meerschaum.utils.debug import dprint 539 from meerschaum.utils.warnings import warn 540 r_url = pipe_r_url(pipe) 541 response = self.get( 542 r_url + '/exists', 543 params={ 544 'instance': self.get_pipe_instance_keys(pipe), 545 }, 546 debug=debug, 547 ) 548 if not response: 549 warn(f"Failed to check if {pipe} exists:\n{response.text}") 550 return False 551 if debug: 552 dprint("Received response: " + str(response.text)) 553 j = response.json() 554 if isinstance(j, dict) and 'detail' in j: 555 warn(j['detail']) 556 return j
Check the API to see if a Pipe exists.
Parameters
- pipe (meerschaum.Pipe): The pipe which we are querying.
Returns
- A bool indicating whether a pipe's underlying table exists.
559def create_metadata( 560 self, 561 debug: bool = False 562) -> bool: 563 """Create metadata tables. 564 565 Returns 566 ------- 567 A bool indicating success. 568 """ 569 from meerschaum.utils.debug import dprint 570 from meerschaum._internal.static import STATIC_CONFIG 571 r_url = STATIC_CONFIG['api']['endpoints']['metadata'] 572 response = self.post(r_url, debug=debug) 573 if debug: 574 dprint("Create metadata response: {response.text}") 575 try: 576 _ = json.loads(response.text) 577 except Exception as e: 578 warn(f"Failed to create metadata on {self}:\n{e}") 579 return False
Create metadata tables.
Returns
- A bool indicating success.
582def get_pipe_rowcount( 583 self, 584 pipe: mrsm.Pipe, 585 begin: Union[str, datetime, int, None] = None, 586 end: Union[str, datetime, int, None] = None, 587 params: Optional[Dict[str, Any]] = None, 588 remote: bool = False, 589 debug: bool = False, 590) -> int: 591 """Get a pipe's row count from the API. 592 593 Parameters 594 ---------- 595 pipe: 'meerschaum.Pipe': 596 The pipe whose row count we are counting. 597 598 begin: Union[str, datetime, int, None], default None 599 If provided, bound the count by this datetime. 600 601 end: Union[str, datetime, int, None], default None 602 If provided, bound the count by this datetime. 603 604 params: Optional[Dict[str, Any]], default None 605 If provided, bound the count by these parameters. 606 607 remote: bool, default False 608 If `True`, return the rowcount for the fetch definition. 609 610 Returns 611 ------- 612 The number of rows in the pipe's table, bound the given parameters. 613 If the table does not exist, return 0. 614 """ 615 r_url = pipe_r_url(pipe) 616 response = self.get( 617 r_url + "/rowcount", 618 json = params, 619 params = { 620 'begin': begin, 621 'end': end, 622 'remote': remote, 623 'instance': self.get_pipe_instance_keys(pipe), 624 }, 625 debug = debug 626 ) 627 if not response: 628 warn(f"Failed to get the rowcount for {pipe}:\n{response.text}") 629 return 0 630 try: 631 return int(json.loads(response.text)) 632 except Exception as e: 633 warn(f"Failed to get the rowcount for {pipe}:\n{e}") 634 return 0
Get a pipe's row count from the API.
Parameters
- pipe (meerschaum.Pipe): The pipe whose row count we are counting.
- begin (Union[str, datetime, int, None], default None): If provided, bound the count by this datetime.
- end (Union[str, datetime, int, None], default None): If provided, bound the count by this datetime.
- params (Optional[Dict[str, Any]], default None): If provided, bound the count by these parameters.
- remote (bool, default False): If `True`, return the rowcount for the fetch definition.
Returns
- The number of rows in the pipe's table, bounded by the given parameters. If the table does not exist, return 0.
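A usage sketch (the keys and count are illustrative):
>>> conn = mrsm.get_connector('api:main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance='api:main')
>>> conn.get_pipe_rowcount(pipe, begin='2024-01-01')
744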
637def drop_pipe( 638 self, 639 pipe: mrsm.Pipe, 640 debug: bool = False 641) -> SuccessTuple: 642 """ 643 Drop a pipe's table but maintain its registration. 644 645 Parameters 646 ---------- 647 pipe: meerschaum.Pipe: 648 The pipe to be dropped. 649 650 Returns 651 ------- 652 A success tuple (bool, str). 653 """ 654 from meerschaum.utils.warnings import error 655 from meerschaum.utils.debug import dprint 656 if pipe is None: 657 error("Pipe cannot be None.") 658 r_url = pipe_r_url(pipe) 659 response = self.delete( 660 r_url + '/drop', 661 params={ 662 'instance': self.get_pipe_instance_keys(pipe), 663 }, 664 debug=debug, 665 ) 666 if debug: 667 dprint(response.text) 668 669 try: 670 data = response.json() 671 except Exception as e: 672 return False, f"Failed to drop {pipe}." 673 674 if isinstance(data, list): 675 response_tuple = data[0], data[1] 676 elif 'detail' in response.json(): 677 response_tuple = response.__bool__(), data['detail'] 678 else: 679 response_tuple = response.__bool__(), response.text 680 681 return response_tuple
Drop a pipe's table but maintain its registration.
Parameters
- pipe (meerschaum.Pipe): The pipe to be dropped.
Returns
- A success tuple (bool, str).
684def clear_pipe( 685 self, 686 pipe: mrsm.Pipe, 687 begin: Union[str, datetime, int, None] = None, 688 end: Union[str, datetime, int, None] = None, 689 params: Optional[Dict[str, Any]] = None, 690 debug: bool = False, 691 **kw 692) -> SuccessTuple: 693 """ 694 Delete rows in a pipe's table. 695 696 Parameters 697 ---------- 698 pipe: meerschaum.Pipe 699 The pipe with rows to be deleted. 700 701 Returns 702 ------- 703 A success tuple. 704 """ 705 r_url = pipe_r_url(pipe) 706 response = self.delete( 707 r_url + '/clear', 708 params={ 709 'begin': begin, 710 'end': end, 711 'params': json.dumps(params), 712 'instance': self.get_pipe_instance_keys(pipe), 713 }, 714 debug=debug, 715 ) 716 if debug: 717 dprint(response.text) 718 719 try: 720 data = response.json() 721 except Exception as e: 722 return False, f"Failed to clear {pipe} with constraints {begin=}, {end=}, {params=}." 723 724 if isinstance(data, list): 725 response_tuple = data[0], data[1] 726 elif 'detail' in response.json(): 727 response_tuple = response.__bool__(), data['detail'] 728 else: 729 response_tuple = response.__bool__(), response.text 730 731 return response_tuple
Delete rows in a pipe's table.
Parameters
- pipe (meerschaum.Pipe): The pipe with rows to be deleted.
Returns
- A success tuple.
734def get_pipe_columns_types( 735 self, 736 pipe: mrsm.Pipe, 737 debug: bool = False, 738) -> Union[Dict[str, str], None]: 739 """ 740 Fetch the columns and types of the pipe's table. 741 742 Parameters 743 ---------- 744 pipe: meerschaum.Pipe 745 The pipe whose columns to be queried. 746 747 Returns 748 ------- 749 A dictionary mapping column names to their database types. 750 751 Examples 752 -------- 753 >>> { 754 ... 'dt': 'TIMESTAMP WITHOUT TIMEZONE', 755 ... 'id': 'BIGINT', 756 ... 'val': 'DOUBLE PRECISION', 757 ... } 758 >>> 759 """ 760 r_url = pipe_r_url(pipe) + '/columns/types' 761 response = self.get( 762 r_url, 763 params={ 764 'instance': self.get_pipe_instance_keys(pipe), 765 }, 766 debug=debug, 767 ) 768 j = response.json() 769 if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1: 770 warn(j['detail']) 771 return None 772 if not isinstance(j, dict): 773 warn(response.text) 774 return None 775 return j
Fetch the columns and types of the pipe's table.
Parameters
- pipe (meerschaum.Pipe): The pipe whose columns are to be queried.
Returns
- A dictionary mapping column names to their database types.
Examples
>>> {
... 'dt': 'TIMESTAMP WITHOUT TIMEZONE',
... 'id': 'BIGINT',
... 'val': 'DOUBLE PRECISION',
... }
>>>
778def get_pipe_columns_indices( 779 self, 780 pipe: mrsm.Pipe, 781 debug: bool = False, 782) -> Union[Dict[str, str], None]: 783 """ 784 Fetch the index information for a pipe. 785 786 Parameters 787 ---------- 788 pipe: mrsm.Pipe 789 The pipe whose columns to be queried. 790 791 Returns 792 ------- 793 A dictionary mapping column names to a list of associated index information. 794 """ 795 r_url = pipe_r_url(pipe) + '/columns/indices' 796 response = self.get( 797 r_url, 798 params={ 799 'instance': self.get_pipe_instance_keys(pipe), 800 }, 801 debug=debug, 802 ) 803 j = response.json() 804 if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1: 805 warn(j['detail']) 806 return None 807 if not isinstance(j, dict): 808 warn(response.text) 809 return None 810 return j
Fetch the index information for a pipe.
Parameters
- pipe (mrsm.Pipe): The pipe whose columns are to be queried.
Returns
- A dictionary mapping column names to a list of associated index information.
16def fetch( 17 self, 18 pipe: mrsm.Pipe, 19 begin: Union[datetime, str, int] = '', 20 end: Union[datetime, int] = None, 21 params: Optional[Dict, Any] = None, 22 debug: bool = False, 23 **kw: Any 24 ) -> Iterator['pd.DataFrame']: 25 """Get the Pipe data from the remote Pipe.""" 26 from meerschaum.utils.debug import dprint 27 from meerschaum.utils.warnings import warn, error 28 from meerschaum.config._patch import apply_patch_to_config 29 30 fetch_params = pipe.parameters.get('fetch', {}) 31 if not fetch_params: 32 warn(f"Missing 'fetch' parameters for {pipe}.", stack=False) 33 return None 34 35 pipe_meta = fetch_params.get('pipe', {}) 36 ### Legacy: check for `connector_keys`, etc. at the root. 37 if not pipe_meta: 38 ck, mk, lk = ( 39 fetch_params.get('connector_keys', None), 40 fetch_params.get('metric_key', None), 41 fetch_params.get('location_key', None), 42 ) 43 if not ck or not mk: 44 warn(f"Missing `fetch:pipe` keys for {pipe}.", stack=False) 45 return None 46 47 pipe_meta.update({ 48 'connector': ck, 49 'metric': mk, 50 'location': lk, 51 }) 52 53 pipe_meta['instance'] = self 54 source_pipe = mrsm.Pipe(**pipe_meta) 55 56 _params = copy.deepcopy(params) if params is not None else {} 57 _params = apply_patch_to_config(_params, fetch_params.get('params', {})) 58 select_columns = fetch_params.get('select_columns', []) 59 omit_columns = fetch_params.get('omit_columns', []) 60 61 return source_pipe.get_data( 62 select_columns = select_columns, 63 omit_columns = omit_columns, 64 begin = begin, 65 end = end, 66 params = _params, 67 debug = debug, 68 as_iterator = True, 69 )
Get the Pipe data from the remote Pipe.
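The `fetch:pipe` keys in the pipe's parameters determine which remote pipe to read. A minimal sketch of the expected shape (the keys are illustrative):
>>> pipe = mrsm.Pipe(
...     'api:remote', 'weather',
...     instance='sql:main',
...     parameters={
...         'fetch': {
...             'pipe': {
...                 'connector': 'plugin:noaa',
...                 'metric': 'weather',
...                 'location': None,
...             },
...             'select_columns': ['dt', 'station', 'temperature'],
...         },
...     },
... )
>>> pipe.sync()  # Invokes fetch() on the 'api:remote' connector.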
24def register_plugin( 25 self, 26 plugin: mrsm.core.Plugin, 27 make_archive: bool = True, 28 debug: bool = False, 29) -> SuccessTuple: 30 """Register a plugin and upload its archive.""" 31 import json 32 archive_path = plugin.make_tar(debug=debug) if make_archive else plugin.archive_path 33 file_pointer = open(archive_path, 'rb') 34 files = {'archive': file_pointer} 35 metadata = { 36 'version': plugin.version, 37 'attributes': json.dumps(plugin.attributes), 38 } 39 r_url = plugin_r_url(plugin) 40 try: 41 response = self.post(r_url, files=files, params=metadata, debug=debug) 42 except Exception: 43 return False, f"Failed to register plugin '{plugin}'." 44 finally: 45 file_pointer.close() 46 47 try: 48 success, msg = json.loads(response.text) 49 except Exception: 50 return False, response.text 51 52 return success, msg
Register a plugin and upload its archive.
55def install_plugin( 56 self, 57 name: str, 58 skip_deps: bool = False, 59 force: bool = False, 60 debug: bool = False 61) -> SuccessTuple: 62 """Download and attempt to install a plugin from the API.""" 63 import os 64 import pathlib 65 import json 66 from meerschaum.core import Plugin 67 from meerschaum.config._paths import PLUGINS_TEMP_RESOURCES_PATH 68 from meerschaum.utils.debug import dprint 69 from meerschaum.utils.packages import attempt_import 70 binaryornot_check = attempt_import('binaryornot.check', lazy=False) 71 r_url = plugin_r_url(name) 72 dest = pathlib.Path(os.path.join(PLUGINS_TEMP_RESOURCES_PATH, name + '.tar.gz')) 73 if debug: 74 dprint(f"Fetching from '{self.url + r_url}' to '{dest}'...") 75 archive_path = self.wget(r_url, dest, debug=debug) 76 is_binary = binaryornot_check.is_binary(str(archive_path)) 77 if not is_binary: 78 fail_msg = f"Failed to download binary for plugin '{name}'." 79 try: 80 with open(archive_path, 'r') as f: 81 j = json.load(f) 82 if isinstance(j, list): 83 success, msg = tuple(j) 84 elif isinstance(j, dict) and 'detail' in j: 85 success, msg = False, fail_msg 86 except Exception: 87 success, msg = False, fail_msg 88 return success, msg 89 plugin = Plugin(name, archive_path=archive_path, repo_connector=self) 90 return plugin.install(skip_deps=skip_deps, force=force, debug=debug)
Download and attempt to install a plugin from the API.
156def delete_plugin( 157 self, 158 plugin: mrsm.core.Plugin, 159 debug: bool = False 160) -> SuccessTuple: 161 """Delete a plugin from an API repository.""" 162 import json 163 r_url = plugin_r_url(plugin) 164 try: 165 response = self.delete(r_url, debug=debug) 166 except Exception: 167 return False, f"Failed to delete plugin '{plugin}'." 168 169 try: 170 success, msg = json.loads(response.text) 171 except Exception: 172 return False, response.text 173 174 return success, msg
Delete a plugin from an API repository.
93def get_plugins( 94 self, 95 user_id: Optional[int] = None, 96 search_term: Optional[str] = None, 97 debug: bool = False 98) -> List[str]: 99 """Return a list of registered plugin names. 100 101 Parameters 102 ---------- 103 user_id: Optional[int], default None 104 If specified, return all plugins from a certain user. 105 106 search_term: Optional[str], default None 107 If specified, return plugins beginning with this string. 108 109 Returns 110 ------- 111 A list of plugin names. 112 """ 113 import json 114 from meerschaum.utils.warnings import error 115 from meerschaum._internal.static import STATIC_CONFIG 116 response = self.get( 117 STATIC_CONFIG['api']['endpoints']['plugins'], 118 params = {'user_id': user_id, 'search_term': search_term}, 119 use_token = True, 120 debug = debug 121 ) 122 if not response: 123 return [] 124 plugins = json.loads(response.text) 125 if not isinstance(plugins, list): 126 error(response.text) 127 return plugins
Return a list of registered plugin names.
Parameters
- user_id (Optional[int], default None): If specified, return all plugins from a certain user.
- search_term (Optional[str], default None): If specified, return plugins beginning with this string.
Returns
- A list of plugin names.
130def get_plugin_attributes( 131 self, 132 plugin: mrsm.core.Plugin, 133 debug: bool = False 134) -> Dict[str, Any]: 135 """ 136 Return a plugin's attributes. 137 """ 138 import json 139 from meerschaum.utils.warnings import warn, error 140 r_url = plugin_r_url(plugin) + '/attributes' 141 response = self.get(r_url, use_token=True, debug=debug) 142 attributes = response.json() 143 if isinstance(attributes, str) and attributes and attributes[0] == '{': 144 try: 145 attributes = json.loads(attributes) 146 except Exception: 147 pass 148 if not isinstance(attributes, dict): 149 error(response.text) 150 elif not response and 'detail' in attributes: 151 warn(attributes['detail']) 152 return {} 153 return attributes
Return a plugin's attributes.
19def login( 20 self, 21 debug: bool = False, 22 warn: bool = True, 23 **kw: Any 24) -> SuccessTuple: 25 """Log in and set the session token.""" 26 if self.login_scheme == 'api_key': 27 validate_response = self.post( 28 STATIC_CONFIG['api']['endpoints']['tokens'] + '/validate', 29 headers={'Authorization': f'Bearer {self.api_key}'}, 30 use_token=False, 31 debug=debug, 32 ) 33 if not validate_response: 34 return False, "API key is not valid." 35 return True, "API key is valid." 36 37 try: 38 if self.login_scheme == 'password': 39 login_data = { 40 'username': self.username, 41 'password': self.password, 42 } 43 elif self.login_scheme == 'client_credentials': 44 login_data = { 45 'client_id': self.client_id, 46 'client_secret': self.client_secret, 47 } 48 except AttributeError: 49 login_data = {} 50 51 if not login_data: 52 return False, f"Please login with the command `login {self}`." 53 54 login_scheme_msg = ( 55 f" as user '{login_data['username']}'" 56 if self.login_scheme == 'username' 57 else '' 58 ) 59 60 response = self.post( 61 STATIC_CONFIG['api']['endpoints']['login'], 62 data=login_data, 63 use_token=False, 64 debug=debug, 65 ) 66 if response: 67 msg = f"Successfully logged into '{self}'{login_scheme_msg}'." 68 self._token = json.loads(response.text)['access_token'] 69 self._expires = datetime.datetime.strptime( 70 json.loads(response.text)['expires'], 71 '%Y-%m-%dT%H:%M:%S.%f' 72 ) 73 else: 74 msg = ( 75 f"Failed to log into '{self}'{login_scheme_msg}.\n" + 76 f" Please verify login details for connector '{self}'." 77 ) 78 if warn and not self.__dict__.get('_emitted_warning', False): 79 _warn(msg, stack=False) 80 self._emitted_warning = True 81 82 return response.__bool__(), msg
Log in and set the session token.
85def test_connection( 86 self, 87 **kw: Any 88) -> Union[bool, None]: 89 """Test if a successful connection to the API may be made.""" 90 from meerschaum.connectors.poll import retry_connect 91 _default_kw = { 92 'max_retries': 1, 'retry_wait': 0, 'warn': False, 93 'connector': self, 'enforce_chaining': False, 94 'enforce_login': False, 95 } 96 _default_kw.update(kw) 97 try: 98 return retry_connect(**_default_kw) 99 except Exception: 100 return False
Test if a successful connection to the API may be made.
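A usage sketch; under the hood this calls `retry_connect` with the defaults shown above, any of which may be overridden:
>>> conn = mrsm.get_connector('api:main')
>>> conn.test_connection(max_retries=3, retry_wait=1)
True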
70def register_user( 71 self, 72 user: mrsm.core.User, 73 debug: bool = False, 74 **kw: Any 75) -> SuccessTuple: 76 """Register a new user.""" 77 from meerschaum._internal.static import STATIC_CONFIG 78 r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/register" 79 data = { 80 'username': user.username, 81 'password': user.password, 82 'attributes': json.dumps(user.attributes), 83 } 84 if user.type: 85 data['type'] = user.type 86 if user.email: 87 data['email'] = user.email 88 response = self.post(r_url, data=data, debug=debug) 89 try: 90 _json = json.loads(response.text) 91 if isinstance(_json, dict) and 'detail' in _json: 92 return False, _json['detail'] 93 success_tuple = tuple(_json) 94 except Exception: 95 msg = response.text if response else f"Failed to register user '{user}'." 96 return False, msg 97 98 return tuple(success_tuple)
Register a new user.
101def get_user_id( 102 self, 103 user: mrsm.core.User, 104 debug: bool = False, 105 **kw: Any 106) -> Union[int, str, UUID, None]: 107 """Get a user's ID.""" 108 from meerschaum._internal.static import STATIC_CONFIG 109 from meerschaum.utils.misc import is_int, is_uuid 110 r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/id" 111 response = self.get(r_url, debug=debug, **kw) 112 try: 113 id_text = str(json.loads(response.text)) 114 if is_int(id_text): 115 user_id = int(id_text) 116 elif is_uuid(id_text): 117 user_id = UUID(id_text) 118 else: 119 user_id = id_text 120 except Exception as e: 121 user_id = None 122 return user_id
def get_users(
    self,
    debug: bool = False,
    **kw: Any
) -> List[str]:
    """
    Return a list of registered usernames.
    """
    from meerschaum._internal.static import STATIC_CONFIG
    response = self.get(
        f"{STATIC_CONFIG['api']['endpoints']['users']}",
        debug=debug,
        use_token=True,
    )
    if not response:
        return []
    try:
        return response.json()
    except Exception:
        return []
def edit_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Edit an existing user."""
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/edit"
    data = {
        'username': user.username,
        'password': user.password,
        'type': user.type,
        'email': user.email,
        'attributes': json.dumps(user.attributes),
    }
    response = self.post(r_url, data=data, debug=debug)
    try:
        _json = json.loads(response.text)
        if isinstance(_json, dict) and 'detail' in _json:
            return False, _json['detail']
        success_tuple = tuple(_json)
    except Exception:
        msg = response.text if response else f"Failed to edit user '{user}'."
        return False, msg

    return success_tuple
def delete_user(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """Delete a user."""
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}"
    response = self.delete(r_url, debug=debug)
    try:
        _json = json.loads(response.text)
        if isinstance(_json, dict) and 'detail' in _json:
            return False, _json['detail']
        success_tuple = tuple(_json)
    except Exception:
        success_tuple = False, f"Failed to delete user '{user.username}'."
    return success_tuple
def get_user_password_hash(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """If configured, get a user's password hash."""
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/password_hash"
    response = self.get(r_url, debug=debug, **kw)
    if not response:
        return None
    return response.json()
def get_user_type(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> Optional[str]:
    """If configured, get a user's type."""
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/type"
    response = self.get(r_url, debug=debug, **kw)
    if not response:
        return None
    return response.json()
def get_user_attributes(
    self,
    user: mrsm.core.User,
    debug: bool = False,
    **kw: Any
) -> Union[Dict[str, Any], None]:
    """Get a user's attributes."""
    from meerschaum._internal.static import STATIC_CONFIG
    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/attributes"
    response = self.get(r_url, debug=debug, **kw)
    try:
        attributes = json.loads(response.text)
    except Exception:
        attributes = None
    return attributes
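Taken together, a hedged sketch of the user-management workflow; the `User` constructor arguments below are assumptions, so consult `meerschaum.core.User` for the exact signature:

```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('api:main')
>>> user = mrsm.core.User('newuser', 'changeme')  # hypothetical constructor arguments
>>> conn.register_user(user)
(True, "Success")
>>> 'newuser' in conn.get_users()
True
>>> conn.get_user_attributes(user)
{}
```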
def register_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Register the provided token to the API.
    """
    from meerschaum.utils.dtypes import json_serialize_value
    r_url = tokens_endpoint + '/register'
    response = self.post(
        r_url,
        data=json.dumps({
            'label': token.label,
            'scopes': token.scopes,
            'expiration': token.expiration,
        }, default=json_serialize_value),
        debug=debug,
    )
    if not response:
        return False, f"Failed to register token:\n{response.text}"

    data = response.json()
    token.label = data['label']
    token.secret = data['secret']
    token.id = uuid.UUID(data['id'])
    if data.get('expiration', None):
        token.expiration = datetime.fromisoformat(data['expiration'])

    return True, f"Registered token '{token.label}'."
def get_token_model(self, token_id: uuid.UUID, debug: bool = False) -> 'Union[TokenModel, None]':
    """
    Return a token's model from the API instance.
    """
    from meerschaum.models import TokenModel
    r_url = tokens_endpoint + f'/{token_id}'
    response = self.get(r_url, debug=debug)
    if not response:
        return None
    data = response.json()
    return TokenModel(**data)
def get_tokens(self, labels: Optional[List[str]] = None, debug: bool = False) -> List[Token]:
    """
    Return the tokens registered to the current user.
    """
    from meerschaum.utils.warnings import warn
    r_url = tokens_endpoint
    params = {}
    if labels:
        params['labels'] = ','.join(labels)
    response = self.get(r_url, params=params, debug=debug)
    if not response:
        warn(f"Could not get tokens from '{self}':\n{response.text}")
        return []

    tokens = [
        Token(instance=self, **payload)
        for payload in response.json()
    ]
    return tokens
def edit_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Persist the token's in-memory state to the API.
    """
    r_url = tokens_endpoint + f"/{token.id}/edit"
    response = self.post(
        r_url,
        json={
            'creation': token.creation.isoformat() if token.creation else None,
            'expiration': token.expiration.isoformat() if token.expiration else None,
            'label': token.label,
            'is_valid': token.is_valid,
            'scopes': token.scopes,
        },
        debug=debug,
    )
    if not response:
        return False, f"Failed to edit token:\n{response.text}"

    success, msg = response.json()
    return success, msg
def invalidate_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Invalidate the token, disabling it for future requests.
    """
    r_url = tokens_endpoint + f"/{token.id}/invalidate"
    response = self.post(r_url, debug=debug)
    if not response:
        return False, f"Failed to invalidate token:\n{response.text}"

    success, msg = response.json()
    return success, msg
def get_token_scopes(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> List[str]:
    """
    Return the scopes for a token.
    """
    _token_id = token_id.id if isinstance(token_id, Token) else token_id
    model = self.get_token_model(_token_id, debug=debug)
    return getattr(model, 'scopes', [])
def token_exists(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> bool:
    """
    Return `True` if a token exists.
    """
    _token_id = token_id.id if isinstance(token_id, Token) else token_id
    model = self.get_token_model(_token_id, debug=debug)
    if model is None:
        return False
    return model.creation is not None
def delete_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
    """
    Delete the token from the API.
    """
    r_url = tokens_endpoint + f"/{token.id}"
    response = self.delete(r_url, debug=debug)
    if not response:
        return False, f"Failed to delete token:\n{response.text}"

    success, msg = response.json()
    return success, msg
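A hedged sketch of the token lifecycle; the `Token` constructor shown is an assumption (the source above only confirms that `Token` accepts an `instance` keyword), so check `meerschaum.core.Token` for the actual signature:

```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('api:main')
>>> token = mrsm.core.Token(label='ci-runner', instance=conn)  # hypothetical constructor
>>> conn.register_token(token)  # populates token.id and token.secret from the response
(True, "Registered token 'ci-runner'.")
>>> conn.token_exists(token.id)
True
>>> conn.invalidate_token(token)
(True, "Success")
```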
@classmethod
def from_uri(
    cls,
    uri: str,
    label: Optional[str] = None,
    as_dict: bool = False,
) -> Union[
    'meerschaum.connectors.APIConnector',
    Dict[str, Union[str, int]],
]:
    """
    Create a new APIConnector from a URI string.

    Parameters
    ----------
    uri: str
        The URI connection string.

    label: Optional[str], default None
        If provided, use this as the connector label.
        Otherwise use the determined database name.

    as_dict: bool, default False
        If `True`, return a dictionary of the keyword arguments
        necessary to create a new `APIConnector`, otherwise create a new object.

    Returns
    -------
    A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
    """
    from meerschaum.connectors.sql import SQLConnector
    params = SQLConnector.parse_uri(uri)
    if 'host' not in params:
        error("No host was found in the provided URI.")
    params['protocol'] = params.pop('flavor')
    params['label'] = label or (
        (
            (params['username'] + '@' if 'username' in params else '')
            + params['host']
        ).lower()
    )

    return cls(**params) if not as_dict else params
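For example, a sketch with illustrative output; the exact keys depend on what `SQLConnector.parse_uri` extracts from the URI:

```
>>> from meerschaum.connectors import APIConnector
>>> APIConnector.from_uri('http://user:pass@localhost:8000', as_dict=True)
{'username': 'user', 'password': 'pass', 'host': 'localhost', 'port': 8000, 'protocol': 'http', 'label': 'user@localhost'}
```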
def get_jobs(self, debug: bool = False) -> Dict[str, Job]:
    """
    Return a dictionary of remote jobs.
    """
    response = self.get(JOBS_ENDPOINT, debug=debug)
    if not response:
        warn(f"Failed to get remote jobs from {self}.")
        return {}
    return {
        name: Job(
            name,
            job_meta['sysargs'],
            executor_keys=str(self),
            _properties=job_meta['daemon']['properties']
        )
        for name, job_meta in response.json().items()
    }
def get_job(self, name: str, debug: bool = False) -> Job:
    """
    Return a single Job object.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    if not metadata:
        raise ValueError(f"Job '{name}' does not exist.")

    return Job(
        name,
        metadata['sysargs'],
        executor_keys=str(self),
        _properties=metadata['daemon']['properties'],
    )
def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the metadata for a single job.
    """
    now = time.perf_counter()
    _job_metadata_cache = self.__dict__.get('_job_metadata_cache', None)
    _job_metadata_timestamp = (
        _job_metadata_cache.get(name, {}).get('timestamp', None)
    ) if _job_metadata_cache is not None else None

    if (
        _job_metadata_timestamp is not None
        and (now - _job_metadata_timestamp) < JOB_METADATA_CACHE_SECONDS
    ):
        if debug:
            dprint(f"Returning cached metadata for job '{name}'.")
        return _job_metadata_cache[name]['metadata']

    response = self.get(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        if debug:
            msg = (
                response.json()['detail']
                if 'detail' in response.text
                else response.text
            )
            warn(f"Failed to get metadata for job '{name}':\n{msg}")
        return {}

    metadata = response.json()
    if _job_metadata_cache is None:
        self._job_metadata_cache = {}

    self._job_metadata_cache[name] = {
        'timestamp': now,
        'metadata': metadata,
    }
    return metadata
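Note the memoization: results are cached per job name for `JOB_METADATA_CACHE_SECONDS`, so repeated calls within the window return the same payload object rather than hitting the API again. Continuing from an `api:main` connector `conn`:

```
>>> meta_1 = conn.get_job_metadata('syncing-engine')
>>> meta_2 = conn.get_job_metadata('syncing-engine')  # served from the cache if within the window
>>> meta_1 is meta_2
True
```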
def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
    """
    Return the daemon properties for a single job.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    return metadata.get('daemon', {}).get('properties', {})
def get_job_exists(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a job exists.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/exists', debug=debug)
    if not response:
        warn(f"Failed to determine whether job '{name}' exists.")
        return False

    return response.json()
def delete_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Delete a job.
    """
    response = self.delete(JOBS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
def start_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Start a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/start", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
def create_job(
    self,
    name: str,
    sysargs: List[str],
    properties: Optional[Dict[str, str]] = None,
    debug: bool = False,
) -> SuccessTuple:
    """
    Create a job.
    """
    response = self.post(
        JOBS_ENDPOINT + f"/{name}",
        json={
            'sysargs': sysargs,
            'properties': properties,
        },
        debug=debug,
    )
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
def stop_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Stop a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/stop", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
def pause_job(self, name: str, debug: bool = False) -> SuccessTuple:
    """
    Pause a job.
    """
    response = self.post(JOBS_ENDPOINT + f"/{name}/pause", debug=debug)
    if not response:
        if 'detail' in response.text:
            return False, response.json()['detail']
        return False, response.text

    return tuple(response.json())
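A hedged sketch of the remote-job lifecycle, assuming `api:main` points at a running Meerschaum API server (the job name and sysargs are illustrative):

```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('api:main')
>>> conn.create_job('syncing-engine', ['sync', 'pipes', '--loop'])
(True, "Success")
>>> conn.start_job('syncing-engine')
(True, "Success")
>>> conn.get_job_exists('syncing-engine')
True
>>> conn.stop_job('syncing-engine')
(True, "Success")
```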
def get_logs(self, name: str, debug: bool = False) -> str:
    """
    Return the logs for a job.
    """
    response = self.get(LOGS_ENDPOINT + f"/{name}", debug=debug)
    if not response:
        raise ValueError(f"Cannot fetch logs for job '{name}':\n{response.text}")

    return response.json()
def get_job_stop_time(self, name: str, debug: bool = False) -> Union[datetime, None]:
    """
    Return the job's manual stop time.
    """
    response = self.get(JOBS_ENDPOINT + f"/{name}/stop_time", debug=debug)
    if not response:
        warn(f"Failed to get stop time for job '{name}':\n{response.text}")
        return None

    data = response.json()
    if data is None:
        return None

    return datetime.fromisoformat(data)
def monitor_logs(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[], str],
    stop_callback_function: Callable[[SuccessTuple], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and execute a callback with the changes.
    """
    return asyncio.run(
        self.monitor_logs_async(
            name,
            callback_function,
            input_callback_function=input_callback_function,
            stop_callback_function=stop_callback_function,
            stop_on_exit=stop_on_exit,
            strip_timestamps=strip_timestamps,
            accept_input=accept_input,
            debug=debug
        )
    )
async def monitor_logs_async(
    self,
    name: str,
    callback_function: Callable[[Any], Any],
    input_callback_function: Callable[[], str],
    stop_callback_function: Callable[[SuccessTuple], str],
    stop_on_exit: bool = False,
    strip_timestamps: bool = False,
    accept_input: bool = True,
    debug: bool = False,
):
    """
    Monitor a job's log files and await a callback with the changes.
    """
    import traceback
    from meerschaum.jobs import StopMonitoringLogs
    from meerschaum.utils.formatting._jobs import strip_timestamp_from_line

    websockets, websockets_exceptions = mrsm.attempt_import('websockets', 'websockets.exceptions')
    protocol = 'ws' if self.URI.startswith('http://') else 'wss'
    port = self.port if 'port' in self.__dict__ else ''
    uri = f"{protocol}://{self.host}:{port}{LOGS_ENDPOINT}/{name}/ws"

    async def _stdin_callback(client):
        if input_callback_function is None:
            return

        if asyncio.iscoroutinefunction(input_callback_function):
            data = await input_callback_function()
        else:
            data = input_callback_function()

        await client.send(data)

    async def _stop_callback(client):
        try:
            result = tuple(json.loads(await client.recv()))
        except Exception as e:
            warn(traceback.format_exc())
            result = False, str(e)

        if stop_callback_function is not None:
            if asyncio.iscoroutinefunction(stop_callback_function):
                await stop_callback_function(result)
            else:
                stop_callback_function(result)

        if stop_on_exit:
            raise StopMonitoringLogs

    message_callbacks = {
        JOBS_STDIN_MESSAGE: _stdin_callback,
        JOBS_STOP_MESSAGE: _stop_callback,
    }

    async with websockets.connect(uri) as websocket:
        try:
            await websocket.send(self.token or 'no-login')
        except websockets_exceptions.ConnectionClosedOK:
            pass

        while True:
            try:
                response = await websocket.recv()
                callback = message_callbacks.get(response, None)
                if callback is not None:
                    await callback(websocket)
                    continue

                if strip_timestamps:
                    response = strip_timestamp_from_line(response)

                if asyncio.iscoroutinefunction(callback_function):
                    await callback_function(response)
                else:
                    callback_function(response)
            except (KeyboardInterrupt, StopMonitoringLogs):
                await websocket.close()
                break
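A hedged sketch of tailing a job's logs with `print` as the callback (this blocks until the monitor is interrupted, or until the job stops when `stop_on_exit=True`):

```
>>> conn.monitor_logs(
...     'syncing-engine',
...     print,
...     input_callback_function=input,
...     stop_callback_function=print,
...     stop_on_exit=True,
... )
```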
def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
    """
    Return whether a remote job is blocking on stdin.
    """
    response = self.get(JOBS_ENDPOINT + f'/{name}/is_blocking_on_stdin', debug=debug)
    if not response:
        return False

    return response.json()
def get_job_began(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `began` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    began_str = properties.get('daemon', {}).get('began', None)
    if began_str is None:
        return None

    return began_str
def get_job_ended(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `ended` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    ended_str = properties.get('daemon', {}).get('ended', None)
    if ended_str is None:
        return None

    return ended_str
def get_job_paused(self, name: str, debug: bool = False) -> Union[str, None]:
    """
    Return a job's `paused` timestamp, if it exists.
    """
    properties = self.get_job_properties(name, debug=debug)
    paused_str = properties.get('daemon', {}).get('paused', None)
    if paused_str is None:
        return None

    return paused_str
def get_job_status(self, name: str, debug: bool = False) -> str:
    """
    Return the job's status.
    """
    metadata = self.get_job_metadata(name, debug=debug)
    return metadata.get('status', 'stopped')
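The status and timestamp getters read straight from the daemon's properties; the values below are illustrative:

```
>>> conn.get_job_status('syncing-engine')
'running'
>>> conn.get_job_began('syncing-engine')
'2024-01-01T00:00:00+00:00'
```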
def get_connector(
    type: str = None,
    label: str = None,
    refresh: bool = False,
    debug: bool = False,
    _load_plugins: bool = True,
    **kw: Any
) -> Connector:
    """
    Return existing connector or create new connection and store for reuse.

    You can create new connectors if enough parameters are provided for the given type and flavor.

    Parameters
    ----------
    type: Optional[str], default None
        Connector type (sql, api, etc.).
        Defaults to the type of the configured `instance_connector`.

    label: Optional[str], default None
        Connector label (e.g. main). Defaults to `'main'`.

    refresh: bool, default False
        Refresh the Connector instance / construct new object. Defaults to `False`.

    kw: Any
        Other arguments to pass to the Connector constructor.
        If the Connector has already been constructed and new arguments are provided,
        `refresh` is set to `True` and the old Connector is replaced.

    Returns
    -------
    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
    `meerschaum.connectors.sql.SQLConnector`).

    Examples
    --------
    The following parameters would create a new
    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.

    ```
    >>> conn = get_connector(
    ...     type = 'sql',
    ...     label = 'newlabel',
    ...     flavor = 'sqlite',
    ...     database = '/file/path/to/database.db'
    ... )
    >>>
    ```

    """
    from meerschaum.connectors.parse import parse_instance_keys
    from meerschaum.config import get_config
    from meerschaum._internal.static import STATIC_CONFIG
    from meerschaum.utils.warnings import warn
    global _loaded_plugin_connectors
    if isinstance(type, str) and not label and ':' in type:
        type, label = type.split(':', maxsplit=1)

    if _load_plugins:
        with _locks['_loaded_plugin_connectors']:
            if not _loaded_plugin_connectors:
                load_plugin_connectors()
                _load_builtin_custom_connectors()
                _loaded_plugin_connectors = True

    if type is None and label is None:
        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
        ### recursive call to get_connector
        return parse_instance_keys(default_instance_keys)

    ### NOTE: the default instance connector may not be main.
    ### Only fall back to 'main' if the type is provided but the label is omitted.
    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']

    ### type might actually be a label. Check if so and raise a warning.
    if type not in connectors:
        possibilities, poss_msg = [], ""
        for _type in get_config('meerschaum', 'connectors'):
            if type in get_config('meerschaum', 'connectors', _type):
                possibilities.append(f"{_type}:{type}")
        if len(possibilities) > 0:
            poss_msg = " Did you mean"
            for poss in possibilities[:-1]:
                poss_msg += f" '{poss}',"
            if poss_msg.endswith(','):
                poss_msg = poss_msg[:-1]
            if len(possibilities) > 1:
                poss_msg += " or"
            poss_msg += f" '{possibilities[-1]}'?"

        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
        return None

    if 'sql' not in types:
        from meerschaum.connectors.plugin import PluginConnector
        from meerschaum.connectors.valkey import ValkeyConnector
        with _locks['types']:
            types.update({
                'api': APIConnector,
                'sql': SQLConnector,
                'plugin': PluginConnector,
                'valkey': ValkeyConnector,
            })

    ### determine if we need to call the constructor
    if not refresh:
        ### see if any user-supplied arguments differ from the existing instance
        if label in connectors[type]:
            warning_message = None
            for attribute, value in kw.items():
                if attribute not in connectors[type][label].meta:
                    import inspect
                    cls = connectors[type][label].__class__
                    cls_init_signature = inspect.signature(cls)
                    cls_init_params = cls_init_signature.parameters
                    if attribute not in cls_init_params:
                        warning_message = (
                            f"Received new attribute '{attribute}' not present in connector "
                            + f"{connectors[type][label]}.\n"
                        )
                elif connectors[type][label].__dict__[attribute] != value:
                    warning_message = (
                        f"Mismatched values for attribute '{attribute}' in connector "
                        + f"'{connectors[type][label]}'.\n"
                        + f"  - Keyword value: '{value}'\n"
                        + f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
                    )
            if warning_message is not None:
                warning_message += (
                    "\nSetting `refresh` to True and recreating connector with type:"
                    + f" '{type}' and label '{label}'."
                )
                refresh = True
                warn(warning_message)
        else:  ### connector doesn't yet exist
            refresh = True

    ### only create an object if refresh is True
    ### (can be manually specified, otherwise determined above)
    if refresh:
        with _locks['connectors']:
            try:
                ### will raise an error if configuration is incorrect / missing
                conn = types[type](label=label, **kw)
                connectors[type][label] = conn
            except InvalidAttributesError as ie:
                warn(
                    f"Incorrect attributes for connector '{type}:{label}'.\n"
                    + str(ie),
                    stack=False,
                )
                conn = None
            except Exception as e:
                from meerschaum.utils.formatting import get_console
                console = get_console()
                if console:
                    console.print_exception()
                warn(
                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
                    stack=False,
                )
                conn = None
        if conn is None:
            return None

    return connectors[type][label]
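Because the function splits on the first colon when no label is given, the `'type:label'` shorthand is also accepted:

```
>>> from meerschaum import get_connector
>>> conn = get_connector('sql:main')
```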
def is_connected(keys: str, **kw) -> bool:
    """
    Check if the connector keys correspond to an active connection.
    If the connector has not been created, it will immediately return `False`.
    If the connector exists but cannot communicate with the source, return `False`.

    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.

    Parameters
    ----------
    keys:
        The keys to the connector (e.g. `'sql:main'`).

    Returns
    -------
    A `bool` corresponding to whether a successful connection may be made.

    """
    import warnings
    if ':' not in keys:
        warn(f"Invalid connector keys '{keys}'")

    try:
        typ, label = keys.split(':')
    except Exception:
        return False
    if typ not in instance_types:
        return False
    if label not in connectors.get(typ, {}):
        return False

    from meerschaum.connectors.parse import parse_instance_keys
    conn = parse_instance_keys(keys)
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            return conn.test_connection(**kw)
    except Exception:
        return False
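For instance, assuming `sql:main` has already been constructed in this session and can reach its database:

```
>>> from meerschaum.connectors import get_connector, is_connected
>>> conn = get_connector('sql:main')
>>> is_connected('sql:main')
True
>>> is_connected('sql:nonexistent')
False
```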