meerschaum.connectors

Create connectors with meerschaum.connectors.get_connector(). For ease of use, you can also import from the root meerschaum module:

>>> from meerschaum import get_connector
>>> conn = get_connector()
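
Connectors may also be fetched by their keys string `type:label`; the label falls back to `'main'` when omitted. For example, assuming a configured `sql:main` connector:

>>> conn = get_connector('sql:main')  # same as get_connector(type='sql', label='main')
>>> get_connector('sql') is conn      # the label defaults to 'main'
True
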
  1#! /usr/bin/env python
  2# -*- coding: utf-8 -*-
  3# vim:fenc=utf-8
  4
  5"""
  6Create connectors with `meerschaum.connectors.get_connector()`.
  7For ease of use, you can also import from the root `meerschaum` module:
  8```
  9>>> from meerschaum import get_connector
 10>>> conn = get_connector()
 11```
 12"""
 13
 14from __future__ import annotations
 15
 16import meerschaum as mrsm
 17from meerschaum.utils.typing import Any, Union, List, Dict, Optional
 18from meerschaum.utils.threading import RLock
 19from meerschaum.utils.warnings import warn
 20
 21from meerschaum.connectors._Connector import Connector, InvalidAttributesError
 22from meerschaum.connectors.instance._InstanceConnector import InstanceConnector
 23from meerschaum.connectors.sql._SQLConnector import SQLConnector
 24from meerschaum.connectors.api._APIConnector import APIConnector
 25
 26__all__ = (
 27    "make_connector",
 28    "Connector",
 29    "InstanceConnector",
 30    "SQLConnector",
 31    "APIConnector",
 32    "get_connector",
 33    "is_connected",
 34    "poll",
 35    "api",
 36    "sql",
 37    "valkey",
 38    "parse",
 39)
 40
 41### store connectors partitioned by
 42### type, label for reuse
 43connectors: Dict[str, Dict[str, Connector]] = {
 44    'api'    : {},
 45    'sql'    : {},
 46    'plugin' : {},
 47    'valkey' : {},
 48}
 49instance_types: List[str] = ['sql', 'api']
 50_locks: Dict[str, RLock] = {
 51    'connectors'               : RLock(),
 52    'types'                    : RLock(),
 53    'custom_types'             : RLock(),
 54    'plugins_types'            : RLock(),
 55    '_loaded_plugin_connectors': RLock(),
 56    'instance_types'           : RLock(),
 57}
 58
 59### Fill this with objects only when connectors are first referenced.
 60types: Dict[str, Any] = {}
 61custom_types: set = set()
 62plugins_types: Dict[str, List[str]] = {}
 63_known_custom_types: set = set()
 64_loaded_plugin_connectors: bool = False
 65
 66
 67def get_connector(
  68    type: Optional[str] = None,
  69    label: Optional[str] = None,
 70    refresh: bool = False,
 71    debug: bool = False,
 72    _load_plugins: bool = True,
 73    **kw: Any
 74) -> Connector:
 75    """
 76    Return existing connector or create new connection and store for reuse.
 77    
 78    You can create new connectors if enough parameters are provided for the given type and flavor.
 79
 80    Parameters
 81    ----------
 82    type: Optional[str], default None
 83        Connector type (sql, api, etc.).
 84        Defaults to the type of the configured `instance_connector`.
 85
 86    label: Optional[str], default None
 87        Connector label (e.g. main). Defaults to `'main'`.
 88
 89    refresh: bool, default False
  90        If `True`, reconstruct the `Connector` and replace the cached instance.
 91
 92    kw: Any
 93        Other arguments to pass to the Connector constructor.
 94        If the Connector has already been constructed and new arguments are provided,
 95        `refresh` is set to `True` and the old Connector is replaced.
 96
 97    Returns
 98    -------
 99    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
100    `meerschaum.connectors.sql.SQLConnector`).
101    
102    Examples
103    --------
104    The following parameters would create a new
105    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.
106
107    ```
108    >>> conn = get_connector(
109    ...     type = 'sql',
110    ...     label = 'newlabel',
111    ...     flavor = 'sqlite',
112    ...     database = '/file/path/to/database.db'
113    ... )
114    >>>
115    ```
116
117    """
118    from meerschaum.connectors.parse import parse_instance_keys
119    from meerschaum.config import get_config
120    from meerschaum._internal.static import STATIC_CONFIG
121    from meerschaum.utils.warnings import warn
122    global _loaded_plugin_connectors
123    if isinstance(type, str) and not label and ':' in type:
124        type, label = type.split(':', maxsplit=1)
125
126    if _load_plugins:
127        with _locks['_loaded_plugin_connectors']:
128            if not _loaded_plugin_connectors:
129                load_plugin_connectors()
130                _load_builtin_custom_connectors()
131                _loaded_plugin_connectors = True
132
133    if type is None and label is None:
134        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
135        ### recursive call to get_connector
136        return parse_instance_keys(default_instance_keys)
137
138    ### NOTE: the default instance connector may not be main.
 139    ### Only fall back to 'main' if the type is provided but the label is omitted.
140    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']
141
142    ### type might actually be a label. Check if so and raise a warning.
143    if type not in connectors:
144        possibilities, poss_msg = [], ""
145        for _type in get_config('meerschaum', 'connectors'):
146            if type in get_config('meerschaum', 'connectors', _type):
147                possibilities.append(f"{_type}:{type}")
148        if len(possibilities) > 0:
149            poss_msg = " Did you mean"
150            for poss in possibilities[:-1]:
151                poss_msg += f" '{poss}',"
152            if poss_msg.endswith(','):
153                poss_msg = poss_msg[:-1]
154            if len(possibilities) > 1:
155                poss_msg += " or"
156            poss_msg += f" '{possibilities[-1]}'?"
157
158        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
159        return None
160
161    if 'sql' not in types:
162        from meerschaum.connectors.plugin import PluginConnector
163        from meerschaum.connectors.valkey import ValkeyConnector
164        with _locks['types']:
165            types.update({
166                'api': APIConnector,
167                'sql': SQLConnector,
168                'plugin': PluginConnector,
169                'valkey': ValkeyConnector,
170            })
171
172    ### determine if we need to call the constructor
173    if not refresh:
174        ### see if any user-supplied arguments differ from the existing instance
175        if label in connectors[type]:
176            warning_message = None
177            for attribute, value in kw.items():
178                if attribute not in connectors[type][label].meta:
179                    import inspect
180                    cls = connectors[type][label].__class__
181                    cls_init_signature = inspect.signature(cls)
182                    cls_init_params = cls_init_signature.parameters
183                    if attribute not in cls_init_params:
184                        warning_message = (
185                            f"Received new attribute '{attribute}' not present in connector " +
186                            f"{connectors[type][label]}.\n"
187                        )
188                elif connectors[type][label].__dict__[attribute] != value:
189                    warning_message = (
190                        f"Mismatched values for attribute '{attribute}' in connector "
191                        + f"'{connectors[type][label]}'.\n" +
192                        f"  - Keyword value: '{value}'\n" +
193                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
194                    )
195            if warning_message is not None:
196                warning_message += (
197                    "\nSetting `refresh` to True and recreating connector with type:"
198                    + f" '{type}' and label '{label}'."
199                )
200                refresh = True
201                warn(warning_message)
202        else: ### connector doesn't yet exist
203            refresh = True
204
205    ### only create an object if refresh is True
206    ### (can be manually specified, otherwise determined above)
207    if refresh:
208        with _locks['connectors']:
209            try:
210                ### will raise an error if configuration is incorrect / missing
211                conn = types[type](label=label, **kw)
212                connectors[type][label] = conn
213            except InvalidAttributesError as ie:
214                warn(
215                    f"Incorrect attributes for connector '{type}:{label}'.\n"
216                    + str(ie),
217                    stack = False,
218                )
219                conn = None
220            except Exception as e:
221                from meerschaum.utils.formatting import get_console
222                console = get_console()
223                if console:
224                    console.print_exception()
225                warn(
226                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
227                    stack = False,
228                )
229                conn = None
230        if conn is None:
231            return None
232
233    return connectors[type][label]
234
235
236def is_connected(keys: str, **kw) -> bool:
237    """
238    Check if the connector keys correspond to an active connection.
239    If the connector has not been created, it will immediately return `False`.
240    If the connector exists but cannot communicate with the source, return `False`.
241    
 242    **NOTE:** Only works with instance connectors (e.g. `SQLConnector` and `APIConnector`).
243    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
244
245    Parameters
246    ----------
247    keys:
248        The keys to the connector (e.g. `'sql:main'`).
249        
250    Returns
251    -------
252    A `bool` corresponding to whether a successful connection may be made.
253
254    """
255    import warnings
256    if ':' not in keys:
257        warn(f"Invalid connector keys '{keys}'")
258
259    try:
260        typ, label = keys.split(':')
261    except Exception:
262        return False
263    if typ not in instance_types:
264        return False
265    if label not in connectors.get(typ, {}):
266        return False
267
268    from meerschaum.connectors.parse import parse_instance_keys
269    conn = parse_instance_keys(keys)
270    try:
271        with warnings.catch_warnings():
272            warnings.filterwarnings('ignore')
273            return conn.test_connection(**kw)
274    except Exception:
275        return False
276
277
278def make_connector(cls, _is_executor: bool = False):
279    """
280    Register a class as a `Connector`.
281    The `type` will be the lower case of the class name, without the suffix `connector`.
282
283    Parameters
284    ----------
 285    cls:
 286        The connector class to register. Set the class attribute `IS_INSTANCE` to `True`
 287        to make this an instance connector type (requires implementing the various pipes functions).
288
289    Examples
290    --------
291    >>> import meerschaum as mrsm
292    >>> from meerschaum.connectors import make_connector, Connector
293    >>> 
294    >>> @make_connector
 295    ... class FooConnector(Connector):
296    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
297    ... 
298    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
299    >>> print(conn.username, conn.password)
300    dog cat
301    >>> 
302    """
303    import re
304    from meerschaum.plugins import _get_parent_plugin
305    suffix_regex = (
306        r'connector$'
307        if not _is_executor
308        else r'executor$'
309    )
310    plugin_name = _get_parent_plugin(2)
311    typ = re.sub(suffix_regex, '', cls.__name__.lower())
312    with _locks['types']:
313        types[typ] = cls
314    with _locks['custom_types']:
315        custom_types.add(typ)
316    if plugin_name:
317        with _locks['plugins_types']:
318            if plugin_name not in plugins_types:
319                plugins_types[plugin_name] = []
320            plugins_types[plugin_name].append(typ)
321    with _locks['connectors']:
322        if typ not in connectors:
323            connectors[typ] = {}
324    if getattr(cls, 'IS_INSTANCE', False):
325        with _locks['instance_types']:
326            if typ not in instance_types:
327                instance_types.append(typ)
328
329    return cls
330
331
332def load_plugin_connectors():
333    """
334    If a plugin makes use of the `make_connector` decorator,
335    load its module.
336    """
337    from meerschaum.plugins import get_plugins, import_plugins
338    to_import = []
339    for plugin in get_plugins():
340        if plugin is None:
341            continue
342
343        with open(plugin.__file__, encoding='utf-8') as f:
344            text = f.read()
345
346        if 'make_connector' in text or 'Connector' in text:
347            to_import.append(plugin.name)
348
349    if not to_import:
350        return
351
352    import_plugins(*to_import)
353
354
355def unload_plugin_connectors(
356    plugin_names: Optional[List[str]] = None,
357    debug: bool = False,
358) -> None:
359    """
360    Unload custom connectors added by plugins.
361    """
362    from meerschaum.plugins import get_plugins_names
363    global custom_types, _known_custom_types, types, plugins_types, connectors, _loaded_plugin_connectors
364
365    plugin_names = plugin_names or get_plugins_names()
366
367    for plugin_name in plugin_names:
368        plugin_types = plugins_types.get(plugin_name, [])
369        for typ in plugin_types:
370            _ = types.pop(typ, None)
371            _ = connectors.pop(typ, None)
372            if typ in instance_types:
373                instance_types.remove(typ)
374
375    custom_types.clear()
376    custom_types.update(_known_custom_types)
377    _loaded_plugin_connectors = False
378
379
380def get_connector_plugin(
381    connector: Connector,
382) -> Union[str, None, mrsm.Plugin]:
383    """
384    Determine the plugin for a connector.
385    This is useful for handling virtual environments for custom instance connectors.
386
387    Parameters
388    ----------
389    connector: Connector
390        The connector which may require a virtual environment.
391
392    Returns
393    -------
394    A Plugin, 'mrsm', or None.
395    """
396    if not hasattr(connector, 'type'):
397        return None
398    plugin_name = (
399        connector.__module__.replace('plugins.', '').split('.')[0]
400        if connector.type in custom_types else (
401            connector.label
402            if connector.type == 'plugin'
403            else 'mrsm'
404        )
405    )
406    plugin = mrsm.Plugin(plugin_name)
407    return plugin if plugin.is_installed() else None
408
409
410def _load_builtin_custom_connectors():
411    """
412    Import custom connectors decorated with `@make_connector` or `@make_executor`.
413    """
414    import meerschaum.jobs.systemd
415    import meerschaum.connectors.valkey
416    _known_custom_types.add('valkey')
417    _known_custom_types.add('systemd')
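
Taken together, a typical flow is to build a connector once and poll it before use; a minimal sketch, assuming `sql:main` is configured and reachable:

>>> from meerschaum.connectors import get_connector, is_connected
>>> conn = get_connector('sql', 'main')  # constructed once, then cached for reuse
>>> is_connected('sql:main')             # calls conn.test_connection() under the hood
True
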
def make_connector(cls, _is_executor: bool = False):
279def make_connector(cls, _is_executor: bool = False):
280    """
281    Register a class as a `Connector`.
282    The `type` will be the lower case of the class name, without the suffix `connector`.
283
284    Parameters
285    ----------
286    cls:
287        The connector class to register. Set the class attribute `IS_INSTANCE` to `True`
288        to make this an instance connector type (requires implementing the various pipes functions).
289
290    Examples
291    --------
292    >>> import meerschaum as mrsm
293    >>> from meerschaum.connectors import make_connector, Connector
294    >>> 
295    >>> @make_connector
296    ... class FooConnector(Connector):
297    ...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
298    ... 
299    >>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
300    >>> print(conn.username, conn.password)
301    dog cat
302    >>> 
303    """
304    import re
305    from meerschaum.plugins import _get_parent_plugin
306    suffix_regex = (
307        r'connector$'
308        if not _is_executor
309        else r'executor$'
310    )
311    plugin_name = _get_parent_plugin(2)
312    typ = re.sub(suffix_regex, '', cls.__name__.lower())
313    with _locks['types']:
314        types[typ] = cls
315    with _locks['custom_types']:
316        custom_types.add(typ)
317    if plugin_name:
318        with _locks['plugins_types']:
319            if plugin_name not in plugins_types:
320                plugins_types[plugin_name] = []
321            plugins_types[plugin_name].append(typ)
322    with _locks['connectors']:
323        if typ not in connectors:
324            connectors[typ] = {}
325    if getattr(cls, 'IS_INSTANCE', False):
326        with _locks['instance_types']:
327            if typ not in instance_types:
328                instance_types.append(typ)
329
330    return cls

Register a class as a Connector. The type will be the lower case of the class name, without the suffix connector.

Parameters
  • cls: The connector class to register. Set the class attribute IS_INSTANCE to True to make this connector type an instance connector. This requires implementing the various pipes functions and lots of testing.
Examples
>>> import meerschaum as mrsm
>>> from meerschaum.connectors import make_connector, Connector
>>> 
>>> @make_connector
... class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES: list[str] = ['username', 'password']
... 
>>> conn = mrsm.get_connector('foo:bar', username='dog', password='cat')
>>> print(conn.username, conn.password)
dog cat
>>>
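
Setting the class attribute `IS_INSTANCE` to `True` additionally registers the new type as an instance type (see the `IS_INSTANCE` check in the source above). A minimal sketch; `BarConnector` is hypothetical, and a real instance connector must also implement the pipes methods:

>>> from meerschaum.connectors import make_connector, Connector, instance_types
>>> 
>>> @make_connector
... class BarConnector(Connector):
...     IS_INSTANCE: bool = True
...     REQUIRED_ATTRIBUTES: list[str] = ['host']
... 
>>> 'bar' in instance_types
True
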
class Connector:
 22class Connector(metaclass=abc.ABCMeta):
 23    """
 24    The base connector class to hold connection attributes.
 25    """
 26
 27    IS_INSTANCE: bool = False
 28
 29    def __init__(
 30        self,
 31        type: Optional[str] = None,
 32        label: Optional[str] = None,
 33        **kw: Any
 34    ):
 35        """
 36        Set the given keyword arguments as attributes.
 37
 38        Parameters
 39        ----------
 40        type: str
 41            The `type` of the connector (e.g. `sql`, `api`, `plugin`).
 42
 43        label: str
 44            The `label` for the connector.
 45
 46
 47        Examples
 48        --------
  49        Run `mrsm edit config` to edit connectors in the YAML file:
 50
 51        ```yaml
 52        meerschaum:
  53            connectors:
 54                {type}:
 55                    {label}:
 56                        ### attributes go here
 57        ```
 58
 59        """
 60        self._original_dict = copy.deepcopy(self.__dict__)
 61        self._set_attributes(type=type, label=label, **kw)
 62
 63        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
 64        self.verify_attributes(
 65            ['uri']
 66            if 'uri' in self.__dict__
 67            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
 68        )
 69
 70    def _reset_attributes(self):
 71        self.__dict__ = self._original_dict
 72
 73    def _set_attributes(
 74        self,
 75        *args,
 76        inherit_default: bool = True,
 77        **kw: Any
 78    ):
 79        from meerschaum._internal.static import STATIC_CONFIG
 80        from meerschaum.utils.warnings import error
 81
 82        self._attributes = {}
 83
 84        default_label = STATIC_CONFIG['connectors']['default_label']
 85
 86        ### NOTE: Support the legacy method of explicitly passing the type.
 87        label = kw.get('label', None)
 88        if label is None:
 89            if len(args) == 2:
 90                label = args[1]
 91            elif len(args) == 0:
 92                label = None
 93            else:
 94                label = args[0]
 95
 96        if label == 'default':
 97            error(
 98                f"Label cannot be 'default'. Did you mean '{default_label}'?",
 99                InvalidAttributesError,
100            )
101        self.__dict__['label'] = label
102
103        from meerschaum.config import get_config
104        conn_configs = copy.deepcopy(get_config('meerschaum', 'connectors'))
105        connector_config = copy.deepcopy(get_config('system', 'connectors'))
106
107        ### inherit attributes from 'default' if exists
108        if inherit_default:
109            inherit_from = 'default'
110            if self.type in conn_configs and inherit_from in conn_configs[self.type]:
111                _inherit_dict = copy.deepcopy(conn_configs[self.type][inherit_from])
112                self._attributes.update(_inherit_dict)
113
114        ### load user config into self._attributes
115        if self.type in conn_configs and self.label in conn_configs[self.type]:
116            self._attributes.update(conn_configs[self.type][self.label] or {})
117
118        ### load system config into self._sys_config
119        ### (deep copy so future Connectors don't inherit changes)
120        if self.type in connector_config:
121            self._sys_config = copy.deepcopy(connector_config[self.type])
122
123        ### add additional arguments or override configuration
124        self._attributes.update(kw)
125
126        ### finally, update __dict__ with _attributes.
127        self.__dict__.update(self._attributes)
128
129    def verify_attributes(
130        self,
131        required_attributes: Optional[List[str]] = None,
132        debug: bool = False,
133    ) -> None:
134        """
 135        Ensure that the required attributes are set.
136        
137        The Connector base class checks the minimum requirements.
138        Child classes may enforce additional requirements.
139
140        Parameters
141        ----------
142        required_attributes: Optional[List[str]], default None
 143            Attributes to be verified. If `None`, default to `['type', 'label']`.
144
145        debug: bool, default False
146            Verbosity toggle.
147
148        Returns
149        -------
 150        `None`.
151
152        Raises
153        ------
154        An error if any of the required attributes are missing.
155        """
156        from meerschaum.utils.warnings import error
157        from meerschaum.utils.misc import items_str
158        if required_attributes is None:
159            required_attributes = ['type', 'label']
160
161        missing_attributes = set()
162        for a in required_attributes:
163            if a not in self.__dict__:
164                missing_attributes.add(a)
165        if len(missing_attributes) > 0:
166            error(
167                (
168                    f"Missing {items_str(list(missing_attributes))} "
169                    + f"for connector '{self.type}:{self.label}'."
170                ),
171                InvalidAttributesError,
172                silent=True,
173                stack=False
174            )
175
176
177    def __str__(self):
178        """
179        When cast to a string, return type:label.
180        """
181        return f"{self.type}:{self.label}"
182
183    def __repr__(self):
184        """
185        Represent the connector as type:label.
186        """
187        return str(self)
188
189    @property
190    def meta(self) -> Dict[str, Any]:
191        """
192        Return the keys needed to reconstruct this Connector.
193        """
194        _meta = {
195            key: value
196            for key, value in self.__dict__.items()
197            if not str(key).startswith('_')
198        }
199        _meta.update({
200            'type': self.type,
201            'label': self.label,
202        })
203        return _meta
204
205
206    @property
207    def type(self) -> str:
208        """
209        Return the type for this connector.
210        """
211        _type = self.__dict__.get('type', None)
212        if _type is None:
213            import re
214            is_executor = self.__class__.__name__.lower().endswith('executor')
215            suffix_regex = (
216                r'connector$'
217                if not is_executor
218                else r'executor$'
219            )
220            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
221            if not _type or _type.lower() == 'instance':
222                raise ValueError("No type could be determined for this connector.")
223            self.__dict__['type'] = _type
224        return _type
225
226
227    @property
228    def label(self) -> str:
229        """
230        Return the label for this connector.
231        """
232        _label = self.__dict__.get('label', None)
233        if _label is None:
234            from meerschaum._internal.static import STATIC_CONFIG
235            _label = STATIC_CONFIG['connectors']['default_label']
236            self.__dict__['label'] = _label
237        return _label

The base connector class to hold connection attributes.

Connector(type: Optional[str] = None, label: Optional[str] = None, **kw: Any)
29    def __init__(
30        self,
31        type: Optional[str] = None,
32        label: Optional[str] = None,
33        **kw: Any
34    ):
35        """
36        Set the given keyword arguments as attributes.
37
38        Parameters
39        ----------
40        type: str
41            The `type` of the connector (e.g. `sql`, `api`, `plugin`).
42
43        label: str
44            The `label` for the connector.
45
46
47        Examples
48        --------
49        Run `mrsm edit config` to edit connectors in the YAML file:
50
51        ```yaml
52        meerschaum:
53            connectors:
54                {type}:
55                    {label}:
56                        ### attributes go here
57        ```
58
59        """
60        self._original_dict = copy.deepcopy(self.__dict__)
61        self._set_attributes(type=type, label=label, **kw)
62
63        ### NOTE: Override `REQUIRED_ATTRIBUTES` if `uri` is set.
64        self.verify_attributes(
65            ['uri']
66            if 'uri' in self.__dict__
67            else getattr(self, 'REQUIRED_ATTRIBUTES', None)
68        )

Set the given keyword arguments as attributes.

Parameters
  • type (str): The type of the connector (e.g. sql, api, plugin).
  • label (str): The label for the connector.
Examples

Run mrsm edit config to edit connectors in the YAML file:

meerschaum:
    connectors:
        {type}:
            {label}:
                ### attributes go here
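
Attributes may also be passed directly as keyword arguments rather than read from the YAML configuration; a sketch with a hypothetical subclass:

>>> from meerschaum.connectors import Connector
>>> 
>>> class FooConnector(Connector):
...     REQUIRED_ATTRIBUTES = ['username']
... 
>>> conn = FooConnector(label='demo', username='dog')
>>> conn.username, conn.label
('dog', 'demo')
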
IS_INSTANCE: bool = False
def verify_attributes( self, required_attributes: Optional[List[str]] = None, debug: bool = False) -> None:
129    def verify_attributes(
130        self,
131        required_attributes: Optional[List[str]] = None,
132        debug: bool = False,
133    ) -> None:
134        """
135        Ensure that the required attributes are set.
136        
137        The Connector base class checks the minimum requirements.
138        Child classes may enforce additional requirements.
139
140        Parameters
141        ----------
142        required_attributes: Optional[List[str]], default None
143            Attributes to be verified. If `None`, default to `['type', 'label']`.
144
145        debug: bool, default False
146            Verbosity toggle.
147
148        Returns
149        -------
150        `None`.
151
152        Raises
153        ------
154        An error if any of the required attributes are missing.
155        """
156        from meerschaum.utils.warnings import error
157        from meerschaum.utils.misc import items_str
158        if required_attributes is None:
159            required_attributes = ['type', 'label']
160
161        missing_attributes = set()
162        for a in required_attributes:
163            if a not in self.__dict__:
164                missing_attributes.add(a)
165        if len(missing_attributes) > 0:
166            error(
167                (
168                    f"Missing {items_str(list(missing_attributes))} "
169                    + f"for connector '{self.type}:{self.label}'."
170                ),
171                InvalidAttributesError,
172                silent=True,
173                stack=False
174            )

Ensure that the required attributes are set.

The Connector base class checks the minimum requirements. Child classes may enforce additional requirements.

Parameters
  • required_attributes (Optional[List[str]], default None): Attributes to be verified. If None, default to ['type', 'label'].
  • debug (bool, default False): Verbosity toggle.
Returns
  • None.
Raises
  • An error if any of the required attributes are missing.
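
For example, omitting a required attribute raises `InvalidAttributesError` (a sketch with the hypothetical `FooConnector` above; the exact message may differ):

>>> try:
...     _ = FooConnector(label='demo')  # 'username' is missing
... except Exception as e:
...     print(type(e).__name__, e)
... 
InvalidAttributesError Missing 'username' for connector 'foo:demo'.
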
meta: Dict[str, Any]
189    @property
190    def meta(self) -> Dict[str, Any]:
191        """
192        Return the keys needed to reconstruct this Connector.
193        """
194        _meta = {
195            key: value
196            for key, value in self.__dict__.items()
197            if not str(key).startswith('_')
198        }
199        _meta.update({
200            'type': self.type,
201            'label': self.label,
202        })
203        return _meta

Return the keys needed to reconstruct this Connector.
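
Because `meta` includes `type` and `label` alongside the public attributes, it may be unpacked back into `get_connector()` to fetch an equivalent (cached) connector; a sketch with a hypothetical SQLite connector:

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'local', flavor='sqlite', database='/tmp/local.db')
>>> mrsm.get_connector(**conn.meta) is conn
True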

type: str
206    @property
207    def type(self) -> str:
208        """
209        Return the type for this connector.
210        """
211        _type = self.__dict__.get('type', None)
212        if _type is None:
213            import re
214            is_executor = self.__class__.__name__.lower().endswith('executor')
215            suffix_regex = (
216                r'connector$'
217                if not is_executor
218                else r'executor$'
219            )
220            _type = re.sub(suffix_regex, '', self.__class__.__name__.lower())
221            if not _type or _type.lower() == 'instance':
222                raise ValueError("No type could be determined for this connector.")
223            self.__dict__['type'] = _type
224        return _type

Return the type for this connector.

label: str
227    @property
228    def label(self) -> str:
229        """
230        Return the label for this connector.
231        """
232        _label = self.__dict__.get('label', None)
233        if _label is None:
234            from meerschaum._internal.static import STATIC_CONFIG
235            _label = STATIC_CONFIG['connectors']['default_label']
236            self.__dict__['label'] = _label
237        return _label

Return the label for this connector.

class InstanceConnector(meerschaum.connectors.Connector):
18class InstanceConnector(Connector):
19    """
20    Instance connectors define the interface for managing pipes and provide methods
21    for management of users, plugins, tokens, and other metadata built atop pipes.
22    """
23
24    IS_INSTANCE: bool = True
25    IS_THREAD_SAFE: bool = False
26
27    from ._users import (
28        get_users_pipe,
29        register_user,
30        get_user_id,
31        get_username,
32        get_users,
33        edit_user,
34        delete_user,
35        get_user_password_hash,
36        get_user_type,
37        get_user_attributes,
38    )
39
40    from ._plugins import (
41        get_plugins_pipe,
42        register_plugin,
43        get_plugin_user_id,
44        delete_plugin,
45        get_plugin_id,
46        get_plugin_version,
47        get_plugins,
49        get_plugin_username,
50        get_plugin_attributes,
51    )
52
53    from ._tokens import (
54        get_tokens_pipe,
55        register_token,
56        edit_token,
57        invalidate_token,
58        delete_token,
59        get_token,
60        get_tokens,
61        get_token_model,
62        get_token_secret_hash,
63        token_exists,
64        get_token_scopes,
65    )
66
67    from ._pipes import (
68        register_pipe,
69        get_pipe_attributes,
70        get_pipe_id,
71        edit_pipe,
72        delete_pipe,
73        fetch_pipes_keys,
74        pipe_exists,
75        drop_pipe,
76        drop_pipe_indices,
77        sync_pipe,
78        create_pipe_indices,
79        clear_pipe,
80        get_pipe_data,
81        get_sync_time,
82        get_pipe_columns_types,
83        get_pipe_columns_indices,
84    )

Instance connectors define the interface for managing pipes and provide methods for management of users, plugins, tokens, and other metadata built atop pipes.
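
A custom instance connector subclasses `InstanceConnector` and implements the abstract pipes methods; the users, plugins, and tokens methods are then inherited for free, since they are built atop pipes. A bare sketch (`MemoryConnector` is hypothetical, and the remaining abstract methods are elided):

>>> from meerschaum.connectors import make_connector, InstanceConnector
>>> 
>>> @make_connector
... class MemoryConnector(InstanceConnector):
...     REQUIRED_ATTRIBUTES: list[str] = []
...     def register_pipe(self, pipe, debug=False, **kwargs):
...         return True, "Success"
...     # get_pipe_attributes, sync_pipe, etc. must be implemented as well.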

IS_INSTANCE: bool = True
IS_THREAD_SAFE: bool = False
def get_users_pipe(self) -> meerschaum.Pipe:
18def get_users_pipe(self) -> 'mrsm.Pipe':
19    """
 20    Return the pipe used for user registration.
21    """
22    if '_users_pipe' in self.__dict__:
23        return self._users_pipe
24
25    cache_connector = self.__dict__.get('_cache_connector', None)
26    self._users_pipe = mrsm.Pipe(
27        'mrsm', 'users',
28        instance=self,
29        target='mrsm_users',
30        temporary=True,
31        cache=True,
32        cache_connector_keys=cache_connector,
33        static=True,
34        null_indices=False,
35        columns={
36            'primary': 'user_id',
37        },
38        dtypes={
39            'user_id': 'uuid',
40            'username': 'string',
41            'password_hash': 'string',
42            'email': 'string',
43            'user_type': 'string',
44            'attributes': 'json',
45        },
46        indices={
47            'unique': 'username',
48        },
49    )
50    return self._users_pipe

Return the pipe used for user registration.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
53def register_user(
54    self,
55    user: User,
56    debug: bool = False,
57    **kwargs: Any
58) -> mrsm.SuccessTuple:
59    """
60    Register a new user to the users pipe.
61    """
62    users_pipe = self.get_users_pipe()
63    user.user_id = uuid.uuid4()
64    sync_success, sync_msg = users_pipe.sync(
65        [{
66            'user_id': user.user_id,
67            'username': user.username,
68            'email': user.email,
69            'password_hash': user.password_hash,
70            'user_type': user.type,
71            'attributes': user.attributes,
72        }],
73        check_existing=False,
74        debug=debug,
75    )
76    if not sync_success:
77        return False, f"Failed to register user '{user.username}':\n{sync_msg}"
78
79    return True, "Success"

Register a new user to the users pipe.
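
A sketch of registering a user on an instance connector (assuming `meerschaum.core.User`; the constructor signature may differ):

>>> import meerschaum as mrsm
>>> from meerschaum.core import User
>>> conn = mrsm.get_connector('sql', 'main')
>>> success, msg = conn.register_user(User('dog', 'P4ssw0rd!'))
>>> success
True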

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[uuid.UUID]:
82def get_user_id(self, user: User, debug: bool = False) -> Union[uuid.UUID, None]:
83    """
84    Return a user's ID from the username.
85    """
86    users_pipe = self.get_users_pipe()
87    result_df = users_pipe.get_data(['user_id'], params={'username': user.username}, limit=1)
88    if result_df is None or len(result_df) == 0:
89        return None
90    return result_df['user_id'][0]

Return a user's ID from the username.

def get_username(self, user_id: Any, debug: bool = False) -> Any:
93def get_username(self, user_id: Any, debug: bool = False) -> Any:
94    """
95    Return the username from the given ID.
96    """
97    users_pipe = self.get_users_pipe()
98    return users_pipe.get_value('username', {'user_id': user_id}, debug=debug)

Return the username from the given ID.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
101def get_users(
102    self,
103    debug: bool = False,
104    **kw: Any
105) -> List[str]:
106    """
107    Get the registered usernames.
108    """
109    users_pipe = self.get_users_pipe()
110    df = users_pipe.get_data()
111    if df is None:
112        return []
113
114    return list(df['username'])

Get the registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
117def edit_user(self, user: User, debug: bool = False) -> mrsm.SuccessTuple:
118    """
119    Edit the attributes for an existing user.
120    """
121    users_pipe = self.get_users_pipe()
122    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
123
124    doc = {'user_id': user_id}
125    if user.email != '':
126        doc['email'] = user.email
127    if user.password_hash != '':
128        doc['password_hash'] = user.password_hash
129    if user.type != '':
130        doc['user_type'] = user.type
131    if user.attributes:
132        doc['attributes'] = user.attributes
133
134    sync_success, sync_msg = users_pipe.sync([doc], debug=debug)
135    if not sync_success:
136        return False, f"Failed to edit user '{user.username}':\n{sync_msg}"
137
138    return True, "Success"

Edit the attributes for an existing user.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
141def delete_user(self, user: User, debug: bool = False) -> mrsm.SuccessTuple:
142    """
143    Delete a user from the users table.
144    """
145    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
146    users_pipe = self.get_users_pipe()
147    clear_success, clear_msg = users_pipe.clear(params={'user_id': user_id}, debug=debug)
148    if not clear_success:
149        return False, f"Failed to delete user '{user}':\n{clear_msg}"
150    return True, "Success"

Delete a user from the users table.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[uuid.UUID]:
153def get_user_password_hash(self, user: User, debug: bool = False) -> Union[uuid.UUID, None]:
154    """
155    Get a user's password hash from the users table.
156    """
157    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
158    users_pipe = self.get_users_pipe()
159    result_df = users_pipe.get_data(['password_hash'], params={'user_id': user_id}, debug=debug)
160    if result_df is None or len(result_df) == 0:
161        return None
162
163    return result_df['password_hash'][0]

Get a user's password hash from the users table.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[str]:
166def get_user_type(self, user: User, debug: bool = False) -> Union[str, None]:
167    """
168    Get a user's type from the users table.
169    """
170    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
171    users_pipe = self.get_users_pipe()
172    result_df = users_pipe.get_data(['user_type'], params={'user_id': user_id}, debug=debug)
173    if result_df is None or len(result_df) == 0:
174        return None
175
176    return result_df['user_type'][0]

Get a user's type from the users table.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[Dict[str, Any]]:
179def get_user_attributes(self, user: User, debug: bool = False) -> Union[Dict[str, Any], None]:
180    """
181    Get a user's attributes from the users table.
182    """
183    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
184    users_pipe = self.get_users_pipe()
185    result_df = users_pipe.get_data(['attributes'], params={'user_id': user_id}, debug=debug)
186    if result_df is None or len(result_df) == 0:
187        return None
188
189    return result_df['attributes'][0]

Get a user's attributes from the users table.

def get_plugins_pipe(self) -> meerschaum.Pipe:
16def get_plugins_pipe(self) -> 'mrsm.Pipe':
17    """
18    Return the internal pipe for syncing plugins metadata.
19    """
20    if '_plugins_pipe' in self.__dict__:
21        return self._plugins_pipe
22
23    cache_connector = self.__dict__.get('_cache_connector', None)
24    users_pipe = self.get_users_pipe()
25    user_id_dtype = users_pipe.dtypes.get('user_id', 'uuid')
26
27    self._plugins_pipe = mrsm.Pipe(
28        'mrsm', 'plugins',
29        instance=self,
30        target='mrsm_plugins',
31        temporary=True,
32        cache=True,
33        cache_connector_keys=cache_connector,
34        static=True,
35        null_indices=False,
36        columns={
37            'primary': 'plugin_name',
38            'user_id': 'user_id',
39        },
40        dtypes={
41            'plugin_name': 'string',
42            'user_id': user_id_dtype,
43            'attributes': 'json',
44            'version': 'string',
45        },
46    )
47    return self._plugins_pipe

Return the internal pipe for syncing plugins metadata.

def register_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
50def register_plugin(self, plugin: Plugin, debug: bool = False) -> mrsm.SuccessTuple:
51    """
52    Register a new plugin to the plugins table.
53    """
54    plugins_pipe = self.get_plugins_pipe()
55    users_pipe = self.get_users_pipe()
56    user_id = self.get_plugin_user_id(plugin)
57    if user_id is not None:
58        username = self.get_username(user_id, debug=debug)
59        return False, f"{plugin} is already registered to '{username}'."
60
61    doc = {
62        'plugin_name': plugin.name,
63        'version': plugin.version,
64        'attributes': plugin.attributes,
65        'user_id': plugin.user_id,
66    }
67
68    sync_success, sync_msg = plugins_pipe.sync(
69        [doc],
70        check_existing=False,
71        debug=debug,
72    )
73    if not sync_success:
74        return False, f"Failed to register {plugin}:\n{sync_msg}"
75
76    return True, "Success"

Register a new plugin to the plugins table.

def get_plugin_user_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[uuid.UUID]:
79def get_plugin_user_id(self, plugin: Plugin, debug: bool = False) -> Union[uuid.UUID, None]:
80    """
 81    Return the user ID for the plugin's owner.
82    """
83    plugins_pipe = self.get_plugins_pipe() 
84    return plugins_pipe.get_value('user_id', {'plugin_name': plugin.name}, debug=debug)

Return the user ID for the plugin's owner.

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
105def delete_plugin(self, plugin: Plugin, debug: bool = False) -> mrsm.SuccessTuple:
106    """
107    Delete a plugin's registration.
108    """
109    plugin_id = self.get_plugin_id(plugin, debug=debug)
110    if plugin_id is None:
111        return False, f"{plugin} is not registered."
112    
113    plugins_pipe = self.get_plugins_pipe()
114    clear_success, clear_msg = plugins_pipe.clear(params={'plugin_name': plugin.name}, debug=debug)
115    if not clear_success:
116        return False, f"Failed to delete {plugin}:\n{clear_msg}"
117    return True, "Success"

Delete a plugin's registration.

def get_plugin_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
 97def get_plugin_id(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
 98    """
 99    Return a plugin's ID.
100    """
101    user_id = self.get_plugin_user_id(plugin, debug=debug)
102    return plugin.name if user_id is not None else None

Return a plugin's ID.

def get_plugin_version( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
120def get_plugin_version(self, plugin: Plugin, debug: bool = False) -> Union[str, None]:
121    """
122    Return the version for a plugin.
123    """
124    plugins_pipe = self.get_plugins_pipe() 
125    return plugins_pipe.get_value('version', {'plugin_name': plugin.name}, debug=debug)

Return the version for a plugin.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False, **kw: Any) -> List[str]:
136def get_plugins(
137    self,
138    user_id: Optional[int] = None,
139    search_term: Optional[str] = None,
140    debug: bool = False,
141    **kw: Any
142) -> List[str]:
143    """
144    Return a list of plugin names.
145    """
146    plugins_pipe = self.get_plugins_pipe()
147    params = {}
148    if user_id:
149        params['user_id'] = user_id
150
151    df = plugins_pipe.get_data(['plugin_name'], params=params, debug=debug)
152    if df is None:
153        return []
154
155    docs = df.to_dict(orient='records')
156    return [
157        plugin_name
158        for doc in docs
159        if (plugin_name := doc['plugin_name']).startswith(search_term or '')
160    ]

Return a list of plugin names.

def get_plugin_username( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[uuid.UUID]:
87def get_plugin_username(self, plugin: Plugin, debug: bool = False) -> Union[uuid.UUID, None]:
88    """
89    Return the username for the plugin's owner.
90    """
91    user_id = self.get_plugin_user_id(plugin, debug=debug)
92    if user_id is None:
93        return None
94    return self.get_username(user_id, debug=debug)

Return the username for the plugin's owner.

def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
128def get_plugin_attributes(self, plugin: Plugin, debug: bool = False) -> Dict[str, Any]:
129    """
130    Return the attributes for a plugin.
131    """
132    plugins_pipe = self.get_plugins_pipe() 
133    return plugins_pipe.get_value('attributes', {'plugin_name': plugin.name}, debug=debug) or {}

Return the attributes for a plugin.

def get_tokens_pipe(self) -> meerschaum.Pipe:
22def get_tokens_pipe(self) -> mrsm.Pipe:
23    """
24    Return the internal pipe for tokens management.
25    """
26    if '_tokens_pipe' in self.__dict__:
27        return self._tokens_pipe
28
29    users_pipe = self.get_users_pipe()
30    user_id_dtype = (
31        users_pipe._attributes.get('parameters', {}).get('dtypes', {}).get('user_id', 'uuid')
32    )
33
34    cache_connector = self.__dict__.get('_cache_connector', None)
35
36    self._tokens_pipe = mrsm.Pipe(
37        'mrsm', 'tokens',
38        instance=self,
39        target='mrsm_tokens',
40        temporary=True,
41        cache=True,
42        cache_connector_keys=cache_connector,
43        static=True,
44        autotime=True,
45        null_indices=False,
46        columns={
47            'datetime': 'creation',
48            'primary': 'id',
49        },
50        indices={
51            'unique': 'label',
52            'user_id': 'user_id',
53        },
54        dtypes={
55            'id': 'uuid',
56            'creation': 'datetime',
57            'expiration': 'datetime',
58            'is_valid': 'bool',
59            'label': 'string',
60            'user_id': user_id_dtype,
61            'scopes': 'json',
62            'secret_hash': 'string',
63        },
64    )
65    return self._tokens_pipe

Return the internal pipe for tokens management.

def register_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
68def register_token(
69    self,
70    token: Token,
71    debug: bool = False,
72) -> mrsm.SuccessTuple:
73    """
74    Register the new token to the tokens table.
75    """
76    token_id, token_secret = token.generate_credentials()
77    tokens_pipe = self.get_tokens_pipe()
78    user_id = self.get_user_id(token.user) if token.user is not None else None
79    if user_id is None:
80        return False, "Cannot register a token without a user."
81
82    doc = {
83        'id': token_id,
84        'user_id': user_id,
85        'creation': datetime.now(timezone.utc),
86        'expiration': token.expiration,
87        'label': token.label,
88        'is_valid': token.is_valid,
89        'scopes': list(token.scopes) if token.scopes else [],
90        'secret_hash': hash_password(
91            str(token_secret),
92            rounds=STATIC_CONFIG['tokens']['hash_rounds']
93        ),
94    }
95    sync_success, sync_msg = tokens_pipe.sync([doc], check_existing=False, debug=debug)
96    if not sync_success:
97        return False, f"Failed to register token:\n{sync_msg}"
98    return True, "Success"

Register the new token to the tokens table.

def edit_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
101def edit_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
102    """
103    Persist the token's in-memory state to the tokens pipe.
104    """
105    if not token.id:
106        return False, "Token ID is not set."
107
108    if not token.exists(debug=debug):
109        return False, f"Token {token.id} does not exist."
110
111    if not token.creation:
112        token_model = self.get_token_model(token.id)
113        token.creation = token_model.creation
114
115    tokens_pipe = self.get_tokens_pipe()
116    doc = {
117        'id': token.id,
118        'creation': token.creation,
119        'expiration': token.expiration,
120        'label': token.label,
121        'is_valid': token.is_valid,
122        'scopes': list(token.scopes) if token.scopes else [],
123    }
124    sync_success, sync_msg = tokens_pipe.sync([doc], debug=debug)
125    if not sync_success:
126        return False, f"Failed to edit token '{token.id}':\n{sync_msg}"
127
128    return True, "Success"

Persist the token's in-memory state to the tokens pipe.

def invalidate_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
131def invalidate_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
132    """
133    Set `is_valid` to `False` for the given token.
134    """
135    if not token.id:
136        return False, "Token ID is not set."
137
138    if not token.exists(debug=debug):
139        return False, f"Token {token.id} does not exist."
140
141    if not token.creation:
142        token_model = self.get_token_model(token.id)
143        token.creation = token_model.creation
144
145    token.is_valid = False
146    tokens_pipe = self.get_tokens_pipe()
147    doc = {
148        'id': token.id,
149        'creation': token.creation,
150        'is_valid': False,
151    }
152    sync_success, sync_msg = tokens_pipe.sync([doc], debug=debug)
153    if not sync_success:
154        return False, f"Failed to invalidate token '{token.id}':\n{sync_msg}"
155
156    return True, "Success"

Set is_valid to False for the given token.

def delete_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
159def delete_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
160    """
161    Delete the given token from the tokens table.
162    """
163    if not token.id:
164        return False, "Token ID is not set."
165
166    if not token.exists(debug=debug):
167        return False, f"Token {token.id} does not exist."
168
169    if not token.creation:
170        token_model = self.get_token_model(token.id)
171        token.creation = token_model.creation
172
173    token.is_valid = False
174    tokens_pipe = self.get_tokens_pipe()
175    clear_success, clear_msg = tokens_pipe.clear(params={'id': token.id}, debug=debug)
176    if not clear_success:
177        return False, f"Failed to delete token '{token.id}':\n{clear_msg}"
178
179    return True, "Success"

Delete the given token from the tokens table.

def get_token( self, token_id: Union[uuid.UUID, str], debug: bool = False) -> Optional[meerschaum.core.Token._Token.Token]:
235def get_token(self, token_id: Union[uuid.UUID, str], debug: bool = False) -> Union[Token, None]:
236    """
237    Return the `Token` from its ID.
238    """
239    from meerschaum.utils.misc import is_uuid
240    if isinstance(token_id, str):
241        if is_uuid(token_id):
242            token_id = uuid.UUID(token_id)
243        else:
244            raise ValueError("Invalid token ID.")
245    token_model = self.get_token_model(token_id)
246    if token_model is None:
247        return None
248    return Token(**dict(token_model))

Return the Token from its ID.

def get_tokens( self, user: Optional[meerschaum.core.User._User.User] = None, labels: Optional[List[str]] = None, ids: Optional[List[uuid.UUID]] = None, debug: bool = False) -> List[meerschaum.core.Token._Token.Token]:
182def get_tokens(
183    self,
184    user: Optional[User] = None,
185    labels: Optional[List[str]] = None,
186    ids: Optional[List[uuid.UUID]] = None,
187    debug: bool = False,
188) -> List[Token]:
189    """
190    Return a list of `Token` objects.
191    """
192    tokens_pipe = self.get_tokens_pipe()
193    user_id = (
194        self.get_user_id(user, debug=debug)
195        if user is not None
196        else None
197    )
198    user_type = self.get_user_type(user, debug=debug) if user is not None else None
199    params = (
200        {
201            'user_id': (
202                user_id
203                if user_type != 'admin'
204                else [user_id, None]
205            )
206        }
207        if user_id is not None
208        else {}
209    )
210    if labels:
211        params['label'] = labels
212    if ids:
213        params['id'] = ids
214        
215    if debug:
216        dprint(f"Getting tokens with {user_id=}, {params=}")
217
218    tokens_df = tokens_pipe.get_data(params=params, debug=debug)
219    if tokens_df is None:
220        return []
221
222    if debug:
223        dprint(f"Retrieved tokens dataframe:\n{tokens_df}")
224
225    tokens_docs = tokens_df.to_dict(orient='records')
226    return [
227        Token(
228            instance=self,
229            **token_doc
230        )
231        for token_doc in reversed(tokens_docs)
232    ]

Return a list of Token objects.

def get_token_model( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> Optional[meerschaum.models.TokenModel]:
251def get_token_model(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> 'Union[TokenModel, None]':
252    """
253    Return a token's model from the instance.
254    """
255    from meerschaum.models import TokenModel
256    if isinstance(token_id, Token):
257        token_id = token_id.id
258    if not token_id:
259        raise ValueError("Invalid token ID.")
260    tokens_pipe = self.get_tokens_pipe()
261    doc = tokens_pipe.get_doc(
262        params={'id': token_id},
263        debug=debug,
264    )
265    if doc is None:
266        return None
267    return TokenModel(**doc)

Return a token's model from the instance.

def get_token_secret_hash( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> Optional[str]:
270def get_token_secret_hash(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> Union[str, None]:
271    """
272    Return the secret hash for a given token.
273    """
274    if isinstance(token_id, Token):
275        token_id = token_id.id
276    if not token_id:
277        raise ValueError("Invalid token ID.")
278    tokens_pipe = self.get_tokens_pipe()
279    return tokens_pipe.get_value('secret_hash', params={'id': token_id}, debug=debug)

Return the secret hash for a given token.

def token_exists( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> bool:
308def token_exists(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> bool:
309    """
310    Return `True` if a token exists in the tokens pipe.
311    """
312    if isinstance(token_id, Token):
313        token_id = token_id.id
314    if not token_id:
315        raise ValueError("Invalid token ID.")
316
317    tokens_pipe = self.get_tokens_pipe()
318    return tokens_pipe.get_value('creation', params={'id': token_id}, debug=debug) is not None

Return True if a token exists in the tokens pipe.

def get_token_scopes( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> List[str]:
295def get_token_scopes(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> List[str]:
296    """
297    Return the scopes for a token.
298    """
299    if isinstance(token_id, Token):
300        token_id = token_id.id
301    if not token_id:
302        raise ValueError("Invalid token ID.")
303
304    tokens_pipe = self.get_tokens_pipe()
305    return tokens_pipe.get_value('scopes', params={'id': token_id}, debug=debug) or []

Return the scopes for a token.

@abc.abstractmethod
def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
17@abc.abstractmethod
18def register_pipe(
19    self,
20    pipe: mrsm.Pipe,
21    debug: bool = False,
22    **kwargs: Any
23) -> mrsm.SuccessTuple:
24    """
25    Insert the pipe's attributes into the internal `pipes` table.
26
27    Parameters
28    ----------
29    pipe: mrsm.Pipe
30        The pipe to be registered.
31
32    Returns
33    -------
34    A `SuccessTuple` of the result.
35    """

Insert the pipe's attributes into the internal pipes table.

Parameters
  • pipe (mrsm.Pipe): The pipe to be registered.
Returns
  • A SuccessTuple of the result.
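
As a sketch (the instance and pipe keys are placeholders), registration usually happens through `Pipe.register()`, which delegates to the instance connector:

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('demo', 'weather', instance=conn)
>>> success, msg = conn.register_pipe(pipe)
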
@abc.abstractmethod
def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Dict[str, Any]:
37@abc.abstractmethod
38def get_pipe_attributes(
39    self,
40    pipe: mrsm.Pipe,
41    debug: bool = False,
42    **kwargs: Any
43) -> Dict[str, Any]:
44    """
45    Return the pipe's document from the internal `pipes` table.
46
47    Parameters
48    ----------
49    pipe: mrsm.Pipe
50        The pipe whose attributes should be retrieved.
51
52    Returns
53    -------
54    The document that matches the keys of the pipe.
55    """

Return the pipe's document from the internal pipes table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose attributes should be retrieved.
Returns
  • The document that matches the keys of the pipe.
@abc.abstractmethod
def get_pipe_id( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Union[str, int, NoneType]:
57@abc.abstractmethod
58def get_pipe_id(
59    self,
60    pipe: mrsm.Pipe,
61    debug: bool = False,
62    **kwargs: Any
63) -> Union[str, int, None]:
64    """
65    Return the `id` for the pipe if it exists.
66
67    Parameters
68    ----------
69    pipe: mrsm.Pipe
70        The pipe whose `id` to fetch.
71
72    Returns
73    -------
74    The `id` for the pipe's document or `None`.
75    """

Return the id for the pipe if it exists.

Parameters
  • pipe (mrsm.Pipe): The pipe whose id to fetch.
Returns
  • The id for the pipe's document or None.
def edit_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
77def edit_pipe(
78    self,
79    pipe: mrsm.Pipe,
80    debug: bool = False,
81    **kwargs: Any
82) -> mrsm.SuccessTuple:
83    """
84    Edit the attributes of the pipe.
85
86    Parameters
87    ----------
88    pipe: mrsm.Pipe
89        The pipe whose in-memory parameters must be persisted.
90
91    Returns
92    -------
93    A `SuccessTuple` indicating success.
94    """
95    raise NotImplementedError

Edit the attributes of the pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe whose in-memory parameters must be persisted.
Returns
  • A SuccessTuple indicating success.
def delete_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
 97def delete_pipe(
 98    self,
 99    pipe: mrsm.Pipe,
100    debug: bool = False,
101    **kwargs: Any
102) -> mrsm.SuccessTuple:
103    """
104    Delete a pipe's registration from the `pipes` collection.
105
106    Parameters
107    ----------
108    pipe: mrsm.Pipe
109        The pipe to be deleted.
110
111    Returns
112    -------
113    A `SuccessTuple` indicating success.
114    """
115    raise NotImplementedError

Delete a pipe's registration from the pipes collection.

Parameters
  • pipe (mrsm.Pipe): The pipe to be deleted.
Returns
  • A SuccessTuple indicating success.
@abc.abstractmethod
def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, debug: bool = False, **kwargs: Any) -> List[Tuple[str, str, str]]:
117@abc.abstractmethod
118def fetch_pipes_keys(
119    self,
120    connector_keys: Optional[List[str]] = None,
121    metric_keys: Optional[List[str]] = None,
122    location_keys: Optional[List[str]] = None,
123    tags: Optional[List[str]] = None,
124    debug: bool = False,
125    **kwargs: Any
126) -> List[Tuple[str, str, str]]:
127    """
128    Return a list of tuples for the registered pipes' keys according to the provided filters.
129
130    Parameters
131    ----------
132    connector_keys: list[str] | None, default None
133        The keys passed via `-c`.
134
135    metric_keys: list[str] | None, default None
136        The keys passed via `-m`.
137
138    location_keys: list[str] | None, default None
139        The keys passed via `-l`.
140
141    tags: List[str] | None, default None
142        Tags passed via `--tags` which are stored under `parameters:tags`.
143
144    Returns
145    -------
146    A list of connector, metric, and location keys in tuples.
147    You may return the string "None" for location keys in place of nulls.
148
149    Examples
150    --------
151    >>> import meerschaum as mrsm
152    >>> conn = mrsm.get_connector('example:demo')
153    >>> 
154    >>> pipe_a = mrsm.Pipe('a', 'demo', tags=['foo'], instance=conn)
155    >>> pipe_b = mrsm.Pipe('b', 'demo', tags=['bar'], instance=conn)
156    >>> pipe_a.register()
157    >>> pipe_b.register()
158    >>> 
159    >>> conn.fetch_pipes_keys(['a', 'b'])
160    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
161    >>> conn.fetch_pipes_keys(metric_keys=['demo'])
162    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
163    >>> conn.fetch_pipes_keys(tags=['foo'])
164    [('a', 'demo', 'None')]
165    >>> conn.fetch_pipes_keys(location_keys=[None])
166    [('a', 'demo', 'None'), ('b', 'demo', 'None')]
167    """

Return a list of tuples for the registered pipes' keys according to the provided filters.

Parameters
  • connector_keys (list[str] | None, default None): The keys passed via -c.
  • metric_keys (list[str] | None, default None): The keys passed via -m.
  • location_keys (list[str] | None, default None): The keys passed via -l.
  • tags (List[str] | None, default None): Tags passed via --tags which are stored under parameters:tags.
Returns
  • A list of connector, metric, and location keys in tuples.
  • You may return the string "None" for location keys in place of nulls.
Examples
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('example:demo')
>>> 
>>> pipe_a = mrsm.Pipe('a', 'demo', tags=['foo'], instance=conn)
>>> pipe_b = mrsm.Pipe('b', 'demo', tags=['bar'], instance=conn)
>>> pipe_a.register()
>>> pipe_b.register()
>>> 
>>> conn.fetch_pipes_keys(['a', 'b'])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
>>> conn.fetch_pipes_keys(metric_keys=['demo'])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
>>> conn.fetch_pipes_keys(tags=['foo'])
[('a', 'demo', 'None')]
>>> conn.fetch_pipes_keys(location_keys=[None])
[('a', 'demo', 'None'), ('b', 'demo', 'None')]
@abc.abstractmethod
def pipe_exists( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> bool:
169@abc.abstractmethod
170def pipe_exists(
171    self,
172    pipe: mrsm.Pipe,
173    debug: bool = False,
174    **kwargs: Any
175) -> bool:
176    """
177    Check whether a pipe's target table exists.
178
179    Parameters
180    ----------
181    pipe: mrsm.Pipe
182        The pipe to check whether its table exists.
183
184    Returns
185    -------
186    A `bool` indicating the table exists.
187    """

Check whether a pipe's target table exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to check whether its table exists.
Returns
  • A bool indicating the table exists.
@abc.abstractmethod
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
189@abc.abstractmethod
190def drop_pipe(
191    self,
192    pipe: mrsm.Pipe,
193    debug: bool = False,
194    **kwargs: Any
195) -> mrsm.SuccessTuple:
196    """
197    Drop a pipe's collection if it exists.
198
199    Parameters
200    ----------
201    pipe: mrsm.Pipe
202        The pipe to be dropped.
203
204    Returns
205    -------
206    A `SuccessTuple` indicating success.
207    """
208    raise NotImplementedError

Drop a pipe's collection if it exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to be dropped.
Returns
  • A SuccessTuple indicating success.
def drop_pipe_indices( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
210def drop_pipe_indices(
211    self,
212    pipe: mrsm.Pipe,
213    debug: bool = False,
214    **kwargs: Any
215) -> mrsm.SuccessTuple:
216    """
217    Drop a pipe's indices.
218
219    Parameters
220    ----------
221    pipe: mrsm.Pipe
222        The pipe whose indices need to be dropped.
223
224    Returns
225    -------
226    A `SuccessTuple` indicating success.
227    """
228    return False, f"Cannot drop indices for instance connectors of type '{self.type}'."

Drop a pipe's indices.

Parameters
  • pipe (mrsm.Pipe): The pipe whose indices need to be dropped.
Returns
  • A SuccessTuple indicating success.
@abc.abstractmethod
def sync_pipe( self, pipe: meerschaum.Pipe, df: 'pd.DataFrame' = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
230@abc.abstractmethod
231def sync_pipe(
232    self,
233    pipe: mrsm.Pipe,
234    df: 'pd.DataFrame' = None,
235    begin: Union[datetime, int, None] = None,
236    end: Union[datetime, int, None] = None,
237    chunksize: Optional[int] = -1,
238    check_existing: bool = True,
239    debug: bool = False,
240    **kwargs: Any
241) -> mrsm.SuccessTuple:
242    """
243    Sync a pipe using a database connection.
244
245    Parameters
246    ----------
247    pipe: mrsm.Pipe
248        The Meerschaum Pipe instance into which to sync the data.
249
250    df: Optional[pd.DataFrame]
251        An optional DataFrame or equivalent to sync into the pipe.
252        Defaults to `None`.
253
254    begin: Union[datetime, int, None], default None
255        Optionally specify the earliest datetime to search for data.
256        Defaults to `None`.
257
258    end: Union[datetime, int, None], default None
259        Optionally specify the latest datetime to search for data.
260        Defaults to `None`.
261
262    chunksize: Optional[int], default -1
263        Specify the number of rows to sync per chunk.
264        If `-1`, resort to system configuration (default is `900`).
265        A `chunksize` of `None` will sync all rows in one transaction.
266        Defaults to `-1`.
267
268    check_existing: bool, default True
269        If `True`, pull and diff with existing data from the pipe. Defaults to `True`.
270
271    debug: bool, default False
272        Verbosity toggle. Defaults to False.
273
274    Returns
275    -------
276    A `SuccessTuple` of success (`bool`) and message (`str`).
277    """

Sync a pipe using a database connection.

Parameters
  • pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
  • df (Optional[pd.DataFrame]): An optional DataFrame or equivalent to sync into the pipe. Defaults to None.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe. Defaults to True.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
Returns
  • A SuccessTuple of success (bool) and message (str).
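
A hedged sketch syncing an in-memory DataFrame (the keys and column names are placeholders):

>>> import pandas as pd
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('demo', 'weather', instance=conn, columns={'datetime': 'dt'})
>>> df = pd.DataFrame({'dt': pd.to_datetime(['2024-01-01', '2024-01-02']), 'temp': [38.1, 40.3]})
>>> success, msg = conn.sync_pipe(pipe, df)

In practice, `pipe.sync(df)` is the usual entry point; it dispatches to the instance connector's `sync_pipe()`.
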
def create_pipe_indices( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
279def create_pipe_indices(
280    self,
281    pipe: mrsm.Pipe,
282    debug: bool = False,
283    **kwargs: Any
284) -> mrsm.SuccessTuple:
285    """
286    Create a pipe's indices.
287
288    Parameters
289    ----------
290    pipe: mrsm.Pipe
291        The pipe whose indices need to be created.
292
293    Returns
294    -------
295    A `SuccessTuple` indicating success.
296    """
297    return False, f"Cannot create indices for instance connectors of type '{self.type}'."

Create a pipe's indices.

Parameters
  • pipe (mrsm.Pipe): The pipe whose indices need to be created.
Returns
  • A SuccessTuple indicating success.
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
299def clear_pipe(
300    self,
301    pipe: mrsm.Pipe,
302    begin: Union[datetime, int, None] = None,
303    end: Union[datetime, int, None] = None,
304    params: Optional[Dict[str, Any]] = None,
305    debug: bool = False,
306    **kwargs: Any
307) -> mrsm.SuccessTuple:
308    """
309    Delete rows within `begin`, `end`, and `params`.
310
311    Parameters
312    ----------
313    pipe: mrsm.Pipe
314        The pipe whose rows to clear.
315
316    begin: datetime | int | None, default None
317        If provided, remove rows >= `begin`.
318
319    end: datetime | int | None, default None
320        If provided, remove rows < `end`.
321
322    params: dict[str, Any] | None, default None
323        If provided, only remove rows which match the `params` filter.
324
325    Returns
326    -------
327    A `SuccessTuple` indicating success.
328    """
329    raise NotImplementedError

Delete rows within begin, end, and params.

Parameters
  • pipe (mrsm.Pipe): The pipe whose rows to clear.
  • begin (datetime | int | None, default None): If provided, remove rows >= begin.
  • end (datetime | int | None, default None): If provided, remove rows < end.
  • params (dict[str, Any] | None, default None): If provided, only remove rows which match the params filter.
Returns
  • A SuccessTuple indicating success.
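
A minimal sketch, reusing the placeholder `conn` and `pipe` from the `sync_pipe()` example above:

>>> from datetime import datetime
>>> success, msg = conn.clear_pipe(
...     pipe,
...     begin=datetime(2024, 1, 1),  # inclusive
...     end=datetime(2024, 2, 1),    # exclusive
... )
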
@abc.abstractmethod
def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> Union['pd.DataFrame', None]:
331@abc.abstractmethod
332def get_pipe_data(
333    self,
334    pipe: mrsm.Pipe,
335    select_columns: Optional[List[str]] = None,
336    omit_columns: Optional[List[str]] = None,
337    begin: Union[datetime, int, None] = None,
338    end: Union[datetime, int, None] = None,
339    params: Optional[Dict[str, Any]] = None,
340    debug: bool = False,
341    **kwargs: Any
342) -> Union['pd.DataFrame', None]:
343    """
344    Query a pipe's target table and return the DataFrame.
345
346    Parameters
347    ----------
348    pipe: mrsm.Pipe
349        The pipe with the target table from which to read.
350
351    select_columns: list[str] | None, default None
352        If provided, only select these given columns.
353        Otherwise select all available columns (i.e. `SELECT *`).
354
355    omit_columns: list[str] | None, default None
356        If provided, remove these columns from the selection.
357
358    begin: datetime | int | None, default None
359        The earliest `datetime` value to search from (inclusive).
360
361    end: datetime | int | None, default None
362        The latest `datetime` value to search from (exclusive).
363
364    params: dict[str, Any] | None, default None
365        Additional filters to apply to the query.
366
367    Returns
368    -------
369    The target table's data as a DataFrame.
370    """

Query a pipe's target table and return the DataFrame.

Parameters
  • pipe (mrsm.Pipe): The pipe with the target table from which to read.
  • select_columns (list[str] | None, default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (list[str] | None, default None): If provided, remove these columns from the selection.
  • begin (datetime | int | None, default None): The earliest datetime value to search from (inclusive).
  • end (datetime | int | None, default None): The latest datetime value to search from (exclusive).
  • params (dict[str, Any] | None, default None): Additional filters to apply to the query.
Returns
  • The target table's data as a DataFrame.
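
A hedged sketch, reusing the placeholder `conn` and `pipe` from above (`station` is a hypothetical filter column):

>>> from datetime import datetime
>>> df = conn.get_pipe_data(
...     pipe,
...     select_columns=['dt', 'temp'],
...     begin=datetime(2024, 1, 1),  # inclusive
...     end=datetime(2024, 1, 8),    # exclusive
...     params={'station': 'KATL'},
... )
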
@abc.abstractmethod
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, debug: bool = False, **kwargs: Any) -> datetime.datetime | int | None:
372@abc.abstractmethod
373def get_sync_time(
374    self,
375    pipe: mrsm.Pipe,
376    params: Optional[Dict[str, Any]] = None,
377    newest: bool = True,
378    debug: bool = False,
379    **kwargs: Any
380) -> datetime | int | None:
381    """
382    Return the most recent value for the `datetime` axis.
383
384    Parameters
385    ----------
386    pipe: mrsm.Pipe
387        The pipe whose collection contains documents.
388
389    params: dict[str, Any] | None, default None
390        Filter certain parameters when determining the sync time.
391
392    newest: bool, default True
393        If `True`, return the maximum value for the column.
394
395    Returns
396    -------
397    The largest `datetime` or `int` value of the `datetime` axis. 
398    """

Return the most recent value for the datetime axis.

Parameters
  • pipe (mrsm.Pipe): The pipe whose collection contains documents.
  • params (dict[str, Any] | None, default None): Filter certain parameters when determining the sync time.
  • newest (bool, default True): If True, return the maximum value for the column.
Returns
  • The largest datetime or int value of the datetime axis.
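
This is the hook for incremental syncing: feed the latest seen value back in as the next `begin`. A sketch, where `fetch_new_rows()` is a hypothetical user-defined function (not part of Meerschaum):

>>> begin = conn.get_sync_time(pipe)      # None when the pipe is empty
>>> new_df = fetch_new_rows(since=begin)  # hypothetical data source
>>> success, msg = conn.sync_pipe(pipe, new_df, begin=begin)
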
@abc.abstractmethod
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False, **kwargs: Any) -> Dict[str, str]:
400@abc.abstractmethod
401def get_pipe_columns_types(
402    self,
403    pipe: mrsm.Pipe,
404    debug: bool = False,
405    **kwargs: Any
406) -> Dict[str, str]:
407    """
408    Return the data types for the columns in the target table for data type enforcement.
409
410    Parameters
411    ----------
412    pipe: mrsm.Pipe
413        The pipe whose target table contains columns and data types.
414
415    Returns
416    -------
417    A dictionary mapping columns to data types.
418    """

Return the data types for the columns in the target table for data type enforcement.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table contains columns and data types.
Returns
  • A dictionary mapping columns to data types.
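
A sketch of the result's shape; the exact type names vary by database flavor, and the output below is illustrative rather than captured from a real run:

>>> conn.get_pipe_columns_types(pipe)
{'dt': 'TIMESTAMP', 'temp': 'DOUBLE PRECISION'}
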
def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[Dict[str, str]]]:
420def get_pipe_columns_indices(
421    self,
422    pipe: mrsm.Pipe,
423    debug: bool = False,
424) -> Dict[str, List[Dict[str, str]]]:
425    """
426    Return a dictionary mapping columns to metadata about related indices.
427
428    Parameters
429    ----------
430    pipe: mrsm.Pipe
431        The pipe whose target table has related indices.
432
433    Returns
434    -------
435    A dictionary mapping columns to lists of dictionaries with the keys "type" and "name".
436
437    Examples
438    --------
439    >>> pipe = mrsm.Pipe('demo', 'shirts', columns={'primary': 'id'}, indices={'size_color': ['color', 'size']})
440    >>> pipe.sync([{'color': 'red', 'size': 'M'}])
441    >>> pipe.get_columns_indices()
442    {'id': [{'name': 'demo_shirts_pkey', 'type': 'PRIMARY KEY'}], 'color': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}], 'size': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}]}
443    """
444    return {}

Return a dictionary mapping columns to metadata about related indices.

Parameters
  • pipe (mrsm.Pipe): The pipe whose target table has related indices.
Returns
  • A dictionary mapping columns to lists of dictionaries with the keys "type" and "name".
Examples
>>> pipe = mrsm.Pipe('demo', 'shirts', columns={'primary': 'id'}, indices={'size_color': ['color', 'size']})
>>> pipe.sync([{'color': 'red', 'size': 'M'}])
>>> pipe.get_columns_indices()
{'id': [{'name': 'demo_shirts_pkey', 'type': 'PRIMARY KEY'}], 'color': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}], 'size': [{'name': 'IX_demo_shirts_color_size', 'type': 'INDEX'}]}
class SQLConnector(meerschaum.connectors.InstanceConnector):
 20class SQLConnector(InstanceConnector):
 21    """
 22    Connect to SQL databases via `sqlalchemy`.
 23    
 24    SQLConnectors may be used as Meerschaum instance connectors.
 25    Read more about connectors and instances at
 26    https://meerschaum.io/reference/connectors/
 27
 28    """
 29
 30    from ._create_engine import flavor_configs, create_engine
 31    from ._sql import (
 32        read,
 33        value,
 34        exec,
 35        execute,
 36        to_sql,
 37        exec_queries,
 38        get_connection,
 39        _cleanup_connections,
 40    )
 41    from meerschaum.utils.sql import test_connection
 42    from ._fetch import fetch, get_pipe_metadef
 43    from ._cli import cli, _cli_exit
 44    from ._pipes import (
 45        fetch_pipes_keys,
 46        create_indices,
 47        drop_indices,
 48        get_create_index_queries,
 49        get_drop_index_queries,
 50        get_add_columns_queries,
 51        get_alter_columns_queries,
 52        delete_pipe,
 53        get_pipe_data,
 54        get_pipe_data_query,
 55        register_pipe,
 56        edit_pipe,
 57        get_pipe_id,
 58        get_pipe_attributes,
 59        sync_pipe,
 60        sync_pipe_inplace,
 61        get_sync_time,
 62        pipe_exists,
 63        get_pipe_rowcount,
 64        drop_pipe,
 65        clear_pipe,
 66        deduplicate_pipe,
 67        get_pipe_table,
 68        get_pipe_columns_types,
 69        get_to_sql_dtype,
 70        get_pipe_schema,
 71        create_pipe_table_from_df,
 72        get_pipe_columns_indices,
 73        get_temporary_target,
 74        create_pipe_indices,
 75        drop_pipe_indices,
 76        get_pipe_index_names,
 77        _init_geopackage_pipe,
 78    )
 79    from ._plugins import (
 80        get_plugins_pipe,
 81        register_plugin,
 82        delete_plugin,
 83        get_plugin_id,
 84        get_plugin_version,
 85        get_plugins,
 86        get_plugin_user_id,
 87        get_plugin_username,
 88        get_plugin_attributes,
 89    )
 90    from ._users import (
 91        get_users_pipe,
 92        register_user,
 93        get_user_id,
 94        get_users,
 95        edit_user,
 96        delete_user,
 97        get_user_password_hash,
 98        get_user_type,
 99        get_user_attributes,
100    )
101    from ._uri import from_uri, parse_uri
102    from ._instance import (
103        _log_temporary_tables_creation,
104        _drop_temporary_table,
105        _drop_temporary_tables,
106        _drop_old_temporary_tables,
107    )
108
109    def __init__(
110        self,
111        label: Optional[str] = None,
112        flavor: Optional[str] = None,
113        wait: bool = False,
114        connect: bool = False,
115        debug: bool = False,
116        **kw: Any
117    ):
118        """
119        Parameters
120        ----------
121        label: str, default 'main'
122            The identifying label for the connector.
123            E.g. for `sql:main`, 'main' is the label.
124            Defaults to 'main'.
125
126        flavor: Optional[str], default None
127            The database flavor, e.g.
128            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
129            To see supported flavors, run the `bootstrap connectors` command.
130
131        wait: bool, default False
132            If `True`, block until a database connection has been made.
133            Defaults to `False`.
134
135        connect: bool, default False
136            If `True`, immediately attempt to connect the database and raise
137            a warning if the connection fails.
138            Defaults to `False`.
139
140        debug: bool, default False
141            Verbosity toggle.
142            Defaults to `False`.
143
144        kw: Any
145            All other arguments will be passed to the connector's attributes.
146            Therefore, a connector may be made without being registered,
147            as long as enough parameters are supplied to the constructor.
148        """
149        if 'uri' in kw:
150            uri = kw['uri']
151            if uri.startswith('postgres') and not uri.startswith('postgresql'):
152                uri = uri.replace('postgres', 'postgresql', 1)
153            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
154                uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
155            if uri.startswith('timescaledb://'):
156                uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1)
157                flavor = 'timescaledb'
158            if uri.startswith('timescaledb-ha://'):
159                uri = uri.replace('timescaledb-ha://', 'postgresql+psycopg://', 1)
160                flavor = 'timescaledb-ha'
161            if uri.startswith('postgis://'):
162                uri = uri.replace('postgis://', 'postgresql+psycopg://', 1)
163                flavor = 'postgis'
164            kw['uri'] = uri
165            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
166            label = label or from_uri_params.get('label', None)
167            _ = from_uri_params.pop('label', None)
168
169            ### Sometimes the flavor may be provided with a URI.
170            kw.update(from_uri_params)
171            if flavor:
172                kw['flavor'] = flavor
173
174        ### set __dict__ in base class
175        super().__init__(
176            'sql',
177            label = label or self.__dict__.get('label', None),
178            **kw
179        )
180
181        if self.__dict__.get('flavor', None) in ('sqlite', 'geopackage'):
182            self._reset_attributes()
183            self._set_attributes(
184                'sql',
185                label = label,
186                inherit_default = False,
187                **kw
188            )
189            ### For backwards compatibility reasons, set the path for sql:local if it's missing.
190            if self.label == 'local' and not self.__dict__.get('database', None):
191                from meerschaum.config._paths import SQLITE_DB_PATH
192                self.database = SQLITE_DB_PATH.as_posix()
193
194        ### ensure flavor and label are set accordingly
195        if 'flavor' not in self.__dict__:
196            if flavor is None and 'uri' not in self.__dict__:
197                raise ValueError(
198                    f"    Missing flavor. Provide flavor as a key for '{self}'."
199                )
200            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)
201
202        if self.flavor == 'postgres':
203            self.flavor = 'postgresql'
204
205        self._debug = debug
206        ### Store the PID and thread at initialization
207        ### so we can dispose of the Pool in child processes or threads.
208        import os
209        import threading
210        self._pid = os.getpid()
211        self._thread_ident = threading.current_thread().ident
212        self._sessions = {}
213        self._locks = {'_sessions': threading.RLock(), }
214
215        ### verify the flavor's requirements are met
216        if self.flavor not in self.flavor_configs:
217            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
218        if not self.__dict__.get('uri'):
219            self.verify_attributes(
220                self.flavor_configs[self.flavor].get('requirements', set()),
221                debug=debug,
222            )
223
224        if wait:
225            from meerschaum.connectors.poll import retry_connect
226            retry_connect(connector=self, debug=debug)
227
228        if connect:
229            if not self.test_connection(debug=debug):
230                warn(f"Failed to connect with connector '{self}'!", stack=False)
231
232    @property
233    def Session(self):
234        if '_Session' not in self.__dict__:
235            if self.engine is None:
236                return None
237
238            from meerschaum.utils.packages import attempt_import
239            sqlalchemy_orm = attempt_import('sqlalchemy.orm', lazy=False)
240            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
241            self._Session = sqlalchemy_orm.scoped_session(session_factory)
242
243        return self._Session
244
245    @property
246    def engine(self):
247        """
248        Return the SQLAlchemy engine connected to the configured database.
249        """
250        import os
251        import threading
252        if '_engine' not in self.__dict__:
253            self._engine, self._engine_str = self.create_engine(include_uri=True)
254
255        same_process = os.getpid() == self._pid
256        same_thread = threading.current_thread().ident == self._thread_ident
257
258        ### handle child processes
259        if not same_process:
260            self._pid = os.getpid()
261            self._thread = threading.current_thread()
262            warn("Different PID detected. Disposing of connections...")
263            self._engine.dispose()
264
265        ### handle different threads
266        if not same_thread:
267            if self.flavor == 'duckdb':
268                warn("Different thread detected.")
269                self._engine.dispose()
270
271        return self._engine
272
273    @property
274    def DATABASE_URL(self) -> str:
275        """
276        Return the URI connection string (alias for `SQLConnector.URI`).
277        """
278        _ = self.engine
279        return str(self._engine_str)
280
281    @property
282    def URI(self) -> str:
283        """
284        Return the URI connection string.
285        """
286        _ = self.engine
287        return str(self._engine_str)
288
289    @property
290    def IS_THREAD_SAFE(self) -> bool:
291        """
292        Return whether this connector may be multithreaded.
293        """
294        if self.flavor in ('duckdb', 'oracle'):
295            return False
296        if self.flavor in ('sqlite', 'geopackage'):
297            return ':memory:' not in self.URI
298        return True
299
300    @property
301    def metadata(self):
302        """
303        Return the metadata bound to this configured schema.
304        """
305        from meerschaum.utils.packages import attempt_import
306        sqlalchemy = attempt_import('sqlalchemy', lazy=False)
307        if '_metadata' not in self.__dict__:
308            self._metadata = sqlalchemy.MetaData(schema=self.schema)
309        return self._metadata
310
311    @property
312    def instance_schema(self):
313        """
314        Return the schema name for Meerschaum tables. 
315        """
316        return self.schema
317
318    @property
319    def internal_schema(self):
320        """
321        Return the schema name for internal tables. 
322        """
323        from meerschaum._internal.static import STATIC_CONFIG
324        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
325        schema_name = self.__dict__.get('internal_schema', None) or (
326            STATIC_CONFIG['sql']['internal_schema']
327            if self.flavor not in NO_SCHEMA_FLAVORS
328            else self.schema
329        )
330
331        if '_internal_schema' not in self.__dict__:
332            self._internal_schema = schema_name
333        return self._internal_schema
334
335    @property
336    def db(self) -> Optional[databases.Database]:
337        from meerschaum.utils.packages import attempt_import
338        databases = attempt_import('databases', lazy=False, install=True)
339        url = self.DATABASE_URL
340        if 'mysql' in url:
341            url = url.replace('+pymysql', '')
342        if '_db' not in self.__dict__:
343            try:
344                self._db = databases.Database(url)
345            except KeyError:
346                ### Likely encountered an unsupported flavor.
347                from meerschaum.utils.warnings import warn
348                self._db = None
349        return self._db
350
351    @property
352    def db_version(self) -> Union[str, None]:
353        """
354        Return the database version.
355        """
356        _db_version = self.__dict__.get('_db_version', None)
357        if _db_version is not None:
358            return _db_version
359
360        from meerschaum.utils.sql import get_db_version
361        self._db_version = get_db_version(self)
362        return self._db_version
363
364    @property
365    def schema(self) -> Union[str, None]:
366        """
367        Return the default schema to use.
368        A value of `None` will not prepend a schema.
369        """
370        if 'schema' in self.__dict__:
371            return self.__dict__['schema']
372
373        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
374        if self.flavor in NO_SCHEMA_FLAVORS:
375            self.__dict__['schema'] = None
376            return None
377
378        sqlalchemy = mrsm.attempt_import('sqlalchemy', lazy=False)
379        _schema = sqlalchemy.inspect(self.engine).default_schema_name
380        self.__dict__['schema'] = _schema
381        return _schema
382
383    def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path:
384        """
385        Return the path to the file to which to write metadata cache.
386        """
387        from meerschaum.config.paths import SQL_CONN_CACHE_RESOURCES_PATH
388        filename = (
389            f'{self.label}-metadata.pkl'
390            if kind == 'pkl'
391            else f'{self.label}.json'
392        )
393        return SQL_CONN_CACHE_RESOURCES_PATH / filename
394
395    def __getstate__(self):
396        return self.__dict__
397
398    def __setstate__(self, d):
399        self.__dict__.update(d)
400
401    def __call__(self):
402        return self

Connect to SQL databases via sqlalchemy.

SQLConnectors may be used as Meerschaum instance connectors. Read more about connectors and instances at https://meerschaum.io/reference/connectors/

SQLConnector( label: Optional[str] = None, flavor: Optional[str] = None, wait: bool = False, connect: bool = False, debug: bool = False, **kw: Any)
109    def __init__(
110        self,
111        label: Optional[str] = None,
112        flavor: Optional[str] = None,
113        wait: bool = False,
114        connect: bool = False,
115        debug: bool = False,
116        **kw: Any
117    ):
118        """
119        Parameters
120        ----------
121        label: str, default 'main'
122            The identifying label for the connector.
123            E.g. for `sql:main`, 'main' is the label.
124            Defaults to 'main'.
125
126        flavor: Optional[str], default None
127            The database flavor, e.g.
128            `'sqlite'`, `'postgresql'`, `'cockroachdb'`, etc.
129            To see supported flavors, run the `bootstrap connectors` command.
130
131        wait: bool, default False
132            If `True`, block until a database connection has been made.
133            Defaults to `False`.
134
135        connect: bool, default False
136            If `True`, immediately attempt to connect the database and raise
137            a warning if the connection fails.
138            Defaults to `False`.
139
140        debug: bool, default False
141            Verbosity toggle.
142            Defaults to `False`.
143
144        kw: Any
145            All other arguments will be passed to the connector's attributes.
146            Therefore, a connector may be made without being registered,
147            as long as enough parameters are supplied to the constructor.
148        """
149        if 'uri' in kw:
150            uri = kw['uri']
151            if uri.startswith('postgres') and not uri.startswith('postgresql'):
152                uri = uri.replace('postgres', 'postgresql', 1)
153            if uri.startswith('postgresql') and not uri.startswith('postgresql+'):
154                uri = uri.replace('postgresql://', 'postgresql+psycopg://', 1)
155            if uri.startswith('timescaledb://'):
156                uri = uri.replace('timescaledb://', 'postgresql+psycopg://', 1)
157                flavor = 'timescaledb'
158            if uri.startswith('timescaledb-ha://'):
159                uri = uri.replace('timescaledb-ha://', 'postgresql+psycopg://', 1)
160                flavor = 'timescaledb-ha'
161            if uri.startswith('postgis://'):
162                uri = uri.replace('postgis://', 'postgresql+psycopg://', 1)
163                flavor = 'postgis'
164            kw['uri'] = uri
165            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
166            label = label or from_uri_params.get('label', None)
167            _ = from_uri_params.pop('label', None)
168
169            ### Sometimes the flavor may be provided with a URI.
170            kw.update(from_uri_params)
171            if flavor:
172                kw['flavor'] = flavor
173
174        ### set __dict__ in base class
175        super().__init__(
176            'sql',
177            label = label or self.__dict__.get('label', None),
178            **kw
179        )
180
181        if self.__dict__.get('flavor', None) in ('sqlite', 'geopackage'):
182            self._reset_attributes()
183            self._set_attributes(
184                'sql',
185                label = label,
186                inherit_default = False,
187                **kw
188            )
189            ### For backwards compatibility reasons, set the path for sql:local if it's missing.
190            if self.label == 'local' and not self.__dict__.get('database', None):
191                from meerschaum.config._paths import SQLITE_DB_PATH
192                self.database = SQLITE_DB_PATH.as_posix()
193
194        ### ensure flavor and label are set accordingly
195        if 'flavor' not in self.__dict__:
196            if flavor is None and 'uri' not in self.__dict__:
197                raise ValueError(
198                    f"    Missing flavor. Provide flavor as a key for '{self}'."
199                )
200            self.flavor = flavor or self.parse_uri(self.__dict__['uri']).get('flavor', None)
201
202        if self.flavor == 'postgres':
203            self.flavor = 'postgresql'
204
205        self._debug = debug
206        ### Store the PID and thread at initialization
207        ### so we can dispose of the Pool in child processes or threads.
208        import os
209        import threading
210        self._pid = os.getpid()
211        self._thread_ident = threading.current_thread().ident
212        self._sessions = {}
213        self._locks = {'_sessions': threading.RLock(), }
214
215        ### verify the flavor's requirements are met
216        if self.flavor not in self.flavor_configs:
217            error(f"Flavor '{self.flavor}' is not supported by Meerschaum SQLConnector")
218        if not self.__dict__.get('uri'):
219            self.verify_attributes(
220                self.flavor_configs[self.flavor].get('requirements', set()),
221                debug=debug,
222            )
223
224        if wait:
225            from meerschaum.connectors.poll import retry_connect
226            retry_connect(connector=self, debug=debug)
227
228        if connect:
229            if not self.test_connection(debug=debug):
230                warn(f"Failed to connect with connector '{self}'!", stack=False)
Parameters
  • label (str, default 'main'): The identifying label for the connector. E.g. for sql:main, 'main' is the label. Defaults to 'main'.
  • flavor (Optional[str], default None): The database flavor, e.g. 'sqlite', 'postgresql', 'cockroachdb', etc. To see supported flavors, run the bootstrap connectors command.
  • wait (bool, default False): If True, block until a database connection has been made. Defaults to False.
  • connect (bool, default False): If True, immediately attempt to connect the database and raise a warning if the connection fails. Defaults to False.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
  • kw (Any): All other arguments will be passed to the connector's attributes. Therefore, a connector may be made without being registered, as long as enough parameters are supplied to the constructor.
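
To illustrate the URI normalization performed in the constructor, a hedged sketch (the credentials are placeholders): a `postgres://` scheme is rewritten to `postgresql+psycopg://`, and the flavor is parsed from the URI when not passed explicitly.

>>> from meerschaum.connectors.sql import SQLConnector
>>> conn = SQLConnector('example', uri='postgres://user:pass@localhost:5432/db')
>>> conn.flavor
'postgresql'
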
Session
232    @property
233    def Session(self):
234        if '_Session' not in self.__dict__:
235            if self.engine is None:
236                return None
237
238            from meerschaum.utils.packages import attempt_import
239            sqlalchemy_orm = attempt_import('sqlalchemy.orm', lazy=False)
240            session_factory = sqlalchemy_orm.sessionmaker(self.engine)
241            self._Session = sqlalchemy_orm.scoped_session(session_factory)
242
243        return self._Session
engine
245    @property
246    def engine(self):
247        """
248        Return the SQLAlchemy engine connected to the configured database.
249        """
250        import os
251        import threading
252        if '_engine' not in self.__dict__:
253            self._engine, self._engine_str = self.create_engine(include_uri=True)
254
255        same_process = os.getpid() == self._pid
256        same_thread = threading.current_thread().ident == self._thread_ident
257
258        ### handle child processes
259        if not same_process:
260            self._pid = os.getpid()
261            self._thread = threading.current_thread()
262            warn("Different PID detected. Disposing of connections...")
263            self._engine.dispose()
264
265        ### handle different threads
266        if not same_thread:
267            if self.flavor == 'duckdb':
268                warn("Different thread detected.")
269                self._engine.dispose()
270
271        return self._engine

Return the SQLAlchemy engine connected to the configured database.

DATABASE_URL: str
273    @property
274    def DATABASE_URL(self) -> str:
275        """
276        Return the URI connection string (alias for `SQLConnector.URI`).
277        """
278        _ = self.engine
279        return str(self._engine_str)

Return the URI connection string (alias for SQLConnector.URI).

URI: str
281    @property
282    def URI(self) -> str:
283        """
284        Return the URI connection string.
285        """
286        _ = self.engine
287        return str(self._engine_str)

Return the URI connection string.

IS_THREAD_SAFE: bool
289    @property
290    def IS_THREAD_SAFE(self) -> bool:
291        """
292        Return whether this connector may be multithreaded.
293        """
294        if self.flavor in ('duckdb', 'oracle'):
295            return False
296        if self.flavor in ('sqlite', 'geopackage'):
297            return ':memory:' not in self.URI
298        return True

Return whether this connector may be multithreaded.
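
One way a caller might use this flag, sketched with the standard library (`process_chunk` and `chunks` are hypothetical placeholders):

>>> from concurrent.futures import ThreadPoolExecutor
>>> workers = 4 if conn.IS_THREAD_SAFE else 1
>>> with ThreadPoolExecutor(max_workers=workers) as pool:
...     results = list(pool.map(process_chunk, chunks))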

metadata
300    @property
301    def metadata(self):
302        """
303        Return the metadata bound to this configured schema.
304        """
305        from meerschaum.utils.packages import attempt_import
306        sqlalchemy = attempt_import('sqlalchemy', lazy=False)
307        if '_metadata' not in self.__dict__:
308            self._metadata = sqlalchemy.MetaData(schema=self.schema)
309        return self._metadata

Return the metadata bound to this configured schema.

instance_schema
311    @property
312    def instance_schema(self):
313        """
314        Return the schema name for Meerschaum tables. 
315        """
316        return self.schema

Return the schema name for Meerschaum tables.

internal_schema
318    @property
319    def internal_schema(self):
320        """
321        Return the schema name for internal tables. 
322        """
323        from meerschaum._internal.static import STATIC_CONFIG
324        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
325        schema_name = self.__dict__.get('internal_schema', None) or (
326            STATIC_CONFIG['sql']['internal_schema']
327            if self.flavor not in NO_SCHEMA_FLAVORS
328            else self.schema
329        )
330
331        if '_internal_schema' not in self.__dict__:
332            self._internal_schema = schema_name
333        return self._internal_schema

Return the schema name for internal tables.

db: 'Optional[databases.Database]'
335    @property
336    def db(self) -> Optional[databases.Database]:
337        from meerschaum.utils.packages import attempt_import
338        databases = attempt_import('databases', lazy=False, install=True)
339        url = self.DATABASE_URL
340        if 'mysql' in url:
341            url = url.replace('+pymysql', '')
342        if '_db' not in self.__dict__:
343            try:
344                self._db = databases.Database(url)
345            except KeyError:
346                ### Likely encountered an unsupported flavor.
347                from meerschaum.utils.warnings import warn
348                self._db = None
349        return self._db
db_version: Optional[str]
351    @property
352    def db_version(self) -> Union[str, None]:
353        """
354        Return the database version.
355        """
356        _db_version = self.__dict__.get('_db_version', None)
357        if _db_version is not None:
358            return _db_version
359
360        from meerschaum.utils.sql import get_db_version
361        self._db_version = get_db_version(self)
362        return self._db_version

Return the database version.

schema: Optional[str]
364    @property
365    def schema(self) -> Union[str, None]:
366        """
367        Return the default schema to use.
368        A value of `None` will not prepend a schema.
369        """
370        if 'schema' in self.__dict__:
371            return self.__dict__['schema']
372
373        from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
374        if self.flavor in NO_SCHEMA_FLAVORS:
375            self.__dict__['schema'] = None
376            return None
377
378        sqlalchemy = mrsm.attempt_import('sqlalchemy', lazy=False)
379        _schema = sqlalchemy.inspect(self.engine).default_schema_name
380        self.__dict__['schema'] = _schema
381        return _schema

Return the default schema to use. A value of None will not prepend a schema.

def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path:
383    def get_metadata_cache_path(self, kind: str = 'json') -> pathlib.Path:
384        """
385        Return the path to the file to which to write metadata cache.
386        """
387        from meerschaum.config.paths import SQL_CONN_CACHE_RESOURCES_PATH
388        filename = (
389            f'{self.label}-metadata.pkl'
390            if kind == 'pkl'
391            else f'{self.label}.json'
392        )
393        return SQL_CONN_CACHE_RESOURCES_PATH / filename

Return the path to the file to which to write metadata cache.

flavor_configs = {
    'timescaledb': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 5432}},
    'timescaledb-ha': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 5432}},
    'postgresql': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 5432}},
    'postgis': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 5432}},
    'citus': {'engine': 'postgresql+psycopg', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 5432}},
    'mssql': {'engine': 'mssql+pyodbc', 'create_engine': {'fast_executemany': True, 'use_insertmanyvalues': False, 'isolation_level': 'AUTOCOMMIT', 'use_setinputsizes': False, 'pool_pre_ping': True, 'ignore_no_transaction_on_rollback': True}, 'omit_create_engine': {'method'}, 'to_sql': {'method': None}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 1433, 'options': 'driver=ODBC Driver 18 for SQL Server&UseFMTONLY=Yes&TrustServerCertificate=yes&Encrypt=no&MARS_Connection=yes'}},
    'mysql': {'engine': 'mysql+pymysql', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 3306}},
    'mariadb': {'engine': 'mysql+pymysql', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 3306}},
    'oracle': {'engine': 'oracle+oracledb', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': None}, 'requirements': {'username', 'host', 'database', 'password'}, 'defaults': {'port': 1521}},
    'sqlite': {'engine': 'sqlite', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'database'}, 'defaults': {}},
    'geopackage': {'engine': 'sqlite', 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'omit_create_engine': {'method'}, 'to_sql': {'method': 'multi'}, 'requirements': {'database'}, 'defaults': {}},
    'duckdb': {'engine': 'duckdb', 'create_engine': {}, 'omit_create_engine': {'ALL'}, 'to_sql': {'method': 'multi'}, 'requirements': '', 'defaults': {}},
    'cockroachdb': {'engine': 'cockroachdb', 'omit_create_engine': {'method'}, 'create_engine': {'pool_size': 6, 'max_overflow': 6, 'pool_recycle': 3600, 'connect_args': {}}, 'to_sql': {'method': 'multi'}, 'requirements': {'host'}, 'defaults': {'port': 26257, 'database': 'defaultdb', 'username': 'root', 'password': 'admin'}},
}
def create_engine( self, include_uri: bool = False, debug: bool = False, **kw) -> 'sqlalchemy.engine.Engine':
 45def create_engine(
 46    self,
 47    include_uri: bool = False,
 48    debug: bool = False,
 49    **kw
 50) -> 'sqlalchemy.engine.Engine':
 51    """Create a sqlalchemy engine by building the engine string."""
 52    from meerschaum.utils.packages import attempt_import
 53    from meerschaum.utils.warnings import error, warn
 54    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
 55    import urllib
 56    import copy
 57    ### Install and patch required drivers.
 58    if self.flavor in install_flavor_drivers:
 59        _ = attempt_import(
 60            *install_flavor_drivers[self.flavor],
 61            debug=debug,
 62            lazy=False,
 63            warn=False,
 64        )
 65        if self.flavor == 'mssql':
 66            _init_mssql_sqlalchemy()
 67
 68    ### supplement missing values with defaults (e.g. port number)
 69    for a, value in flavor_configs[self.flavor]['defaults'].items():
 70        if a not in self.__dict__:
 71            self.__dict__[a] = value
 72
 73    ### Verify that everything is in order.
 74    if self.flavor not in flavor_configs:
 75        error(f"Cannot create a connector with the flavor '{self.flavor}'.")
 76
 77    _engine = flavor_configs[self.flavor].get('engine', None)
 78    _username = self.__dict__.get('username', None)
 79    _password = self.__dict__.get('password', None)
 80    _host = self.__dict__.get('host', None)
 81    _port = self.__dict__.get('port', None)
 82    _database = self.__dict__.get('database', None)
 83    if _database == '{SQLITE_DB_PATH}':
 84        from meerschaum.config.paths import SQLITE_DB_PATH
 85        _database = SQLITE_DB_PATH.as_posix()
 86    _options = self.__dict__.get('options', {})
 87    if isinstance(_options, str):
 88        _options = dict(urllib.parse.parse_qsl(_options))
 89    _uri = self.__dict__.get('uri', None)
 90
 91    ### Handle registering specific dialects (due to installing in virtual environments).
 92    if self.flavor in flavor_dialects:
 93        sqlalchemy.dialects.registry.register(*flavor_dialects[self.flavor])
 94
 95    ### self._sys_config was deepcopied and can be updated safely
 96    if self.flavor in ("sqlite", "duckdb", "geopackage"):
 97        engine_str = f"{_engine}:///{_database}" if not _uri else _uri
 98        if 'create_engine' not in self._sys_config:
 99            self._sys_config['create_engine'] = {}
100        if 'connect_args' not in self._sys_config['create_engine']:
101            self._sys_config['create_engine']['connect_args'] = {}
102        self._sys_config['create_engine']['connect_args'].update({"check_same_thread": False})
103    else:
104        engine_str = (
105            _engine + "://" + (_username if _username is not None else '') +
106            ((":" + urllib.parse.quote_plus(_password)) if _password is not None else '') +
107            "@" + _host + ((":" + str(_port)) if _port is not None else '') +
108            (("/" + _database) if _database is not None else '')
109            + (("?" + urllib.parse.urlencode(_options)) if _options else '')
110        ) if not _uri else _uri
111
112        ### Sometimes the timescaledb:// flavor can slip in.
113        if _uri and self.flavor in _uri:
114            if self.flavor in ('timescaledb', 'timescaledb-ha', 'postgis'):
115                engine_str = engine_str.replace(self.flavor, 'postgresql', 1)
116            elif _uri.startswith('postgresql://'):
117                engine_str = engine_str.replace('postgresql://', 'postgresql+psycopg2://')
118
119    if debug:
120        dprint(
121            (
122                (engine_str.replace(':' + _password, ':' + ('*' * len(_password))))
123                    if _password is not None else engine_str
124            ) + '\n' + f"{self._sys_config.get('create_engine', {}).get('connect_args', {})}"
125        )
126
127    _kw_copy = copy.deepcopy(kw)
128
129    ### NOTE: Order of inheritance:
130    ###       1. Defaults
131    ###       2. System configuration
132    ###       3. Connector configuration
133    ###       4. Keyword arguments
134    _create_engine_args = flavor_configs.get(self.flavor, {}).get('create_engine', {})
135    def _apply_create_engine_args(update):
136        if 'ALL' not in flavor_configs[self.flavor].get('omit_create_engine', {}):
137            _create_engine_args.update(
138                { k: v for k, v in update.items()
139                    if 'omit_create_engine' not in flavor_configs[self.flavor]
140                        or k not in flavor_configs[self.flavor].get('omit_create_engine')
141                }
142            )
143    _apply_create_engine_args(self._sys_config.get('create_engine', {}))
144    _apply_create_engine_args(self.__dict__.get('create_engine', {}))
145    _apply_create_engine_args(_kw_copy)
146
147    try:
148        engine = sqlalchemy.create_engine(
149            engine_str,
150            ### I know this looks confusing, and maybe it's bad code,
151            ### but it's simple. It dynamically parses the config string
152            ### and splits it to separate the class name (QueuePool)
153            ### from the module name (sqlalchemy.pool).
154            poolclass    = getattr(
155                attempt_import(
156                    ".".join(self._sys_config['poolclass'].split('.')[:-1])
157                ),
158                self._sys_config['poolclass'].split('.')[-1]
159            ),
160            echo         = debug,
161            **_create_engine_args
162        )
163    except Exception:
164        warn(f"Failed to create connector '{self}':\n{traceback.format_exc()}", stack=False)
165        engine = None
166
167    if include_uri:
168        return engine, engine_str
169    return engine

Create a sqlalchemy engine by building the engine string.
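
A small sketch of the `include_uri` flag; the scheme shown is illustrative for a PostgreSQL-family connector:

>>> engine, engine_str = conn.create_engine(include_uri=True)
>>> engine_str.split('://', 1)[0]
'postgresql+psycopg'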

def read( self, query_or_table: 'Union[str, sqlalchemy.Query]', params: Union[Dict[str, Any], List[str], NoneType] = None, dtype: Optional[Dict[str, Any]] = None, coerce_float: bool = True, chunksize: Optional[int] = -1, workers: Optional[int] = None, chunk_hook: Optional[Callable[[pandas.core.frame.DataFrame], Any]] = None, as_hook_results: bool = False, chunks: Optional[int] = None, schema: Optional[str] = None, as_chunks: bool = False, as_iterator: bool = False, as_dask: bool = False, index_col: Optional[str] = None, silent: bool = False, debug: bool = False, **kw: Any) -> 'Union[pandas.DataFrame, dask.DataFrame, List[pandas.DataFrame], List[Any], None]':
 35def read(
 36    self,
 37    query_or_table: Union[str, sqlalchemy.Query],
 38    params: Union[Dict[str, Any], List[str], None] = None,
 39    dtype: Optional[Dict[str, Any]] = None,
 40    coerce_float: bool = True,
 41    chunksize: Optional[int] = -1,
 42    workers: Optional[int] = None,
 43    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]] = None,
 44    as_hook_results: bool = False,
 45    chunks: Optional[int] = None,
 46    schema: Optional[str] = None,
 47    as_chunks: bool = False,
 48    as_iterator: bool = False,
 49    as_dask: bool = False,
 50    index_col: Optional[str] = None,
 51    silent: bool = False,
 52    debug: bool = False,
 53    **kw: Any
 54) -> Union[
 55    pandas.DataFrame,
 56    dask.DataFrame,
 57    List[pandas.DataFrame],
 58    List[Any],
 59    None,
 60]:
 61    """
 62    Read a SQL query or table into a pandas dataframe.
 63
 64    Parameters
 65    ----------
 66    query_or_table: Union[str, sqlalchemy.Query]
 67        The SQL query (sqlalchemy Query or string) or name of the table from which to select.
 68
 69    params: Optional[Dict[str, Any]], default None
 70        `List` or `Dict` of parameters to pass to `pandas.read_sql()`.
 71        See the pandas documentation for more information:
 72        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
 73
 74    dtype: Optional[Dict[str, Any]], default None
 75        A dictionary of data types to pass to `pandas.read_sql()`.
 76        See the pandas documentation for more information:
 77        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
 78
 79    chunksize: Optional[int], default -1
 80        How many rows to read per chunk. `None` will read everything in one large chunk.
 81        Defaults to system configuration.
 82
 83        **NOTE:** DuckDB does not allow for chunking.
 84
 85    workers: Optional[int], default None
 86        How many threads to use when consuming the generator.
 87        Only applies if `chunk_hook` is provided.
 88
 89    chunk_hook: Optional[Callable[[pandas.DataFrame], Any]], default None
 90        Hook function to execute once per chunk, e.g. writing and reading chunks intermittently.
 91        See `--sync-chunks` for an example.
 92        **NOTE:** `as_iterator` MUST be False (default).
 93
 94    as_hook_results: bool, default False
 95        If `True`, return a `List` of the outputs of the hook function.
 96        Only applicable if `chunk_hook` is not None.
 97
 98        **NOTE:** `as_iterator` MUST be `False` (default).
 99
100    chunks: Optional[int], default None
101        Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and
102        return into a single dataframe.
103        For example, to limit the returned dataframe to 100,000 rows,
104        you could specify a `chunksize` of `1000` and `chunks` of `100`.
105
106    schema: Optional[str], default None
107        If just a table name is provided, optionally specify the table schema.
108        Defaults to `SQLConnector.schema`.
109
110    as_chunks: bool, default False
111        If `True`, return a list of DataFrames.
112        Otherwise return a single DataFrame.
113
114    as_iterator: bool, default False
115        If `True`, return the pandas DataFrame iterator.
116        `chunksize` must not be `None` (falls back to 1000 if so),
117        and hooks are not called in this case.
118
119    index_col: Optional[str], default None
120        If using Dask, use this column as the index column.
121        If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
122
123    silent: bool, default False
124        If `True`, don't raise warnings in case of errors.
125        Defaults to `False`.
126
127    Returns
128    -------
129    A `pd.DataFrame` (default case), or an iterator, or a list of dataframes / iterators,
130    or `None` if something breaks.
131
132    """
133    if chunks is not None and chunks <= 0:
134        return []
135
136    from meerschaum.utils.sql import sql_item_name, truncate_item_name
137    from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
138    from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS
139    from meerschaum.utils.packages import attempt_import, import_pandas
140    from meerschaum.utils.pool import get_pool
141    from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
142    from meerschaum.utils.misc import filter_arguments
143    import warnings
144    import traceback
145    from decimal import Decimal
146
147    pd = import_pandas()
148    dd = None
149    is_dask = 'dask' in pd.__name__
150    pandas = attempt_import('pandas')
151    dd = attempt_import('dask.dataframe') if is_dask else None
152    is_dask = dd is not None
153    npartitions = chunksize_to_npartitions(chunksize)
154    if is_dask:
155        chunksize = None
156
157    schema = schema or self.schema
158    utc_dt_cols = [
159        col
160        for col, typ in dtype.items()
161        if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
162    ] if dtype else []
163
164    if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
165        dtype = dtype.copy()
166        for col in utc_dt_cols:
167            dtype[col] = 'datetime64[us]'
168
169    pool = get_pool(workers=workers)
170    sqlalchemy = attempt_import("sqlalchemy", lazy=False)
171    default_chunksize = self._sys_config.get('chunksize', None)
172    chunksize = chunksize if chunksize != -1 else default_chunksize
173    if chunksize is None and as_iterator:
174        if not silent and self.flavor not in _disallow_chunks_flavors:
175            warn(
176                "An iterator may only be generated if chunksize is not None.\n"
177                + "Falling back to a chunksize of 1000.", stacklevel=3,
178            )
179        chunksize = 1000
180    if chunksize is not None and self.flavor in _max_chunks_flavors:
181        if chunksize > _max_chunks_flavors[self.flavor]:
182            if chunksize != default_chunksize:
183                warn(
184                    f"The specified chunksize of {chunksize} exceeds the maximum of "
185                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
186                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
187                    stacklevel=3,
188                )
189            chunksize = _max_chunks_flavors[self.flavor]
190
191    if chunksize is not None and self.flavor in _disallow_chunks_flavors:
192        chunksize = None
193
194    if debug:
195        import time
196        start = time.perf_counter()
197        dprint(f"[{self}]\n{query_or_table}")
198        dprint(f"[{self}] Fetching with chunksize: {chunksize}")
199
200    ### This might be sqlalchemy object or the string of a table name.
201    ### We check for spaces and quotes to see if it might be a weird table.
202    if (
203        ' ' not in str(query_or_table)
204        or (
205            ' ' in str(query_or_table)
206            and str(query_or_table).startswith('"')
207            and str(query_or_table).endswith('"')
208        )
209    ):
210        truncated_table_name = truncate_item_name(str(query_or_table), self.flavor)
211        if truncated_table_name != str(query_or_table) and not silent:
212            warn(
213                f"Table '{query_or_table}' is too long for '{self.flavor}',"
214                + f" will instead read the table '{truncated_table_name}'."
215            )
216
217        query_or_table = sql_item_name(str(query_or_table), self.flavor, schema)
218        if debug:
219            dprint(f"[{self}] Reading from table {query_or_table}")
220        formatted_query = sqlalchemy.text("SELECT * FROM " + str(query_or_table))
221        str_query = f"SELECT * FROM {query_or_table}"
222    else:
223        str_query = query_or_table
224
225    formatted_query = (
226        sqlalchemy.text(str_query)
227        if not is_dask and isinstance(str_query, str)
228        else format_sql_query_for_dask(str_query)
229    )
230
231    def _get_chunk_args_kwargs(_chunk):
232        return filter_arguments(
233            chunk_hook,
234            _chunk,
235            workers=workers,
236            chunksize=chunksize,
237            debug=debug,
238            **kw
239        )
240
241    chunk_list = []
242    chunk_hook_results = []
243    def _process_chunk(_chunk, _retry_on_failure: bool = True):
244        if self.flavor in TIMEZONE_NAIVE_FLAVORS:
245            for col in utc_dt_cols:
246                _chunk[col] = coerce_timezone(_chunk[col], strip_utc=False)
247        if not as_hook_results:
248            chunk_list.append(_chunk)
249
250        if chunk_hook is None:
251            return None
252
253        chunk_args, chunk_kwargs = _get_chunk_args_kwargs(_chunk)
254
255        result = None
256        try:
257            result = chunk_hook(*chunk_args, **chunk_kwargs)
258        except Exception:
259            result = False, traceback.format_exc()
260            from meerschaum.utils.formatting import get_console
261            if not silent:
262                get_console().print_exception()
263
264        ### If the chunk fails to process, try it again one more time.
265        if isinstance(result, tuple) and result[0] is False:
266            if _retry_on_failure:
267                return _process_chunk(_chunk, _retry_on_failure=False)
268
269        return result
270
271    try:
272        stream_results = not as_iterator and chunk_hook is not None and chunksize is not None
273        with warnings.catch_warnings():
274            warnings.filterwarnings('ignore', 'case sensitivity issues')
275
276            read_sql_query_kwargs = {
277                'params': params,
278                'dtype': dtype,
279                'coerce_float': coerce_float,
280                'index_col': index_col,
281            }
282            if is_dask:
283                if index_col is None:
284                    dd = None
285                    pd = attempt_import('pandas')
286                    read_sql_query_kwargs.update({
287                        'chunksize': chunksize,
288                    })
289            else:
290                read_sql_query_kwargs.update({
291                    'chunksize': chunksize,
292                })
293
294            if is_dask and dd is not None:
295                ddf = dd.read_sql_query(
296                    formatted_query,
297                    self.URI,
298                    **read_sql_query_kwargs
299                )
300            else:
301
302                def get_chunk_generator(connectable):
303                    chunk_generator = pd.read_sql_query(
304                        formatted_query,
305                        connectable, # NOTE: test this against `self.engine`.
306                        **read_sql_query_kwargs
307                    )
308
309                    to_return = (
310                        (
311                            chunk_generator
312                            if not (as_hook_results or chunksize is None)
313                            else (
314                                _process_chunk(_chunk)
315                                for _chunk in chunk_generator
316                            )
317                        )
318                        if as_iterator or chunksize is None
319                        else (
320                            list(pool.imap(_process_chunk, chunk_generator))
321                            if as_hook_results
322                            else None
323                        )
324                    )
325                    return chunk_generator, to_return
326
327                if self.flavor in SKIP_READ_TRANSACTION_FLAVORS:
328                    chunk_generator, to_return = get_chunk_generator(self.engine)
329                else:
330                    with self.engine.begin() as transaction:
331                        with transaction.execution_options(
332                            stream_results=stream_results,
333                        ) as connection:
334                            chunk_generator, to_return = get_chunk_generator(connection)
335
336                if to_return is not None:
337                    return to_return
338
339    except Exception as e:
340        if debug:
341            dprint(f"[{self}] Failed to execute query:\n\n{query_or_table}\n\n")
342        if not silent:
343            warn(str(e), stacklevel=3)
344        from meerschaum.utils.formatting import get_console
345        if not silent:
346            get_console().print_exception()
347
348        return None
349
350    if is_dask and dd is not None:
351        ddf = ddf.reset_index()
352        return ddf
353
354    chunk_list = []
355    read_chunks = 0
356    chunk_hook_results = []
357    if chunksize is None:
358        chunk_list.append(chunk_generator)
359    elif as_iterator:
360        return chunk_generator
361    else:
362        try:
363            for chunk in chunk_generator:
364                if chunk_hook is not None:
365                    chunk_args, chunk_kwargs = _get_chunk_args_kwargs(chunk)
366                    chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs))
367                chunk_list.append(chunk)
368                read_chunks += 1
369                if chunks is not None and read_chunks >= chunks:
370                    break
371        except Exception as e:
372            warn(f"[{self}] Failed to retrieve query results:\n" + str(e), stacklevel=3)
373            from meerschaum.utils.formatting import get_console
374            if not silent:
375                get_console().print_exception()
376
377            return None
394
395    ### If no chunks returned, read without chunks
396    ### to get columns
397    if len(chunk_list) == 0:
398        with warnings.catch_warnings():
399            warnings.filterwarnings('ignore', 'case sensitivity issues')
400            _ = read_sql_query_kwargs.pop('chunksize', None)
401            with self.engine.begin() as connection:
402                chunk_list.append(
403                    pd.read_sql_query(
404                        formatted_query,
405                        connection,
406                        **read_sql_query_kwargs
407                    )
408                )
409
410    ### call the hook on any missed chunks.
411    if chunk_hook is not None and len(chunk_list) > len(chunk_hook_results):
412        for c in chunk_list[len(chunk_hook_results):]:
413            chunk_args, chunk_kwargs = _get_chunk_args_kwargs(c)
414            chunk_hook_results.append(chunk_hook(*chunk_args, **chunk_kwargs))
415
416    ### chunksize is not None so must iterate
417    if debug:
418        end = time.perf_counter()
419        dprint(f"Fetched {len(chunk_list)} chunks in {round(end - start, 2)} seconds.")
420
421    if as_hook_results:
422        return chunk_hook_results
423    
424    ### Skip `pd.concat()` if `as_chunks` is specified.
425    if as_chunks:
426        for c in chunk_list:
427            c.reset_index(drop=True, inplace=True)
428            for col in get_numeric_cols(c):
429                c[col] = c[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)
430        return chunk_list
431
432    df = pd.concat(chunk_list).reset_index(drop=True)
433    ### NOTE: The calls to `canonical()` are to drop leading and trailing zeroes.
434    for col in get_numeric_cols(df):
435        df[col] = df[col].apply(lambda x: x.canonical() if isinstance(x, Decimal) else x)
436
437    return df

Read a SQL query or table into a pandas dataframe.

Parameters
  • query_or_table (Union[str, sqlalchemy.Query]): The SQL query (sqlalchemy Query or string) or name of the table from which to select.
  • params (Optional[Dict[str, Any]], default None): List or Dict of parameters to pass to pandas.read_sql(). See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html
  • dtype (Optional[Dict[str, Any]], default None): A dictionary of data types to pass to pandas.read_sql(). See the pandas documentation for more information: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql_query.html
  • chunksize (Optional[int], default -1): How many rows to read per chunk. None will read everything in one large chunk. Defaults to system configuration.

    NOTE: DuckDB does not allow for chunking.

  • workers (Optional[int], default None): How many threads to use when consuming the generator. Only applies if chunk_hook is provided.
  • chunk_hook (Optional[Callable[[pandas.DataFrame], Any]], default None): Hook function to execute once per chunk, e.g. writing and reading chunks intermittently. See --sync-chunks for an example. NOTE: as_iterator MUST be False (default).
  • as_hook_results (bool, default False): If True, return a List of the outputs of the hook function. Only applicable if chunk_hook is not None.

    NOTE: as_iterator MUST be False (default).

  • chunks (Optional[int], default None): Limit the number of chunks to read into memory, i.e. how many chunks to retrieve and return into a single dataframe. For example, to limit the returned dataframe to 100,000 rows, you could specify a chunksize of 1000 and chunks of 100.
  • schema (Optional[str], default None): If just a table name is provided, optionally specify the table schema. Defaults to SQLConnector.schema.
  • as_chunks (bool, default False): If True, return a list of DataFrames. Otherwise return a single DataFrame.
  • as_iterator (bool, default False): If True, return the pandas DataFrame iterator. chunksize must not be None (falls back to 1000 if so), and hooks are not called in this case.
  • index_col (Optional[str], default None): If using Dask, use this column as the index column. If omitted, a Pandas DataFrame will be fetched and converted to a Dask DataFrame.
  • silent (bool, default False): If True, don't raise warnings in case of errors. Defaults to False.
Returns
  • A pd.DataFrame (default case), or an iterator, or a list of dataframes / iterators, or None if something breaks.
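
A short usage sketch (the connector keys `sql:main` and the table name `my_table` are hypothetical):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> df = conn.read("SELECT 1 AS foo")
>>> chunks = conn.read('my_table', chunksize=1000, as_iterator=True)
>>> for chunk in chunks:
...     print(len(chunk))
```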
def value(self, query: str, *args: Any, use_pandas: bool = False, **kw: Any) -> Any:
440def value(
441    self,
442    query: str,
443    *args: Any,
444    use_pandas: bool = False,
445    **kw: Any
446) -> Any:
447    """
448    Execute the provided query and return the first value.
449
450    Parameters
451    ----------
452    query: str
453        The SQL query to execute.
454        
455    *args: Any
456        The arguments passed to `meerschaum.connectors.sql.SQLConnector.exec`
457        if `use_pandas` is `False` (default) or to `meerschaum.connectors.sql.SQLConnector.read`.
458        
459    use_pandas: bool, default False
 460        If `True`, use `meerschaum.connectors.sql.SQLConnector.read`, otherwise use
461        `meerschaum.connectors.sql.SQLConnector.exec` (default).
462        **NOTE:** This is always `True` for DuckDB.
463
464    **kw: Any
465        See `args`.
466
467    Returns
468    -------
469    Any value returned from the query.
470
471    """
472    from meerschaum.utils.packages import attempt_import
473    if self.flavor == 'duckdb':
474        use_pandas = True
475    if use_pandas:
476        try:
477            return self.read(query, *args, **kw).iloc[0, 0]
478        except Exception:
479            return None
480
481    _close = kw.get('close', True)
482    _commit = kw.get('commit', (self.flavor != 'mssql'))
483
484    try:
485        result, connection = self.exec(
486            query,
487            *args,
488            with_connection=True,
489            close=False,
490            commit=_commit,
491            **kw
492        )
493        first = result.first() if result is not None else None
494        _val = first[0] if first is not None else None
495    except Exception as e:
496        warn(e, stacklevel=3)
497        return None
498    if _close:
499        try:
500            connection.close()
501        except Exception as e:
502            warn("Failed to close connection with exception:\n" + str(e))
503    return _val

Execute the provided query and return the first value.

Parameters
  • query (str): The SQL query to execute.
  • *args (Any): The arguments passed to meerschaum.connectors.sql.SQLConnector.exec if use_pandas is False (default) or to meerschaum.connectors.sql.SQLConnector.read.
  • use_pandas (bool, default False): If True, use meerschaum.connectors.sql.SQLConnector.read, otherwise use meerschaum.connectors.sql.SQLConnector.exec (default). NOTE: This is always True for DuckDB.
  • **kw (Any): See args.
Returns
  • Any value returned from the query.
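
For example (the table name is hypothetical; the second call passes bind variables through to `exec`):
```
>>> conn.value("SELECT COUNT(*) FROM my_table")
>>> conn.value("SELECT val FROM my_table WHERE id = :id", {'id': 1})
```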
def exec( self, query: str, *args: Any, silent: bool = False, debug: bool = False, commit: Optional[bool] = None, close: Optional[bool] = None, with_connection: bool = False, _connection=None, _transaction=None, **kw: Any) -> 'Union[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.cursor.LegacyCursorResult, Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection], Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection], None]':
517def exec(
518    self,
519    query: str,
520    *args: Any,
521    silent: bool = False,
522    debug: bool = False,
523    commit: Optional[bool] = None,
524    close: Optional[bool] = None,
525    with_connection: bool = False,
526    _connection=None,
527    _transaction=None,
528    **kw: Any
529) -> Union[
530        sqlalchemy.engine.result.resultProxy,
531        sqlalchemy.engine.cursor.LegacyCursorResult,
532        Tuple[sqlalchemy.engine.result.resultProxy, sqlalchemy.engine.base.Connection],
533        Tuple[sqlalchemy.engine.cursor.LegacyCursorResult, sqlalchemy.engine.base.Connection],
534        None
535]:
536    """
537    Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
538
539    If inserting data, please use bind variables to avoid SQL injection!
540
541    Parameters
542    ----------
543    query: Union[str, List[str], Tuple[str]]
544        The query to execute.
545        If `query` is a list or tuple, call `self.exec_queries()` instead.
546
547    args: Any
548        Arguments passed to `sqlalchemy.engine.execute`.
549
550    silent: bool, default False
551        If `True`, suppress warnings.
552
553    commit: Optional[bool], default None
554        If `True`, commit the changes after execution.
555        Causes issues with flavors like `'mssql'`.
556        This does not apply if `query` is a list of strings.
557
558    close: Optional[bool], default None
559        If `True`, close the connection after execution.
560        Causes issues with flavors like `'mssql'`.
561        This does not apply if `query` is a list of strings.
562
563    with_connection: bool, default False
564        If `True`, return a tuple including the connection object.
565        This does not apply if `query` is a list of strings.
566
567    Returns
568    -------
569    The `sqlalchemy` result object, or a tuple with the connection if `with_connection` is provided.
570
571    """
572    if isinstance(query, (list, tuple)):
573        return self.exec_queries(
574            list(query),
575            *args,
576            silent=silent,
577            debug=debug,
578            **kw
579        )
580
581    from meerschaum.utils.packages import attempt_import
582    sqlalchemy = attempt_import("sqlalchemy", lazy=False)
583    if debug:
584        dprint(f"[{self}] Executing query:\n{query}")
585
586    _close = close if close is not None else (self.flavor != 'mssql')
587    _commit = commit if commit is not None else (
588        (self.flavor != 'mssql' or 'select' not in str(query).lower())
589    )
590
591    ### Select and Insert objects need to be compiled (SQLAlchemy 2.0.0+).
592    if not hasattr(query, 'compile'):
593        query = sqlalchemy.text(query)
594
595    connection = _connection if _connection is not None else self.get_connection()
596
597    try:
598        transaction = (
599            _transaction
600            if _transaction is not None else (
601                connection.begin()
602                if _commit
603                else None
604            )
605        )
606    except sqlalchemy.exc.InvalidRequestError as e:
607        if _connection is not None or _transaction is not None:
608            raise e
609        connection = self.get_connection(rebuild=True)
610        transaction = connection.begin()
611
612    if transaction is not None and not transaction.is_active and _transaction is not None:
613        connection = self.get_connection(rebuild=True)
614        transaction = connection.begin() if _commit else None
615
616    result = None
617    try:
618        result = connection.execute(query, *args, **kw)
619        if _commit:
620            transaction.commit()
621    except Exception as e:
622        if debug:
623            dprint(f"[{self}] Failed to execute query:\n\n{query}\n\n{e}")
624        if not silent:
625            warn(str(e), stacklevel=3)
626        result = None
627        if _commit:
628            if debug:
629                dprint(f"[{self}] Rolling back failed transaction...")
630            transaction.rollback()
631            connection = self.get_connection(rebuild=True)
632    finally:
633        if _close:
634            connection.close()
635
636    if debug:
637        dprint(f"[{self}] Done executing.")
638
639    if with_connection:
640        return result, connection
641
642    return result

Execute SQL code and return the sqlalchemy result, e.g. when calling stored procedures.

If inserting data, please use bind variables to avoid SQL injection!

Parameters
  • query (Union[str, List[str], Tuple[str]]): The query to execute. If query is a list or tuple, call self.exec_queries() instead.
  • args (Any): Arguments passed to sqlalchemy.engine.execute.
  • silent (bool, default False): If True, suppress warnings.
  • commit (Optional[bool], default None): If True, commit the changes after execution. Causes issues with flavors like 'mssql'. This does not apply if query is a list of strings.
  • close (Optional[bool], default None): If True, close the connection after execution. Causes issues with flavors like 'mssql'. This does not apply if query is a list of strings.
  • with_connection (bool, default False): If True, return a tuple including the connection object. This does not apply if query is a list of strings.
Returns
  • The sqlalchemy result object, or a tuple with the connection if with_connection is provided.
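
A hedged sketch using bind variables, per the note above (table and column names are hypothetical):
```
>>> result = conn.exec(
...     "INSERT INTO my_table (id, val) VALUES (:id, :val)",
...     {'id': 1, 'val': 'foo'},
... )
>>> result, connection = conn.exec("SELECT 1 AS foo", with_connection=True)
```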
def execute( self, *args: Any, **kw: Any) -> 'Optional[sqlalchemy.engine.result.resultProxy]':
506def execute(
507    self,
508    *args : Any,
509    **kw : Any
510) -> Optional[sqlalchemy.engine.result.resultProxy]:
511    """
512    An alias for `meerschaum.connectors.sql.SQLConnector.exec`.
513    """
514    return self.exec(*args, **kw)
def to_sql( self, df: pandas.core.frame.DataFrame, name: str = None, index: bool = False, if_exists: str = 'replace', method: str = '', chunksize: Optional[int] = -1, schema: Optional[str] = None, safe_copy: bool = True, silent: bool = False, debug: bool = False, as_tuple: bool = False, as_dict: bool = False, _connection=None, _transaction=None, **kw) -> Union[bool, Tuple[bool, str]]:
 746def to_sql(
 747    self,
 748    df: pandas.DataFrame,
 749    name: str = None,
 750    index: bool = False,
 751    if_exists: str = 'replace',
 752    method: str = "",
 753    chunksize: Optional[int] = -1,
 754    schema: Optional[str] = None,
 755    safe_copy: bool = True,
 756    silent: bool = False,
 757    debug: bool = False,
 758    as_tuple: bool = False,
 759    as_dict: bool = False,
 760    _connection=None,
 761    _transaction=None,
 762    **kw
 763) -> Union[bool, SuccessTuple]:
 764    """
 765    Upload a DataFrame's contents to the SQL server.
 766
 767    Parameters
 768    ----------
 769    df: pd.DataFrame
 770        The DataFrame to be inserted.
 771
 772    name: str
 773        The name of the table to be created.
 774
 775    index: bool, default False
 776        If True, creates the DataFrame's indices as columns.
 777
 778    if_exists: str, default 'replace'
 779        Drop and create the table ('replace') or append if it exists
 780        ('append') or raise Exception ('fail').
 781        Options are ['replace', 'append', 'fail'].
 782
 783    method: str, default ''
 784        `None` or `'multi'`. See the `method` parameter of `pandas.DataFrame.to_sql()`.
 785
 786    chunksize: Optional[int], default -1
 787        How many rows to insert at a time.
 788
 789    schema: Optional[str], default None
 790        Optionally override the schema for the table.
 791        Defaults to `SQLConnector.schema`.
 792
 793    safe_copy: bool, default True
 794        If `True`, copy the dataframe before making any changes.
 795
 796    as_tuple: bool, default False
 797        If `True`, return a (success_bool, message) tuple instead of a `bool`.
 798        Defaults to `False`.
 799
 800    as_dict: bool, default False
 801        If `True`, return a dictionary of transaction information.
 802        The keys are `success`, `msg`, `start`, `end`, `duration`, `num_rows`, `chunksize`,
 803        `method`, and `target`.
 804
 805    kw: Any
 806        Additional arguments will be passed to the DataFrame's `to_sql` function
 807
 808    Returns
 809    -------
 810    Either a `bool` or a `SuccessTuple` (depends on `as_tuple`).
 811    """
 812    import time
 813    import json
 814    from datetime import timedelta
 815    from meerschaum.utils.warnings import error, warn
 816    import warnings
 817    import functools
 818    import traceback
 819
 820    if name is None:
 821        error(f"Name must not be `None` to insert data into {self}.")
 822
 823    ### We're requiring `name` to be positional, and sometimes it's passed in from background jobs.
 824    kw.pop('name', None)
 825
 826    schema = schema or self.schema
 827
 828    from meerschaum.utils.sql import (
 829        sql_item_name,
 830        table_exists,
 831        json_flavors,
 832        truncate_item_name,
 833        DROP_IF_EXISTS_FLAVORS,
 834    )
 835    from meerschaum.utils.dataframe import (
 836        get_json_cols,
 837        get_numeric_cols,
 838        get_uuid_cols,
 839        get_bytes_cols,
 840        get_geometry_cols,
 841    )
 842    from meerschaum.utils.dtypes import (
 843        are_dtypes_equal,
 844        coerce_timezone,
 845        encode_bytes_for_bytea,
 846        serialize_bytes,
 847        serialize_decimal,
 848        serialize_geometry,
 849        json_serialize_value,
 850        get_geometry_type_srid,
 851    )
 852    from meerschaum.utils.dtypes.sql import (
 853        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
 854        get_db_type_from_pd_type,
 855        get_pd_type_from_db_type,
 856        get_numeric_precision_scale,
 857    )
 858    from meerschaum.utils.misc import interval_str
 859    from meerschaum.connectors.sql._create_engine import flavor_configs
 860    from meerschaum.utils.packages import attempt_import, import_pandas
 861    sqlalchemy = attempt_import('sqlalchemy', debug=debug, lazy=False)
 862    pd = import_pandas()
 863    is_dask = 'dask' in df.__module__
 864
 865    bytes_cols = get_bytes_cols(df)
 866    numeric_cols = get_numeric_cols(df)
 867    geometry_cols = get_geometry_cols(df)
 868    ### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite).
 869    numeric_cols_dtypes = {
 870        col: typ
 871        for col, typ in kw.get('dtype', {}).items()
 872        if (
 873            col in df.columns
 874            and 'numeric' in str(typ).lower()
 875        )
 876    }
 877    numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
 878    numeric_cols_precisions_scales = {
 879        col: (
 880            (typ.precision, typ.scale)
 881            if hasattr(typ, 'precision')
 882            else get_numeric_precision_scale(self.flavor)
 883        )
 884        for col, typ in numeric_cols_dtypes.items()
 885    }
 886    geometry_cols_dtypes = {
 887        col: typ
 888        for col, typ in kw.get('dtype', {}).items()
 889        if (
 890            col in df.columns
 891            and ('geometry' in str(typ).lower() or 'geography' in str(typ).lower())
 892        )
 893    }
 894    geometry_cols.extend([col for col in geometry_cols_dtypes if col not in geometry_cols])
 895    geometry_cols_types_srids = {
 896        col: (typ.geometry_type, typ.srid)
 897        if hasattr(typ, 'srid')
 898        else get_geometry_type_srid()
 899        for col, typ in geometry_cols_dtypes.items()
 900    }
 901
 902    cols_pd_types = {
 903        col: get_pd_type_from_db_type(str(typ))
 904        for col, typ in kw.get('dtype', {}).items()
 905    }
 906    cols_pd_types.update({
 907        col: f'numeric[{precision},{scale}]'
 908        for col, (precision, scale) in numeric_cols_precisions_scales.items()
 909        if precision and scale
 910    })
 911    cols_db_types = {
 912        col: get_db_type_from_pd_type(typ, flavor=self.flavor)
 913        for col, typ in cols_pd_types.items()
 914    }
 915
 916    enable_bulk_insert = mrsm.get_config(
 917        'system', 'connectors', 'sql', 'bulk_insert', self.flavor,
 918        warn=False,
 919    ) or False
 920    stats = {'target': name}
 921    ### resort to defaults if None
 922    copied = False
 923    use_bulk_insert = False
 924    if method == "":
 925        if enable_bulk_insert:
 926            method = (
 927                functools.partial(mssql_insert_json, cols_types=cols_db_types, debug=debug)
 928                if self.flavor == 'mssql'
 929                else functools.partial(psql_insert_copy, debug=debug)
 930            )
 931            use_bulk_insert = True
 932        else:
 933            ### Should resolve to 'multi' or `None`.
 934            method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
 935
 936    if bytes_cols and (use_bulk_insert or self.flavor == 'oracle'):
 937        if safe_copy and not copied:
 938            df = df.copy()
 939            copied = True
 940        bytes_serializer = (
 941            functools.partial(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
 942            if self.flavor != 'mssql'
 943            else serialize_bytes
 944        )
 945        for col in bytes_cols:
 946            df[col] = df[col].apply(bytes_serializer)
 947
 948    ### Check for numeric columns.
 949    for col in numeric_cols:
 950        precision, scale = numeric_cols_precisions_scales.get(
 951            col,
 952            get_numeric_precision_scale(self.flavor)
 953        )
 954        df[col] = df[col].apply(
 955            functools.partial(
 956                serialize_decimal,
 957                quantize=True,
 958                precision=precision,
 959                scale=scale,
 960            )
 961        )
 962
 963    geometry_format = 'wkt' if self.flavor == 'mssql' else (
 964        'gpkg_wkb'
 965        if self.flavor == 'geopackage'
 966        else 'wkb_hex'
 967    )
 968    for col in geometry_cols:
 969        geometry_type, srid = geometry_cols_types_srids.get(col, get_geometry_type_srid())
 970        with warnings.catch_warnings():
 971            warnings.simplefilter("ignore")
 972            df[col] = df[col].apply(
 973                functools.partial(
 974                    serialize_geometry,
 975                    geometry_format=geometry_format,
 976                )
 977            )
 978
 979    stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 980
 981    default_chunksize = self._sys_config.get('chunksize', None)
 982    chunksize = chunksize if chunksize != -1 else default_chunksize
 983    if chunksize is not None and self.flavor in _max_chunks_flavors:
 984        if chunksize > _max_chunks_flavors[self.flavor]:
 985            if chunksize != default_chunksize:
 986                warn(
 987                    f"The specified chunksize of {chunksize} exceeds the maximum of "
 988                    + f"{_max_chunks_flavors[self.flavor]} for flavor '{self.flavor}'.\n"
 989                    + f"    Falling back to a chunksize of {_max_chunks_flavors[self.flavor]}.",
 990                    stacklevel = 3,
 991                )
 992            chunksize = _max_chunks_flavors[self.flavor]
 993    stats['chunksize'] = chunksize
 994
 995    success, msg = False, "Default to_sql message"
 996    start = time.perf_counter()
 997    if debug:
 998        msg = f"[{self}] Inserting {len(df)} rows with chunksize: {chunksize}..."
 999        print(msg, end="", flush=True)
1000    stats['num_rows'] = len(df)
1001
1002    ### Check if the name is too long.
1003    truncated_name = truncate_item_name(name, self.flavor)
1004    if name != truncated_name:
1005        warn(
1006            f"Table '{name}' is too long for '{self.flavor}',"
1007            f" will instead create the table '{truncated_name}'."
1008        )
1009
1010    ### filter out non-pandas args
1011    import inspect
1012    to_sql_params = inspect.signature(df.to_sql).parameters
1013    to_sql_kw = {}
1014    for k, v in kw.items():
1015        if k in to_sql_params:
1016            to_sql_kw[k] = v
1017
1018    to_sql_kw.update({
1019        'name': truncated_name,
1020        'schema': schema,
1021        ('con' if not is_dask else 'uri'): (self.engine if not is_dask else self.URI),
1022        'index': index,
1023        'if_exists': if_exists,
1024        'method': method,
1025        'chunksize': chunksize,
1026    })
1027    if is_dask:
1028        to_sql_kw.update({
1029            'parallel': True,
1030        })
1031    elif _connection is not None:
1032        to_sql_kw['con'] = _connection
1033
1034    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
1035    if self.flavor == 'oracle':
1036        ### For some reason 'replace' doesn't work properly in pandas,
1037        ### so try dropping first.
1038        if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug):
1039            success = self.exec(
1040                f"DROP TABLE {if_exists_str} " + sql_item_name(name, 'oracle', schema)
1041            ) is not None
1042            if not success:
1043                warn(f"Unable to drop {name}")
1044
1045        ### Enforce NVARCHAR(2000) as text instead of CLOB.
1046        dtype = to_sql_kw.get('dtype', {})
1047        for col, typ in df.dtypes.items():
1048            if are_dtypes_equal(str(typ), 'object'):
1049                dtype[col] = sqlalchemy.types.NVARCHAR(2000)
1050            elif are_dtypes_equal(str(typ), 'int'):
1051                dtype[col] = sqlalchemy.types.INTEGER
1052        to_sql_kw['dtype'] = dtype
1053    elif self.flavor == 'duckdb':
1054        dtype = to_sql_kw.get('dtype', {})
1055        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
1056        for col in dt_cols:
1057            df[col] = coerce_timezone(df[col], strip_utc=False)
1058    elif self.flavor == 'mssql':
1059        dtype = to_sql_kw.get('dtype', {})
1060        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
1061        new_dtype = {}
1062        for col in dt_cols:
1063            if col in dtype:
1064                continue
1065            dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True)
1066            if col not in dtype:
1067                new_dtype[col] = dt_typ
1068
1069        dtype.update(new_dtype)
1070        to_sql_kw['dtype'] = dtype
1071
1072    ### Check for JSON columns.
1073    if self.flavor not in json_flavors:
1074        json_cols = get_json_cols(df)
1075        for col in json_cols:
1076            df[col] = df[col].apply(
1077                (
1078                    lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True)
1079                    if not isinstance(x, Hashable)
1080                    else x
1081                )
1082            )
1083
1084    if PD_TO_SQLALCHEMY_DTYPES_FLAVORS['uuid'].get(self.flavor, None) != 'Uuid':
1085        uuid_cols = get_uuid_cols(df)
1086        for col in uuid_cols:
1087            df[col] = df[col].astype(str)
1088
1089    try:
1090        with warnings.catch_warnings():
1091            warnings.filterwarnings('ignore')
1092            df.to_sql(**to_sql_kw)
1093        success = True
1094    except Exception:
1095        if not silent:
1096            warn(traceback.format_exc())
1097        success, msg = False, traceback.format_exc()
1098
1099    end = time.perf_counter()
1100    if success:
1101        num_rows = len(df)
1102        msg = (
1103            f"It took {interval_str(timedelta(seconds=(end - start)))} "
1104            + f"to sync {num_rows:,} row"
1105            + ('s' if num_rows != 1 else '')
1106            + f" to {name}."
1107        )
1108    stats['start'] = start
1109    stats['end'] = end
1110    stats['duration'] = end - start
1111
1112    if debug:
1113        print(" done.", flush=True)
1114        dprint(msg)
1115
1116    stats['success'] = success
1117    stats['msg'] = msg
1118    if as_tuple:
1119        return success, msg
1120    if as_dict:
1121        return stats
1122    return success

Upload a DataFrame's contents to the SQL server.

Parameters
  • df (pd.DataFrame): The DataFrame to be inserted.
  • name (str): The name of the table to be created.
  • index (bool, default False): If True, creates the DataFrame's indices as columns.
  • if_exists (str, default 'replace'): Drop and create the table ('replace') or append if it exists ('append') or raise Exception ('fail'). Options are ['replace', 'append', 'fail'].
  • method (str, default ''): None or 'multi'. See the method parameter of pandas.DataFrame.to_sql().
  • chunksize (Optional[int], default -1): How many rows to insert at a time.
  • schema (Optional[str], default None): Optionally override the schema for the table. Defaults to SQLConnector.schema.
  • safe_copy (bool, default True): If True, copy the dataframe before making any changes.
  • as_tuple (bool, default False): If True, return a (success_bool, message) tuple instead of a bool. Defaults to False.
  • as_dict (bool, default False): If True, return a dictionary of transaction information. The keys are success, msg, start, end, duration, num_rows, chunksize, method, and target.
  • kw (Any): Additional arguments will be passed to the DataFrame's to_sql function
Returns
  • Either a bool or a SuccessTuple (depends on as_tuple).
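
A minimal sketch (the target table `my_table` is hypothetical):
```
>>> import pandas as pd
>>> df = pd.DataFrame({'id': [1, 2], 'val': ['a', 'b']})
>>> conn.to_sql(df, name='my_table', if_exists='append')
>>> success, msg = conn.to_sql(df, name='my_table', as_tuple=True)
```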
def exec_queries( self, queries: "List[Union[str, Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]]]", break_on_error: bool = False, rollback: bool = True, silent: bool = False, debug: bool = False) -> 'List[Union[sqlalchemy.engine.cursor.CursorResult, None]]':
645def exec_queries(
646    self,
647    queries: List[
648        Union[
649            str,
650            Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]
651        ]
652    ],
653    break_on_error: bool = False,
654    rollback: bool = True,
655    silent: bool = False,
656    debug: bool = False,
657) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
658    """
659    Execute a list of queries in a single transaction.
660
661    Parameters
662    ----------
663    queries: List[
664        Union[
665            str,
666            Tuple[str, Callable[[], List[str]]]
667        ]
668    ]
669        The queries in the transaction to be executed.
670        If a query is a tuple, the second item of the tuple
671        will be considered a callable hook that returns a list of queries to be executed
672        before the next item in the list.
673
674    break_on_error: bool, default False
675        If `True`, stop executing when a query fails.
676
677    rollback: bool, default True
678        If `break_on_error` is `True`, rollback the transaction if a query fails.
679
680    silent: bool, default False
681        If `True`, suppress warnings.
682
683    Returns
684    -------
685    A list of SQLAlchemy results.
686    """
687    from meerschaum.utils.warnings import warn
688    from meerschaum.utils.debug import dprint
689    from meerschaum.utils.packages import attempt_import
690    sqlalchemy, sqlalchemy_orm = attempt_import('sqlalchemy', 'sqlalchemy.orm', lazy=False)
691    session = sqlalchemy_orm.Session(self.engine)
692
693    result = None
694    results = []
695    with session.begin():
696        for query in queries:
697            hook = None
698            result = None
699
700            if isinstance(query, tuple):
701                query, hook = query
702            if isinstance(query, str):
703                query = sqlalchemy.text(query)
704
705            if debug:
706                dprint(f"[{self}]\n" + str(query))
707
708            try:
709                result = session.execute(query)
710                session.flush()
711            except Exception as e:
712                msg = (f"Encountered error while executing:\n{e}")
713                if not silent:
714                    warn(msg)
715                elif debug:
716                    dprint(f"[{self}]\n" + str(msg))
717                result = None
718
719            if debug:
720                dprint(f"[{self}] Finished executing.")
721
722            if result is None and break_on_error:
723                if rollback:
724                    if debug:
725                        dprint(f"[{self}] Rolling back...")
726                    session.rollback()
727                results.append(result)
728                break
729            elif result is not None and hook is not None:
730                hook_queries = hook(session)
731                if hook_queries:
732                    hook_results = self.exec_queries(
733                        hook_queries,
734                        break_on_error = break_on_error,
735                        rollback=rollback,
736                        silent=silent,
737                        debug=debug,
738                    )
739                    result = (result, hook_results)
740
741            results.append(result)
742
743    return results

Execute a list of queries in a single transaction.

Parameters
  • queries (List[Union[str, Tuple[str, Callable[['sqlalchemy.orm.session.Session'], List[str]]]]]): The queries in the transaction to be executed. If a query is a tuple, the second item of the tuple will be considered a callable hook that returns a list of queries to be executed before the next item in the list.
  • break_on_error (bool, default False): If True, stop executing when a query fails.
  • rollback (bool, default True): If break_on_error is True, rollback the transaction if a query fails.
  • silent (bool, default False): If True, suppress warnings.
Returns
  • A list of SQLAlchemy results.
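
For example, a sketch of the tuple-hook form (the table name is hypothetical; the hook receives the session and returns follow-up queries):
```
>>> queries = [
...     "CREATE TABLE tmp_t (id INTEGER)",
...     ("INSERT INTO tmp_t (id) VALUES (1)", lambda session: ["DROP TABLE tmp_t"]),
... ]
>>> results = conn.exec_queries(queries, break_on_error=True)
```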
def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
1320def get_connection(self, rebuild: bool = False) -> 'sqlalchemy.engine.base.Connection':
1321    """
1322    Return the current alive connection.
1323
1324    Parameters
1325    ----------
1326    rebuild: bool, default False
1327        If `True`, close the previous connection and open a new one.
1328
1329    Returns
1330    -------
1331    A `sqlalchemy.engine.base.Connection` object.
1332    """
1333    import threading
1334    if '_thread_connections' not in self.__dict__:
1335        self.__dict__['_thread_connections'] = {}
1336
1337    self._cleanup_connections()
1338
1339    thread_id = threading.get_ident()
1340
1341    thread_connections = self.__dict__.get('_thread_connections', {})
1342    connection = thread_connections.get(thread_id, None)
1343
1344    if rebuild and connection is not None:
1345        try:
1346            connection.close()
1347        except Exception:
1348            pass
1349
1350        _ = thread_connections.pop(thread_id, None)
1351        connection = None
1352
1353    if connection is None or connection.closed:
1354        connection = self.engine.connect()
1355        thread_connections[thread_id] = connection
1356
1357    return connection

Return the current alive connection.

Parameters
  • rebuild (bool, default False): If True, close the previous connection and open a new one.
Returns
  • A sqlalchemy.engine.base.Connection object.
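
A short sketch of the per-thread caching behavior shown in the source above:
```
>>> connection = conn.get_connection()
>>> connection is conn.get_connection()   # reused within the same thread
True
>>> fresh = conn.get_connection(rebuild=True)   # close and reopen
```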
def test_connection(self, **kw: Any) -> Optional[bool]:
863def test_connection(
864    self,
865    **kw: Any
866) -> Union[bool, None]:
867    """
868    Test if a successful connection to the database may be made.
869
870    Parameters
871    ----------
872    **kw:
873        The keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
874
875    Returns
876    -------
877    `True` if a connection is made, otherwise `False` or `None` in case of failure.
878
879    """
880    import warnings
881    from meerschaum.connectors.poll import retry_connect
882    _default_kw = {'max_retries': 1, 'retry_wait': 0, 'warn': False, 'connector': self}
883    _default_kw.update(kw)
884    with warnings.catch_warnings():
885        warnings.filterwarnings('ignore', 'Could not')
886        try:
887            return retry_connect(**_default_kw)
888        except Exception:
889            return False

Test if a successful connection to the database may be made.

Parameters
  • **kw (Any): The keyword arguments are passed to meerschaum.connectors.poll.retry_connect.
Returns
  • True if a connection is made, otherwise False or None in case of failure.
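
For example (keyword arguments are forwarded to retry_connect; max_retries and retry_wait appear in the defaults above):
```
>>> conn.test_connection()
>>> conn.test_connection(max_retries=3, retry_wait=1)
```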
def fetch( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, str, NoneType] = '', end: Union[datetime.datetime, int, str, NoneType] = None, check_existing: bool = True, chunksize: Optional[int] = -1, workers: Optional[int] = None, debug: bool = False, **kw: Any) -> "Union['pd.DataFrame', List[Any], None]":
18def fetch(
19    self,
20    pipe: mrsm.Pipe,
21    begin: Union[datetime, int, str, None] = '',
22    end: Union[datetime, int, str, None] = None,
23    check_existing: bool = True,
24    chunksize: Optional[int] = -1,
25    workers: Optional[int] = None,
26    debug: bool = False,
27    **kw: Any
28) -> Union['pd.DataFrame', List[Any], None]:
29    """Execute the SQL definition and return a Pandas DataFrame.
30
31    Parameters
32    ----------
33    pipe: mrsm.Pipe
34        The pipe object which contains the `fetch` metadata.
35
36        - pipe.columns['datetime']: str
37            - Name of the datetime column for the remote table.
38        - pipe.parameters['fetch']: Dict[str, Any]
39            - Parameters necessary to execute a query.
40        - pipe.parameters['fetch']['definition']: str
41            - Raw SQL query to execute to generate the pandas DataFrame.
42        - pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
43            - How many minutes before `begin` to search for data (*optional*).
44
45    begin: Union[datetime, int, str, None], default ''
46        Most recent datetime to search for data.
47        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
48
49    end: Union[datetime, int, str, None], default None
50        The latest datetime to search for data.
51        If `end` is `None`, do not bound the query.
52
53    check_existing: bool, default True
54        If `False`, use a backtrack interval of 0 minutes.
55
56    chunksize: Optional[int], default -1
57        How many rows to load into memory at once.
58        Otherwise the entire result set is loaded into memory.
59
60    workers: Optional[int], default None
61        How many threads to use when consuming the generator.
62        Defaults to the number of cores.
63
64    debug: bool, default False
65        Verbosity toggle.
66
67    Returns
68    -------
69    A pandas DataFrame generator.
70    """
71    meta_def = self.get_pipe_metadef(
72        pipe,
73        begin=begin,
74        end=end,
75        check_existing=check_existing,
76        debug=debug,
77        **kw
78    )
79    chunks = self.read(
80        meta_def,
81        chunksize=chunksize,
82        workers=workers,
83        as_iterator=True,
84        debug=debug,
85    )
86    return chunks

Execute the SQL definition and return a Pandas DataFrame.

Parameters
  • pipe (mrsm.Pipe): The pipe object which contains the fetch metadata.

    • pipe.columns['datetime']: str
      • Name of the datetime column for the remote table.
    • pipe.parameters['fetch']: Dict[str, Any]
      • Parameters necessary to execute a query.
    • pipe.parameters['fetch']['definition']: str
      • Raw SQL query to execute to generate the pandas DataFrame.
    • pipe.parameters['fetch']['backtrack_minutes']: Union[int, float]
      • How many minutes before begin to search for data (optional).
  • begin (Union[datetime, int, str, None], default ''): Most recent datetime to search for data. If backtrack_minutes is provided, subtract backtrack_minutes.
  • end (Union[datetime, int, str, None], default None): The latest datetime to search for data. If end is None, do not bound the query.
  • check_existing (bool, default True): If False, use a backtrack interval of 0 minutes.
  • chunksize (Optional[int], default -1): How many rows to load into memory at once. Otherwise the entire result set is loaded into memory.
  • workers (Optional[int], default None): How many threads to use when consuming the generator. Defaults to the number of cores.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A pandas DataFrame generator.
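
A sketch of wiring up the fetch metadata on a pipe (the connector keys, metric, column, and query are hypothetical):
```
>>> import meerschaum as mrsm
>>> pipe = mrsm.Pipe(
...     'sql:main', 'demo',
...     columns={'datetime': 'dt'},
...     parameters={'fetch': {'definition': 'SELECT * FROM my_table'}},
... )
>>> chunks = conn.fetch(pipe, chunksize=1000)
```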
def get_pipe_metadef( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, begin: Union[datetime.datetime, int, str, NoneType] = '', end: Union[datetime.datetime, int, str, NoneType] = None, check_existing: bool = True, debug: bool = False, **kw: Any) -> Optional[str]:
 89def get_pipe_metadef(
 90    self,
 91    pipe: mrsm.Pipe,
 92    params: Optional[Dict[str, Any]] = None,
 93    begin: Union[datetime, int, str, None] = '',
 94    end: Union[datetime, int, str, None] = None,
 95    check_existing: bool = True,
 96    debug: bool = False,
 97    **kw: Any
 98) -> Union[str, None]:
 99    """
100    Return a pipe's meta definition fetch query.
101
102    params: Optional[Dict[str, Any]], default None
103        Optional params dictionary to build the `WHERE` clause.
104        See `meerschaum.utils.sql.build_where`.
105
106    begin: Union[datetime, int, str, None], default ''
107        Most recent datetime to search for data.
108        If `backtrack_minutes` is provided, subtract `backtrack_minutes`.
109
110    end: Union[datetime, int, str, None], default None
111        The latest datetime to search for data.
112        If `end` is `None`, do not bound the query.
113
114    check_existing: bool, default True
115        If `True`, apply the backtrack interval.
116
117    debug: bool, default False
118        Verbosity toggle.
119
120    Returns
121    -------
122    A pipe's meta definition fetch query string.
123    """
124    from meerschaum.utils.warnings import warn
125    from meerschaum.utils.sql import sql_item_name, dateadd_str, build_where
126    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
127    from meerschaum.config import get_config
128
129    dt_col = pipe.columns.get('datetime', None)
130    if not dt_col:
131        dt_col = pipe.guess_datetime()
132        dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
133        is_guess = True
134    else:
135        dt_name = sql_item_name(dt_col, self.flavor, None)
136        is_guess = False
137    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
138    db_dt_typ = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
139
140    if begin not in (None, '') or end is not None:
141        if is_guess:
142            if dt_col is None:
143                warn(
144                    f"Unable to determine a datetime column for {pipe}."
145                    + "\n    Ignoring begin and end...",
146                    stack=False,
147                )
148                begin, end = '', None
149            else:
150                warn(
151                    f"A datetime wasn't specified for {pipe}.\n"
152                    + f"    Using column \"{dt_col}\" for datetime bounds...",
153                    stack=False
154                )
155
156    apply_backtrack = begin == '' and check_existing
157    backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug)
158    btm = (
159        int(backtrack_interval.total_seconds() / 60)
160        if isinstance(backtrack_interval, timedelta)
161        else backtrack_interval
162    )
163    begin = (
164        pipe.get_sync_time(debug=debug)
165        if begin == ''
166        else begin
167    )
168
169    if begin not in (None, '') and end is not None and begin >= end:
170        begin = None
171
172    if dt_name:
173        begin_da = (
174            dateadd_str(
175                flavor=self.flavor,
176                datepart='minute',
177                number=((-1 * btm) if apply_backtrack else 0),
178                begin=begin,
179                db_type=db_dt_typ,
180            )
181            if begin not in ('', None)
182            else None
183        )
184        end_da = (
185            dateadd_str(
186                flavor=self.flavor,
187                datepart='minute',
188                number=0,
189                begin=end,
190                db_type=db_dt_typ,
191            )
192            if end is not None
193            else None
194        )
195
196    definition_name = sql_item_name('definition', self.flavor, None)
197    meta_def = (
198        _simple_fetch_query(pipe, self.flavor) if (
199            (not (pipe.columns or {}).get('id', None))
200            or (not get_config('system', 'experimental', 'join_fetch'))
201        ) else _join_fetch_query(pipe, self.flavor, debug=debug, **kw)
202    )
203
204    has_where = 'where' in meta_def.lower()[meta_def.lower().rfind('definition'):]
205    if dt_name and (begin_da or end_da):
206        definition_dt_name = f"{definition_name}.{dt_name}"
207        meta_def += "\n" + ("AND" if has_where else "WHERE") + " "
208        has_where = True
209        if begin_da:
210            meta_def += f"\n    {definition_dt_name}\n    >=\n    {begin_da}\n"
211        if begin_da and end_da:
212            meta_def += "    AND"
213        if end_da:
214            meta_def += f"\n    {definition_dt_name}\n    <\n    {end_da}\n"
215
216    if params is not None:
217        params_where = build_where(params, self, with_where=False)
218        meta_def += "\n    " + ("AND" if has_where else "WHERE") + "    "
219        has_where = True
220        meta_def += params_where
221
222    return meta_def.rstrip()

Return a pipe's meta definition fetch query.

params: Optional[Dict[str, Any]], default None Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.

begin: Union[datetime, int, str, None], default '' Most recent datetime to search for data. If backtrack_minutes is provided, subtract backtrack_minutes.

end: Union[datetime, int, str, None], default None The latest datetime to search for data. If end is None, do not bound the search.

check_existing: bool, default True If True, apply the backtrack interval.

debug: bool, default False Verbosity toggle.

Returns
  • A pipe's meta definition fetch query string.
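A minimal usage sketch (assuming a configured `sql:main` instance; the pipe keys and fetch definition here are hypothetical):

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe(
...     'sql:main', 'weather',
...     instance=conn,
...     parameters={'fetch': {'definition': 'SELECT * FROM weather'}},
...     columns={'datetime': 'timestamp'},
... )
>>> print(conn.get_pipe_metadef(pipe, begin='2024-01-01'))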
def cli(self, debug: bool = False) -> Tuple[bool, str]:
39def cli(
40    self,
41    debug: bool = False,
42) -> SuccessTuple:
43    """
44    Launch a subprocess for an interactive CLI.
45    """
46    from meerschaum.utils.warnings import dprint
47    from meerschaum.utils.venv import venv_exec
48
49    ### Initialize the engine so that dependencies are resolved.
50    _ = self.engine
51
52    env = copy.deepcopy(dict(os.environ))
53    env_key = f"MRSM_SQL_{self.label.upper()}"
54    env_val = json.dumps(self.meta)
55    env[env_key] = env_val
56    cli_code = (
57        "import sys\n"
58        "import meerschaum as mrsm\n"
59        "import os\n"
60        f"conn = mrsm.get_connector('sql:{self.label}')\n"
61        "success, msg = conn._cli_exit()\n"
62        "mrsm.pprint((success, msg))\n"
63        "if not success:\n"
64        "    raise Exception(msg)"
65    )
66    if debug:
67        dprint(cli_code)
68    try:
69        _ = venv_exec(cli_code, venv=None, env=env, debug=debug, capture_output=False)
70    except Exception as e:
71        return False, f"[{self}] Failed to start CLI:\n{e}"
72    return True, "Success"

Launch a subprocess for an interactive CLI.
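A quick sketch, assuming a configured `sql:main` connector (per the source above, the subprocess inherits the connector's credentials via the `MRSM_SQL_<LABEL>` environment variable):

>>> conn = mrsm.get_connector('sql:main')
>>> success, msg = conn.cli()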

def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False) -> List[Tuple[str, str, Optional[str], Dict[str, Any]]]:
144def fetch_pipes_keys(
145    self,
146    connector_keys: Optional[List[str]] = None,
147    metric_keys: Optional[List[str]] = None,
148    location_keys: Optional[List[str]] = None,
149    tags: Optional[List[str]] = None,
150    params: Optional[Dict[str, Any]] = None,
151    debug: bool = False,
152) -> List[
153        Tuple[str, str, Union[str, None], Dict[str, Any]]
154    ]:
155    """
156    Return a list of tuples corresponding to the parameters provided.
157
158    Parameters
159    ----------
160    connector_keys: Optional[List[str]], default None
161        List of connector_keys to search by.
162
163    metric_keys: Optional[List[str]], default None
164        List of metric_keys to search by.
165
166    location_keys: Optional[List[str]], default None
167        List of location_keys to search by.
168
169    tags: Optional[List[str]], default None
170        List of tags to search by.
171
172    params: Optional[Dict[str, Any]], default None
173        Dictionary of additional parameters to search by.
174        E.g. `--params pipe_id:1`
175
176    debug: bool, default False
177        Verbosity toggle.
178
179    Returns
180    -------
181    A list of tuples of pipes' keys and parameters (connector_keys, metric_key, location_key, parameters).
182    """
183    from meerschaum.utils.packages import attempt_import
184    from meerschaum.utils.misc import separate_negation_values
185    from meerschaum.utils.sql import (
186        OMIT_NULLSFIRST_FLAVORS,
187        table_exists,
188        json_flavors,
189    )
190    from meerschaum._internal.static import STATIC_CONFIG
191    import json
192    from copy import deepcopy
193    sqlalchemy, sqlalchemy_sql_functions = attempt_import(
194        'sqlalchemy',
195        'sqlalchemy.sql.functions', lazy=False,
196    )
197    coalesce = sqlalchemy_sql_functions.coalesce
198
199    if connector_keys is None:
200        connector_keys = []
201    if metric_keys is None:
202        metric_keys = []
203    if location_keys is None:
204        location_keys = []
205    else:
206        location_keys = [
207            (
208                lk
209                if lk not in ('[None]', 'None', 'null')
210                else 'None'
211            )
212            for lk in location_keys
213        ]
214    if tags is None:
215        tags = []
216
217    if params is None:
218        params = {}
219
220    ### Add three primary keys to params dictionary
221    ###   (separated for convenience of arguments).
222    cols = {
223        'connector_keys': [str(ck) for ck in connector_keys],
224        'metric_key': [str(mk) for mk in metric_keys],
225        'location_key': [str(lk) for lk in location_keys],
226    }
227
228    ### Make deep copy so we don't mutate this somewhere else.
229    parameters = deepcopy(params)
230    for col, vals in cols.items():
231        if vals not in [[], ['*']]:
232            parameters[col] = vals
233
234    if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug):
235        return []
236
237    from meerschaum.connectors.sql.tables import get_tables
238    pipes_tbl = get_tables(mrsm_instance=self, create=False, debug=debug)['pipes']
239
240    _params = {}
241    for k, v in parameters.items():
242        _v = json.dumps(v) if isinstance(v, dict) else v
243        _params[k] = _v
244
245    negation_prefix = STATIC_CONFIG['system']['fetch_pipes_keys']['negation_prefix']
246    ### Parse regular params.
247    ### If a param begins with '_', negate it instead.
248    _where = [
249        (
250            (coalesce(pipes_tbl.c[key], 'None') == val)
251            if not str(val).startswith(negation_prefix)
252            else (pipes_tbl.c[key] != key)
253        ) for key, val in _params.items()
254        if not isinstance(val, (list, tuple)) and key in pipes_tbl.c
255    ]
256    if self.flavor in json_flavors:
257        sqlalchemy_dialects = mrsm.attempt_import('sqlalchemy.dialects', lazy=False)
258        JSONB = sqlalchemy_dialects.postgresql.JSONB
259    else:
260        JSONB = sqlalchemy.String
261
262    select_cols = (
263        [
264            pipes_tbl.c.connector_keys,
265            pipes_tbl.c.metric_key,
266            pipes_tbl.c.location_key,
267            pipes_tbl.c.parameters,
268        ]
269    )
270
271    q = sqlalchemy.select(*select_cols).where(sqlalchemy.and_(True, *_where))
272    for c, vals in cols.items():
273        if not isinstance(vals, (list, tuple)) or not vals or c not in pipes_tbl.c:
274            continue
275        _in_vals, _ex_vals = separate_negation_values(vals)
276        q = q.where(coalesce(pipes_tbl.c[c], 'None').in_(_in_vals)) if _in_vals else q
277        q = q.where(coalesce(pipes_tbl.c[c], 'None').not_in(_ex_vals)) if _ex_vals else q
278
279    ### Finally, parse tags.
280    tag_groups = [tag.split(',') for tag in tags]
281    in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]
282
283    ors, nands = [], []
284    if self.flavor in json_flavors:
285        tags_jsonb = pipes_tbl.c['parameters'].cast(JSONB).op('->')('tags').cast(JSONB)
286        for _in_tags, _ex_tags in in_ex_tag_groups:
287            if _in_tags:
288                ors.append(
289                    sqlalchemy.and_(
290                        tags_jsonb.contains(_in_tags)
291                    )
292                )
293            for xt in _ex_tags:
294                nands.append(
295                    sqlalchemy.not_(
296                        sqlalchemy.and_(
297                            tags_jsonb.contains([xt])
298                        )
299                    )
300                )
301    else:
302        for _in_tags, _ex_tags in in_ex_tag_groups:
303            sub_ands = []
304            for nt in _in_tags:
305                sub_ands.append(
306                    sqlalchemy.cast(
307                        pipes_tbl.c['parameters'],
308                        sqlalchemy.String,
309                    ).like(f'%"tags":%"{nt}"%')
310                )
311            if sub_ands:
312                ors.append(sqlalchemy.and_(*sub_ands))
313
314            for xt in _ex_tags:
315                nands.append(
316                    sqlalchemy.cast(
317                        pipes_tbl.c['parameters'],
318                        sqlalchemy.String,
319                    ).not_like(f'%"tags":%"{xt}"%')
320                )
321
322    q = q.where(sqlalchemy.and_(*nands)) if nands else q
323    q = q.where(sqlalchemy.or_(*ors)) if ors else q
324    loc_asc = sqlalchemy.asc(pipes_tbl.c['location_key'])
325    if self.flavor not in OMIT_NULLSFIRST_FLAVORS:
326        loc_asc = sqlalchemy.nullsfirst(loc_asc)
327    q = q.order_by(
328        sqlalchemy.asc(pipes_tbl.c['connector_keys']),
329        sqlalchemy.asc(pipes_tbl.c['metric_key']),
330        loc_asc,
331    )
332
333    ### execute the query and return a list of tuples
334    if debug:
335        dprint(q)
336    try:
337        rows = (
338            self.execute(q).fetchall()
339            if self.flavor != 'duckdb'
340            else [
341                (row['connector_keys'], row['metric_key'], row['location_key'])
342                for row in self.read(q).to_dict(orient='records')
343            ]
344        )
345    except Exception as e:
346        error(str(e))
347
348    return rows

Return a list of tuples corresponding to the parameters provided.

Parameters
  • connector_keys (Optional[List[str]], default None): List of connector_keys to search by.
  • metric_keys (Optional[List[str]], default None): List of metric_keys to search by.
  • location_keys (Optional[List[str]], default None): List of location_keys to search by.
  • tags (Optional[List[str]], default None): List of tags to search by.
  • params (Optional[Dict[str, Any]], default None): Dictionary of additional parameters to search by. E.g. --params pipe_id:1
  • debug (bool, default False): Verbosity toggle.
Returns
  • A list of tuples of pipes' keys and parameters (connector_keys, metric_key, location_key, parameters).
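A hedged example (the keys and the returned row are illustrative):

>>> conn.fetch_pipes_keys(connector_keys=['sql:main'], tags=['production'])
[('sql:main', 'weather', None, {'tags': ['production']})]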
def create_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, indices: Optional[List[str]] = None, debug: bool = False) -> bool:
369def create_indices(
370    self,
371    pipe: mrsm.Pipe,
372    columns: Optional[List[str]] = None,
373    indices: Optional[List[str]] = None,
374    debug: bool = False
375) -> bool:
376    """
377    Create a pipe's indices.
378    """
379    if pipe.__dict__.get('_skip_check_indices', False):
380        return True
381
382    if debug:
383        dprint(f"Creating indices for {pipe}...")
384
385    if not pipe.indices:
386        warn(f"{pipe} has no index columns; skipping index creation.", stack=False)
387        return True
388
389    cols_to_include = set((columns or []) + (indices or [])) or None
390
391    pipe._clear_cache_key('_columns_indices', debug=debug)
392    ix_queries = {
393        col: queries
394        for col, queries in self.get_create_index_queries(pipe, debug=debug).items()
395        if cols_to_include is None or col in cols_to_include
396    }
397    success = True
398    for col, queries in ix_queries.items():
399        ix_success = all(self.exec_queries(queries, debug=debug, silent=False))
400        success = success and ix_success
401        if not ix_success:
402            warn(f"Failed to create index on column: {col}")
403
404    return success

Create a pipe's indices.
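For example, to rebuild only a subset of a hypothetical pipe's indices (see the source above for how `columns` and `indices` are merged into `cols_to_include`):

>>> success = conn.create_indices(pipe, columns=['datetime'])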

def drop_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, indices: Optional[List[str]] = None, debug: bool = False) -> bool:
425def drop_indices(
426    self,
427    pipe: mrsm.Pipe,
428    columns: Optional[List[str]] = None,
429    indices: Optional[List[str]] = None,
430    debug: bool = False
431) -> bool:
432    """
433    Drop a pipe's indices.
434    """
435    if debug:
436        dprint(f"Dropping indices for {pipe}...")
437
438    if not pipe.indices:
439        warn(f"No indices to drop for {pipe}.", stack=False)
440        return False
441
442    cols_to_include = set((columns or []) + (indices or [])) or None
443
444    ix_queries = {
445        col: queries
446        for col, queries in self.get_drop_index_queries(pipe, debug=debug).items()
447        if cols_to_include is None or col in cols_to_include
448    }
449    success = True
450    for col, queries in ix_queries.items():
451        ix_success = all(self.exec_queries(queries, debug=debug, silent=(not debug)))
452        if not ix_success:
453            success = False
454            if debug:
455                dprint(f"Failed to drop index on column: {col}")
456    return success

Drop a pipe's indices.
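A common pattern (sketch) is to drop indices before a bulk load and recreate them afterwards:

>>> conn.drop_indices(pipe)
>>> # ... perform the bulk load ...
>>> conn.create_indices(pipe)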

def get_create_index_queries( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[str]]:
512def get_create_index_queries(
513    self,
514    pipe: mrsm.Pipe,
515    debug: bool = False,
516) -> Dict[str, List[str]]:
517    """
518    Return a dictionary mapping columns to a `CREATE INDEX` or equivalent query.
519
520    Parameters
521    ----------
522    pipe: mrsm.Pipe
523        The pipe to which the queries will correspond.
524
525    Returns
526    -------
527    A dictionary of index names mapping to lists of queries.
528    """
529    ### NOTE: Due to recent breaking changes in DuckDB, indices don't behave properly.
530    if self.flavor == 'duckdb':
531        return {}
532    from meerschaum.utils.sql import (
533        sql_item_name,
534        get_distinct_col_count,
535        UPDATE_QUERIES,
536        get_null_replacement,
537        get_create_table_queries,
538        get_rename_table_queries,
539        COALESCE_UNIQUE_INDEX_FLAVORS,
540    )
541    from meerschaum.utils.dtypes import are_dtypes_equal
542    from meerschaum.utils.dtypes.sql import (
543        get_db_type_from_pd_type,
544        get_pd_type_from_db_type,
545        AUTO_INCREMENT_COLUMN_FLAVORS,
546    )
547    from meerschaum.config import get_config
548    index_queries = {}
549
550    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
551    static = pipe.parameters.get('static', False)
552    null_indices = pipe.parameters.get('null_indices', True)
553    index_names = pipe.get_indices()
554    unique_index_name_unquoted = index_names.get('unique', None) or f'IX_{pipe.target}_unique'
555    if upsert:
556        _ = index_names.pop('unique', None)
557    indices = pipe.indices
558    existing_cols_types = pipe.get_columns_types(debug=debug)
559    existing_cols_pd_types = {
560        col: get_pd_type_from_db_type(typ)
561        for col, typ in existing_cols_types.items()
562    }
563    existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
564    existing_ix_names = set()
565    existing_primary_keys = []
566    existing_clustered_primary_keys = []
567    for col, col_indices in existing_cols_indices.items():
568        for col_ix_doc in col_indices:
569            existing_ix_names.add(col_ix_doc.get('name', '').lower())
570            if col_ix_doc.get('type', None) == 'PRIMARY KEY':
571                existing_primary_keys.append(col.lower())
572                if col_ix_doc.get('clustered', True):
573                    existing_clustered_primary_keys.append(col.lower())
574
575    _datetime = pipe.get_columns('datetime', error=False)
576    _datetime_name = (
577        sql_item_name(_datetime, self.flavor, None)
578        if _datetime is not None else None
579    )
580    _datetime_index_name = (
581        sql_item_name(index_names['datetime'], flavor=self.flavor, schema=None)
582        if index_names.get('datetime', None)
583        else None
584    )
585    _id = pipe.get_columns('id', error=False)
586    _id_name = (
587        sql_item_name(_id, self.flavor, None)
588        if _id is not None
589        else None
590    )
591    primary_key = pipe.columns.get('primary', None)
592    primary_key_name = (
593        sql_item_name(primary_key, flavor=self.flavor, schema=None)
594        if primary_key
595        else None
596    )
597    autoincrement = (
598        pipe.parameters.get('autoincrement', False)
599        or (
600            primary_key is not None
601            and primary_key not in existing_cols_pd_types
602        )
603    )
604    primary_key_db_type = (
605        get_db_type_from_pd_type(pipe.dtypes.get(primary_key, 'int') or 'int', self.flavor)
606        if primary_key
607        else None
608    )
609    primary_key_constraint_name = (
610        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
611        if primary_key is not None
612        else None
613    )
614    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
615    datetime_clustered = (
616        "CLUSTERED"
617        if not existing_clustered_primary_keys and _datetime is not None
618        else "NONCLUSTERED"
619    )
620    include_columns_str = "\n    ,".join(
621        [
622            sql_item_name(col, flavor=self.flavor) for col in existing_cols_types
623            if col != _datetime
624        ]
625    ).rstrip(',')
626    include_clause = (
627        (
628            f"\nINCLUDE (\n    {include_columns_str}\n)"
629        )
630        if datetime_clustered == 'NONCLUSTERED'
631        else ''
632    )
633
634    _id_index_name = (
635        sql_item_name(index_names['id'], self.flavor, None)
636        if index_names.get('id', None)
637        else None
638    )
639    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
640    _create_space_partition = get_config('system', 'experimental', 'space')
641
642    ### create datetime index
643    dt_query = None
644    if _datetime is not None:
645        if (
646            self.flavor in ('timescaledb', 'timescaledb-ha')
647            and pipe.parameters.get('hypertable', True)
648        ):
649            _id_count = (
650                get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
651                if (_id is not None and _create_space_partition) else None
652            )
653
654            chunk_interval = pipe.get_chunk_interval(debug=debug)
655            chunk_interval_minutes = (
656                chunk_interval
657                if isinstance(chunk_interval, int)
658                else int(chunk_interval.total_seconds() / 60)
659            )
660            chunk_time_interval = (
661                f"INTERVAL '{chunk_interval_minutes} MINUTES'"
662                if isinstance(chunk_interval, timedelta)
663                else f'{chunk_interval_minutes}'
664            )
665
666            dt_query = (
667                f"SELECT public.create_hypertable('{_pipe_name}', " +
668                f"'{_datetime}', "
669                + (
670                    f"'{_id}', {_id_count}, " if (_id is not None and _create_space_partition)
671                    else ''
672                )
673                + f'chunk_time_interval => {chunk_time_interval}, '
674                + 'if_not_exists => true, '
675                + "migrate_data => true);"
676            )
677        elif _datetime_index_name and _datetime != primary_key:
678            if self.flavor == 'mssql':
679                dt_query = (
680                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
681                    f"\nON {_pipe_name} ({_datetime_name}){include_clause}"
682                )
683            else:
684                dt_query = (
685                    f"CREATE INDEX {_datetime_index_name} "
686                    + f"ON {_pipe_name} ({_datetime_name})"
687                )
688
689    if dt_query:
690        index_queries[_datetime] = [dt_query]
691
692    primary_queries = []
693    if (
694        primary_key is not None
695        and primary_key.lower() not in existing_primary_keys
696        and not static
697    ):
698        if autoincrement and primary_key not in existing_cols_pd_types:
699            autoincrement_str = AUTO_INCREMENT_COLUMN_FLAVORS.get(
700                self.flavor,
701                AUTO_INCREMENT_COLUMN_FLAVORS['default']
702            )
703            primary_queries.extend([
704                (
705                    f"ALTER TABLE {_pipe_name}\n"
706                    f"ADD {primary_key_name} {primary_key_db_type} {autoincrement_str}"
707                ),
708            ])
709        elif not autoincrement and primary_key in existing_cols_pd_types:
710            if self.flavor in ('sqlite', 'geopackage'):
711                new_table_name = sql_item_name(
712                    f'_new_{pipe.target}',
713                    self.flavor,
714                    self.get_pipe_schema(pipe)
715                )
716                select_cols_str = ', '.join(
717                    [
718                        sql_item_name(col, self.flavor, None)
719                        for col in existing_cols_types
720                    ]
721                )
722                primary_queries.extend(
723                    get_create_table_queries(
724                        existing_cols_pd_types,
725                        f'_new_{pipe.target}',
726                        self.flavor,
727                        schema=self.get_pipe_schema(pipe),
728                        primary_key=primary_key,
729                    ) + [
730                        (
731                            f"INSERT INTO {new_table_name} ({select_cols_str})\n"
732                            f"SELECT {select_cols_str}\nFROM {_pipe_name}"
733                        ),
734                        f"DROP TABLE {_pipe_name}",
735                    ] + get_rename_table_queries(
736                        f'_new_{pipe.target}',
737                        pipe.target,
738                        self.flavor,
739                        schema=self.get_pipe_schema(pipe),
740                    )
741                )
742            elif self.flavor == 'oracle':
743                primary_queries.extend([
744                    (
745                        f"ALTER TABLE {_pipe_name}\n"
746                        f"MODIFY {primary_key_name} NOT NULL"
747                    ),
748                    (
749                        f"ALTER TABLE {_pipe_name}\n"
750                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
751                    )
752                ])
753            elif self.flavor in ('mysql', 'mariadb'):
754                primary_queries.extend([
755                    (
756                        f"ALTER TABLE {_pipe_name}\n"
757                        f"MODIFY {primary_key_name} {primary_key_db_type} NOT NULL"
758                    ),
759                    (
760                        f"ALTER TABLE {_pipe_name}\n"
761                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
762                    )
763                ])
764            elif self.flavor in ('timescaledb', 'timescaledb-ha'):
765                primary_queries.extend([
766                    (
767                        f"ALTER TABLE {_pipe_name}\n"
768                        f"ALTER COLUMN {primary_key_name} SET NOT NULL"
769                    ),
770                    (
771                        f"ALTER TABLE {_pipe_name}\n"
772                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY (" + (
773                            f"{_datetime_name}, " if _datetime_name else ""
774                        ) + f"{primary_key_name})"
775                    ),
776                ])
777            elif self.flavor in ('citus', 'postgresql', 'duckdb', 'postgis'):
778                primary_queries.extend([
779                    (
780                        f"ALTER TABLE {_pipe_name}\n"
781                        f"ALTER COLUMN {primary_key_name} SET NOT NULL"
782                    ),
783                    (
784                        f"ALTER TABLE {_pipe_name}\n"
785                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
786                    ),
787                ])
788            else:
789                primary_queries.extend([
790                    (
791                        f"ALTER TABLE {_pipe_name}\n"
792                        f"ALTER COLUMN {primary_key_name} {primary_key_db_type} NOT NULL"
793                    ),
794                    (
795                        f"ALTER TABLE {_pipe_name}\n"
796                        f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
797                    ),
798                ])
799        index_queries[primary_key] = primary_queries
800
801    ### create id index
802    if _id_name is not None:
803        if self.flavor in ('timescaledb', 'timescaledb-ha'):
804            ### Already created indices via create_hypertable.
805            id_query = (
806                None if (_id is not None and _create_space_partition)
807                else (
808                    f"CREATE INDEX IF NOT EXISTS {_id_index_name} ON {_pipe_name} ({_id_name})"
809                    if _id is not None
810                    else None
811                )
812            )
813            pass
814        else: ### mssql, sqlite, etc.
815            id_query = f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})"
816
817        if id_query is not None:
818            index_queries[_id] = id_query if isinstance(id_query, list) else [id_query]
819
820    ### Create indices for other labels in `pipe.columns`.
821    other_index_names = {
822        ix_key: ix_unquoted
823        for ix_key, ix_unquoted in index_names.items()
824        if (
825            ix_key not in ('datetime', 'id', 'primary')
826            and ix_unquoted.lower() not in existing_ix_names
827        )
828    }
829    for ix_key, ix_unquoted in other_index_names.items():
830        ix_name = sql_item_name(ix_unquoted, self.flavor, None)
831        cols = indices[ix_key]
832        if not isinstance(cols, (list, tuple)):
833            cols = [cols]
834        if ix_key == 'unique' and upsert:
835            continue
836        cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
837        if not cols_names:
838            continue
839
840        cols_names_str = ", ".join(cols_names)
841        index_query_params_clause = f" ({cols_names_str})"
842        if self.flavor in ('postgis', 'timescaledb-ha'):
843            for col in cols:
844                col_typ = existing_cols_pd_types.get(cols[0], 'object')
845                if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'):
846                    index_query_params_clause = f" USING GIST ({cols_names_str})"
847                    break
848
849        index_queries[ix_key] = [
850            f"CREATE INDEX {ix_name} ON {_pipe_name}{index_query_params_clause}"
851        ]
852
853    indices_cols_str = ', '.join(
854        list({
855            sql_item_name(ix, self.flavor)
856            for ix_key, ix in pipe.columns.items()
857            if ix and ix in existing_cols_types
858        })
859    )
860    coalesce_indices_cols_str = ', '.join(
861        [
862            (
863                (
864                    "COALESCE("
865                    + sql_item_name(ix, self.flavor)
866                    + ", "
867                    + get_null_replacement(existing_cols_types[ix], self.flavor)
868                    + ") "
869                )
870                if ix_key != 'datetime' and null_indices
871                else sql_item_name(ix, self.flavor)
872            )
873            for ix_key, ix in pipe.columns.items()
874            if ix and ix in existing_cols_types
875        ]
876    )
877    unique_index_name = sql_item_name(unique_index_name_unquoted, self.flavor)
878    constraint_name_unquoted = unique_index_name_unquoted.replace('IX_', 'UQ_')
879    constraint_name = sql_item_name(constraint_name_unquoted, self.flavor)
880    add_constraint_query = (
881        f"ALTER TABLE {_pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})"
882    )
883    unique_index_cols_str = (
884        indices_cols_str
885        if self.flavor not in COALESCE_UNIQUE_INDEX_FLAVORS or not null_indices
886        else coalesce_indices_cols_str
887    )
888    create_unique_index_query = (
889        f"CREATE UNIQUE INDEX {unique_index_name} ON {_pipe_name} ({unique_index_cols_str})"
890    )
891    constraint_queries = [create_unique_index_query]
892    if self.flavor not in ('sqlite', 'geopackage'):
893        constraint_queries.append(add_constraint_query)
894    if upsert and indices_cols_str:
895        index_queries[unique_index_name] = constraint_queries
896    return index_queries

Return a dictionary mapping columns to a CREATE INDEX or equivalent query.

Parameters
  • pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
  • A dictionary of index names mapping to lists of queries.
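Because the queries are returned rather than executed, they may be inspected before running (sketch):

>>> for ix_name, queries in conn.get_create_index_queries(pipe).items():
...     print(ix_name, queries)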
def get_drop_index_queries( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[str]]:
 899def get_drop_index_queries(
 900    self,
 901    pipe: mrsm.Pipe,
 902    debug: bool = False,
 903) -> Dict[str, List[str]]:
 904    """
 905    Return a dictionary mapping columns to a `DROP INDEX` or equivalent query.
 906
 907    Parameters
 908    ----------
 909    pipe: mrsm.Pipe
 910        The pipe to which the queries will correspond.
 911
 912    Returns
 913    -------
 914    A dictionary of column names mapping to lists of queries.
 915    """
 916    ### NOTE: Due to breaking changes within DuckDB, indices must be skipped.
 917    if self.flavor == 'duckdb':
 918        return {}
 919    if not pipe.exists(debug=debug):
 920        return {}
 921
 922    from collections import defaultdict
 923    from meerschaum.utils.sql import (
 924        sql_item_name,
 925        table_exists,
 926        hypertable_queries,
 927        DROP_INDEX_IF_EXISTS_FLAVORS,
 928    )
 929    drop_queries = defaultdict(lambda: [])
 930    schema = self.get_pipe_schema(pipe)
 931    index_schema = schema if self.flavor != 'mssql' else None
 932    indices = {
 933        ix_key: ix
 934        for ix_key, ix in pipe.get_indices().items()
 935    }
 936    cols_indices = pipe.get_columns_indices(debug=debug)
 937    existing_indices = set()
 938    clustered_ix = None
 939    for col, ix_metas in cols_indices.items():
 940        for ix_meta in ix_metas:
 941            ix_name = ix_meta.get('name', None)
 942            if ix_meta.get('clustered', False):
 943                clustered_ix = ix_name
 944            existing_indices.add(ix_name.lower())
 945    pipe_name = sql_item_name(pipe.target, self.flavor, schema)
 946    pipe_name_no_schema = sql_item_name(pipe.target, self.flavor, None)
 947    upsert = pipe.upsert
 948
 949    if self.flavor not in hypertable_queries:
 950        is_hypertable = False
 951    else:
 952        is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name)
 953        is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None
 954
 955    if_exists_str = "IF EXISTS " if self.flavor in DROP_INDEX_IF_EXISTS_FLAVORS else ""
 956    if is_hypertable:
 957        nuke_queries = []
 958        temp_table = '_' + pipe.target + '_temp_migration'
 959        temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))
 960
 961        if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
 962            nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}")
 963        nuke_queries += [
 964            f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
 965            f"DROP TABLE {if_exists_str}{pipe_name}",
 966            f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}",
 967        ]
 968        nuke_ix_keys = ('datetime', 'id')
 969        nuked = False
 970        for ix_key in nuke_ix_keys:
 971            if ix_key in indices and not nuked:
 972                drop_queries[ix_key].extend(nuke_queries)
 973                nuked = True
 974
 975    for ix_key, ix_unquoted in indices.items():
 976        if ix_key in drop_queries:
 977            continue
 978        if ix_unquoted.lower() not in existing_indices:
 979            continue
 980
 981        if (
 982            ix_key == 'unique'
 983            and upsert
 984            and self.flavor not in ('sqlite', 'geopackage')
 985            and not is_hypertable
 986        ):
 987            constraint_name_unquoted = ix_unquoted.replace('IX_', 'UQ_')
 988            constraint_name = sql_item_name(constraint_name_unquoted, self.flavor)
 989            constraint_or_index = (
 990                "CONSTRAINT"
 991                if self.flavor not in ('mysql', 'mariadb')
 992                else 'INDEX'
 993            )
 994            drop_queries[ix_key].append(
 995                f"ALTER TABLE {pipe_name}\n"
 996                f"DROP {constraint_or_index} {constraint_name}"
 997            )
 998
 999        query = (
1000            (
1001                f"ALTER TABLE {pipe_name}\n"
1002                if self.flavor in ('mysql', 'mariadb')
1003                else ''
1004            )
1005            + f"DROP INDEX {if_exists_str}"
1006            + sql_item_name(ix_unquoted, self.flavor, index_schema)
1007        )
1008        if self.flavor == 'mssql':
1009            query += f"\nON {pipe_name}"
1010            if ix_unquoted == clustered_ix:
1011                query += "\nWITH (ONLINE = ON, MAXDOP = 4)"
1012        drop_queries[ix_key].append(query)
1013
1014
1015    return drop_queries

Return a dictionary mapping columns to a DROP INDEX or equivalent query.

Parameters
  • pipe (mrsm.Pipe): The pipe to which the queries will correspond.
Returns
  • A dictionary of column names mapping to lists of queries.
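As with the creation queries, these may be flattened and executed via `exec_queries` (sketch):

>>> from meerschaum.utils.misc import flatten_list
>>> queries = list(flatten_list(conn.get_drop_index_queries(pipe).values()))
>>> results = conn.exec_queries(queries, debug=True)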
def get_add_columns_queries( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, Dict[str, str]]', _is_db_types: bool = False, debug: bool = False) -> List[str]:
3163def get_add_columns_queries(
3164    self,
3165    pipe: mrsm.Pipe,
3166    df: Union[pd.DataFrame, Dict[str, str]],
3167    _is_db_types: bool = False,
3168    debug: bool = False,
3169) -> List[str]:
3170    """
3171    Add new null columns of the correct type to a table from a dataframe.
3172
3173    Parameters
3174    ----------
3175    pipe: mrsm.Pipe
3176        The pipe to be altered.
3177
3178    df: Union[pd.DataFrame, Dict[str, str]]
3179        The pandas DataFrame which contains new columns.
3180        If a dictionary is provided, assume it maps columns to Pandas data types.
3181
3182    _is_db_types: bool, default False
3183        If `True`, assume `df` is a dictionary mapping columns to SQL native dtypes.
3184
3185    Returns
3186    -------
3187    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
3188    """
3189    if not pipe.exists(debug=debug):
3190        return []
3191
3192    if pipe.parameters.get('static', False):
3193        return []
3194
3195    from decimal import Decimal
3196    import copy
3197    from meerschaum.utils.sql import (
3198        sql_item_name,
3199        SINGLE_ALTER_TABLE_FLAVORS,
3200        get_table_cols_types,
3201    )
3202    from meerschaum.utils.dtypes.sql import (
3203        get_pd_type_from_db_type,
3204        get_db_type_from_pd_type,
3205    )
3206    from meerschaum.utils.misc import flatten_list
3207    is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False
3208    if is_dask:
3209        df = df.partitions[0].compute()
3210    df_cols_types = (
3211        {
3212            col: str(typ)
3213            for col, typ in df.dtypes.items()
3214        }
3215        if not isinstance(df, dict)
3216        else copy.deepcopy(df)
3217    )
3218    if not isinstance(df, dict) and len(df.index) > 0:
3219        for col, typ in list(df_cols_types.items()):
3220            if typ != 'object':
3221                continue
3222            val = df.iloc[0][col]
3223            if isinstance(val, (dict, list)):
3224                df_cols_types[col] = 'json'
3225            elif isinstance(val, Decimal):
3226                df_cols_types[col] = 'numeric'
3227            elif isinstance(val, str):
3228                df_cols_types[col] = 'str'
3229    db_cols_types = {
3230        col: get_pd_type_from_db_type(typ)
3231        for col, typ in get_table_cols_types(
3232            pipe.target,
3233            self,
3234            schema=self.get_pipe_schema(pipe),
3235            debug=debug,
3236        ).items()
3237    }
3238    new_cols = set(df_cols_types) - set(db_cols_types)
3239    if not new_cols:
3240        return []
3241
3242    new_cols_types = {
3243        col: get_db_type_from_pd_type(
3244            df_cols_types[col],
3245            self.flavor
3246        )
3247        for col in new_cols
3248        if col and df_cols_types.get(col, None)
3249    }
3250
3251    alter_table_query = "ALTER TABLE " + sql_item_name(
3252        pipe.target, self.flavor, self.get_pipe_schema(pipe)
3253    )
3254    queries = []
3255    for col, typ in new_cols_types.items():
3256        add_col_query = (
3257            "\nADD "
3258            + sql_item_name(col, self.flavor, None)
3259            + " " + typ + ","
3260        )
3261
3262        if self.flavor in SINGLE_ALTER_TABLE_FLAVORS:
3263            queries.append(alter_table_query + add_col_query[:-1])
3264        else:
3265            alter_table_query += add_col_query
3266
3267    ### For most flavors, only one query is required.
3268    ### This covers SQLite which requires one query per column.
3269    if not queries:
3270        queries.append(alter_table_query[:-1])
3271
3272    if self.flavor != 'duckdb':
3273        return queries
3274
3275    ### NOTE: For DuckDB, we must drop and rebuild the indices.
3276    drop_index_queries = list(flatten_list(
3277        [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()]
3278    ))
3279    create_index_queries = list(flatten_list(
3280        [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()]
3281    ))
3282
3283    return drop_index_queries + queries + create_index_queries

Add new null columns of the correct type to a table from a dataframe.

Parameters
  • pipe (mrsm.Pipe): The pipe to be altered.
  • df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which contains new columns. If a dictionary is provided, assume it maps columns to Pandas data types.
  • _is_db_types (bool, default False): If True, assume df is a dictionary mapping columns to SQL native dtypes.
Returns
  • A list of the ALTER TABLE SQL query or queries to be executed on the provided connector.
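Per the signature, a dictionary mapping columns to Pandas dtypes may be passed in place of a DataFrame (the column name here is hypothetical):

>>> conn.get_add_columns_queries(pipe, {'humidity': 'float64'})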
def get_alter_columns_queries( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, Dict[str, str]]', debug: bool = False) -> List[str]:
3286def get_alter_columns_queries(
3287    self,
3288    pipe: mrsm.Pipe,
3289    df: Union[pd.DataFrame, Dict[str, str]],
3290    debug: bool = False,
3291) -> List[str]:
3292    """
3293    If we encounter a column of a different type, set the entire column to text.
3294    If the altered columns are numeric, alter to numeric instead.
3295
3296    Parameters
3297    ----------
3298    pipe: mrsm.Pipe
3299        The pipe to be altered.
3300
3301    df: Union[pd.DataFrame, Dict[str, str]]
3302        The pandas DataFrame which may contain altered columns.
3303        If a dict is provided, assume it maps columns to Pandas data types.
3304
3305    Returns
3306    -------
3307    A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
3308    """
3309    if not pipe.exists(debug=debug) or pipe.static:
3310        return []
3311
3312    from meerschaum.utils.sql import (
3313        sql_item_name,
3314        get_table_cols_types,
3315        DROP_IF_EXISTS_FLAVORS,
3316        SINGLE_ALTER_TABLE_FLAVORS,
3317    )
3318    from meerschaum.utils.dataframe import get_numeric_cols
3319    from meerschaum.utils.dtypes import are_dtypes_equal
3320    from meerschaum.utils.dtypes.sql import (
3321        get_pd_type_from_db_type,
3322        get_db_type_from_pd_type,
3323    )
3324    from meerschaum.utils.misc import flatten_list, generate_password, items_str
3325    target = pipe.target
3326    session_id = generate_password(3)
3327    numeric_cols = (
3328        get_numeric_cols(df)
3329        if not isinstance(df, dict)
3330        else [
3331            col
3332            for col, typ in df.items()
3333            if typ.startswith('numeric')
3334        ]
3335    )
3336    df_cols_types = (
3337        {
3338            col: str(typ)
3339            for col, typ in df.dtypes.items()
3340        }
3341        if not isinstance(df, dict)
3342        else df
3343    )
3344    db_cols_types = {
3345        col: get_pd_type_from_db_type(typ)
3346        for col, typ in get_table_cols_types(
3347            pipe.target,
3348            self,
3349            schema=self.get_pipe_schema(pipe),
3350            debug=debug,
3351        ).items()
3352    }
3353    pipe_dtypes = pipe.get_dtypes(debug=debug)
3354    pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3355    pd_db_df_aliases = {
3356        'int': 'bool',
3357        'float': 'bool',
3358        'numeric': 'bool',
3359        'guid': 'object',
3360    }
3361    if self.flavor == 'oracle':
3362        pd_db_df_aliases.update({
3363            'int': 'numeric',
3364            'date': 'datetime',
3365            'numeric': 'int',
3366        })
3367    elif self.flavor == 'geopackage':
3368        pd_db_df_aliases.update({
3369            'geometry': 'bytes',
3370            'bytes': 'geometry',
3371        })
3372
3373    altered_cols = {
3374        col: (db_cols_types.get(col, 'object'), typ)
3375        for col, typ in df_cols_types.items()
3376        if not are_dtypes_equal(typ, db_cols_types.get(col, 'object').lower())
3377        and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
3378    }
3379
3380    if debug and altered_cols:
3381        dprint("Columns to be altered:")
3382        mrsm.pprint(altered_cols)
3383
3384    ### NOTE: Special columns (numerics, bools, etc.) are captured and cached upon detection.
3385    new_special_cols = pipe._get_cached_value('new_special_cols', debug=debug) or {}
3386    new_special_db_cols_types = {
3387        col: (db_cols_types.get(col, 'object'), typ)
3388        for col, typ in new_special_cols.items()
3389    }
3390    if debug:
3391        dprint("Cached new special columns:")
3392        mrsm.pprint(new_special_cols)
3393        dprint("New special columns db types:")
3394        mrsm.pprint(new_special_db_cols_types)
3395
3396    altered_cols.update(new_special_db_cols_types)
3397
3398    ### NOTE: Sometimes bools are coerced into ints or floats.
3399    altered_cols_to_ignore = set()
3400    for col, (db_typ, df_typ) in altered_cols.items():
3401        for db_alias, df_alias in pd_db_df_aliases.items():
3402            if (
3403                db_alias in db_typ.lower()
3404                and df_alias in df_typ.lower()
3405                and col not in new_special_cols
3406            ):
3407                altered_cols_to_ignore.add(col)
3408
3409    ### Oracle's bool handling sometimes mixes NUMBER and INT.
3410    for bool_col in pipe_bool_cols:
3411        if bool_col not in altered_cols:
3412            continue
3413        db_is_bool_compatible = (
3414            are_dtypes_equal('int', altered_cols[bool_col][0])
3415            or are_dtypes_equal('float', altered_cols[bool_col][0])
3416            or are_dtypes_equal('numeric', altered_cols[bool_col][0])
3417            or are_dtypes_equal('bool', altered_cols[bool_col][0])
3418        )
3419        df_is_bool_compatible = (
3420            are_dtypes_equal('int', altered_cols[bool_col][1])
3421            or are_dtypes_equal('float', altered_cols[bool_col][1])
3422            or are_dtypes_equal('numeric', altered_cols[bool_col][1])
3423            or are_dtypes_equal('bool', altered_cols[bool_col][1])
3424        )
3425        if db_is_bool_compatible and df_is_bool_compatible:
3426            altered_cols_to_ignore.add(bool_col)
3427
3428    if debug and altered_cols_to_ignore:
3429        dprint("Ignoring the following altered columns (false positives).")
3430        mrsm.pprint(altered_cols_to_ignore)
3431
3432    for col in altered_cols_to_ignore:
3433        _ = altered_cols.pop(col, None)
3434
3435    if not altered_cols:
3436        return []
3437
3438    if numeric_cols:
3439        explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
3440        explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
3441        pipe.dtypes = explicit_pipe_dtypes
3442        if not pipe.temporary:
3443            edit_success, edit_msg = pipe.edit(debug=debug)
3444            if not edit_success:
3445                warn(
3446                    f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
3447                    + f"{edit_msg}"
3448                )
3449    else:
3450        numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')])
3451
3452    numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3453    text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
3454    altered_cols_types = {
3455        col: (
3456            numeric_type
3457            if col in numeric_cols
3458            else text_type
3459        )
3460        for col, (db_typ, typ) in altered_cols.items()
3461    }
3462
3463    if self.flavor in ('sqlite', 'geopackage'):
3464        temp_table_name = '-' + session_id + '_' + target
3465        rename_query = (
3466            "ALTER TABLE "
3467            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3468            + " RENAME TO "
3469            + sql_item_name(temp_table_name, self.flavor, None)
3470        )
3471        create_query = (
3472            "CREATE TABLE "
3473            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3474            + " (\n"
3475        )
3476        for col_name, col_typ in db_cols_types.items():
3477            create_query += (
3478                sql_item_name(col_name, self.flavor, None)
3479                + " "
3480                + (
3481                    col_typ
3482                    if col_name not in altered_cols
3483                    else altered_cols_types[col_name]
3484                )
3485                + ",\n"
3486            )
3487        create_query = create_query[:-2] + "\n)"
3488
3489        insert_query = (
3490            "INSERT INTO "
3491            + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3492            + ' ('
3493            + ', '.join([
3494                sql_item_name(col_name, self.flavor, None)
3495                for col_name in db_cols_types
3496            ])
3497            + ')'
3498            + "\nSELECT\n"
3499        )
3500        for col_name in db_cols_types:
3501            new_col_str = (
3502                sql_item_name(col_name, self.flavor, None)
3503                if col_name not in altered_cols
3504                else (
3505                    "CAST("
3506                    + sql_item_name(col_name, self.flavor, None)
3507                    + " AS "
3508                    + altered_cols_types[col_name]
3509                    + ")"
3510                )
3511            )
3512            insert_query += new_col_str + ",\n"
3513
3514        insert_query = insert_query[:-2] + (
3515            f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
3516        )
3517
3518        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
3519
3520        drop_query = f"DROP TABLE {if_exists_str}" + sql_item_name(
3521            temp_table_name, self.flavor, self.get_pipe_schema(pipe)
3522        )
3523        return [
3524            rename_query,
3525            create_query,
3526            insert_query,
3527            drop_query,
3528        ]
3529
3530    queries = []
3531    if self.flavor == 'oracle':
3532        for col, typ in altered_cols_types.items():
3533            add_query = (
3534                "ALTER TABLE "
3535                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3536                + "\nADD " + sql_item_name(col + '_temp', self.flavor, None)
3537                + " " + typ
3538            )
3539            queries.append(add_query)
3540
3541        for col, typ in altered_cols_types.items():
3542            populate_temp_query = (
3543                "UPDATE "
3544                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3545                + "\nSET " + sql_item_name(col + '_temp', self.flavor, None)
3546                + ' = ' + sql_item_name(col, self.flavor, None)
3547            )
3548            queries.append(populate_temp_query)
3549
3550        for col, typ in altered_cols_types.items():
3551            set_old_cols_to_null_query = (
3552                "UPDATE "
3553                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3554                + "\nSET " + sql_item_name(col, self.flavor, None)
3555                + ' = NULL'
3556            )
3557            queries.append(set_old_cols_to_null_query)
3558
3559        for col, typ in altered_cols_types.items():
3560            alter_type_query = (
3561                "ALTER TABLE "
3562                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3563                + "\nMODIFY " + sql_item_name(col, self.flavor, None) + ' '
3564                + typ
3565            )
3566            queries.append(alter_type_query)
3567
3568        for col, typ in altered_cols_types.items():
3569            set_old_to_temp_query = (
3570                "UPDATE "
3571                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3572                + "\nSET " + sql_item_name(col, self.flavor, None)
3573                + ' = ' + sql_item_name(col + '_temp', self.flavor, None)
3574            )
3575            queries.append(set_old_to_temp_query)
3576
3577        for col, typ in altered_cols_types.items():
3578            drop_temp_query = (
3579                "ALTER TABLE "
3580                + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3581                + "\nDROP COLUMN " + sql_item_name(col + '_temp', self.flavor, None)
3582            )
3583            queries.append(drop_temp_query)
3584
3585        return queries
3586
3587    query = "ALTER TABLE " + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3588    for col, typ in altered_cols_types.items():
3589        alter_col_prefix = (
3590            'ALTER' if self.flavor not in ('mysql', 'mariadb', 'oracle')
3591            else 'MODIFY'
3592        )
3593        type_prefix = (
3594            '' if self.flavor in ('mssql', 'mariadb', 'mysql')
3595            else 'TYPE '
3596        )
3597        column_str = 'COLUMN' if self.flavor != 'oracle' else ''
3598        query_suffix = (
3599            f"\n{alter_col_prefix} {column_str} "
3600            + sql_item_name(col, self.flavor, None)
3601            + " " + type_prefix + typ + ","
3602        )
3603        if self.flavor not in SINGLE_ALTER_TABLE_FLAVORS:
3604            query += query_suffix
3605        else:
3606            queries.append(query + query_suffix[:-1])
3607
3608    if self.flavor not in SINGLE_ALTER_TABLE_FLAVORS:
3609        queries.append(query[:-1])
3610
3611    if self.flavor != 'duckdb':
3612        return queries
3613
3614    drop_index_queries = list(flatten_list(
3615        [q for ix, q in self.get_drop_index_queries(pipe, debug=debug).items()]
3616    ))
3617    create_index_queries = list(flatten_list(
3618        [q for ix, q in self.get_create_index_queries(pipe, debug=debug).items()]
3619    ))
3620
3621    return drop_index_queries + queries + create_index_queries

If we encounter a column of a different type, set the entire column to text. If the altered columns are numeric, alter to numeric instead.

Parameters
  • pipe (mrsm.Pipe): The pipe to be altered.
  • df (Union[pd.DataFrame, Dict[str, str]]): The pandas DataFrame which may contain altered columns. If a dict is provided, assume it maps columns to Pandas data types.
Returns
  • A list of the ALTER TABLE SQL query or queries to be executed on the provided connector.
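A sketch of previewing the queries for a hypothetical column whose incoming type no longer matches the table:

>>> conn.get_alter_columns_queries(pipe, {'temperature': 'object'})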
def delete_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
1018def delete_pipe(
1019    self,
1020    pipe: mrsm.Pipe,
1021    debug: bool = False,
1022) -> SuccessTuple:
1023    """
1024    Delete a Pipe's registration.
1025    """
1026    from meerschaum.utils.packages import attempt_import
1027    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
1028
1029    if not pipe.id:
1030        return False, f"{pipe} is not registered."
1031
1032    ### ensure pipes table exists
1033    from meerschaum.connectors.sql.tables import get_tables
1034    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
1035
1036    q = sqlalchemy.delete(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id)
1037    if not self.exec(q, debug=debug):
1038        return False, f"Failed to delete registration for {pipe}."
1039
1040    return True, "Success"

Delete a Pipe's registration.
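Sketch (note from the source above that this deletes only the registration row from the pipes table, not the pipe's target table):

>>> success, msg = conn.delete_pipe(pipe)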

def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, str, NoneType] = None, end: Union[datetime.datetime, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: str = 'asc', limit: Optional[int] = None, begin_add_minutes: int = 0, end_add_minutes: int = 0, chunksize: Optional[int] = -1, as_iterator: bool = False, debug: bool = False, **kw: Any) -> 'Union[pd.DataFrame, None]':
1043def get_pipe_data(
1044    self,
1045    pipe: mrsm.Pipe,
1046    select_columns: Optional[List[str]] = None,
1047    omit_columns: Optional[List[str]] = None,
1048    begin: Union[datetime, str, None] = None,
1049    end: Union[datetime, str, None] = None,
1050    params: Optional[Dict[str, Any]] = None,
1051    order: str = 'asc',
1052    limit: Optional[int] = None,
1053    begin_add_minutes: int = 0,
1054    end_add_minutes: int = 0,
1055    chunksize: Optional[int] = -1,
1056    as_iterator: bool = False,
1057    debug: bool = False,
1058    **kw: Any
1059) -> Union[pd.DataFrame, None]:
1060    """
1061    Access a pipe's data from the SQL instance.
1062
1063    Parameters
1064    ----------
1065    pipe: mrsm.Pipe
1066        The pipe to get data from.
1067
1068    select_columns: Optional[List[str]], default None
1069        If provided, only select these given columns.
1070        Otherwise select all available columns (i.e. `SELECT *`).
1071
1072    omit_columns: Optional[List[str]], default None
1073        If provided, remove these columns from the selection.
1074
1075    begin: Union[datetime, str, None], default None
1076        If provided, get rows newer than or equal to this value.
1077
1078    end: Union[datetime, str, None], default None
1079        If provided, get rows older than or equal to this value.
1080
1081    params: Optional[Dict[str, Any]], default None
1082        Additional parameters to filter by.
1083        See `meerschaum.connectors.sql.build_where`.
1084
1085    order: Optional[str], default 'asc'
1086        The selection order for all of the indices in the query.
1087        If `None`, omit the `ORDER BY` clause.
1088
1089    limit: Optional[int], default None
1090        If specified, limit the number of rows retrieved to this value.
1091
1092    begin_add_minutes: int, default 0
1093        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1094
1095    end_add_minutes: int, default 0
1096        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1097
1098    chunksize: Optional[int], default -1
1099        The size of dataframe chunks to load into memory.
1100
1101    as_iterator: bool, default False
1102        If `True`, return the chunks iterator directly.
1103
1104    debug: bool, default False
1105        Verbosity toggle.
1106
1107    Returns
1108    -------
1109    A `pd.DataFrame` of the pipe's data.
1110
1111    """
1112    import functools
1113    from meerschaum.utils.packages import import_pandas
1114    from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
1115    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
1116    pd = import_pandas()
1117    is_dask = 'dask' in pd.__name__
1118
1119    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
1120    pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {}
1121
1122    remote_pandas_types = {
1123        col: to_pandas_dtype(get_pd_type_from_db_type(typ))
1124        for col, typ in cols_types.items()
1125    }
1126    remote_dt_cols_types = {
1127        col: typ
1128        for col, typ in remote_pandas_types.items()
1129        if are_dtypes_equal(typ, 'datetime')
1130    }
1131    remote_dt_tz_aware_cols_types = {
1132        col: typ
1133        for col, typ in remote_dt_cols_types.items()
1134        if ',' in typ or typ == 'datetime'
1135    }
1136    remote_dt_tz_naive_cols_types = {
1137        col: typ
1138        for col, typ in remote_dt_cols_types.items()
1139        if col not in remote_dt_tz_aware_cols_types
1140    }
1141
1142    configured_pandas_types = {
1143        col: to_pandas_dtype(typ)
1144        for col, typ in pipe_dtypes.items()
1145    }
1146    configured_lower_precision_dt_cols_types = {
1147        col: typ
1148        for col, typ in pipe_dtypes.items()
1149        if (
1150            are_dtypes_equal('datetime', typ)
1151            and '[' in typ
1152            and 'ns' not in typ
1153        )
1154
1155    }
1156
1157    dtypes = {
1158        **remote_pandas_types,
1159        **configured_pandas_types,
1160        **remote_dt_tz_aware_cols_types,
1161        **remote_dt_tz_naive_cols_types,
1162        **configured_lower_precision_dt_cols_types
1163    } if pipe.enforce else {}
1164
1165    existing_cols = cols_types.keys()
1166    select_columns = (
1167        [
1168            col
1169            for col in existing_cols
1170            if col not in (omit_columns or [])
1171        ]
1172        if not select_columns
1173        else [
1174            col
1175            for col in select_columns
1176            if col in existing_cols
1177            and col not in (omit_columns or [])
1178        ]
1179    ) if pipe.enforce else select_columns
1180
1181    if select_columns:
1182        dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
1183
1184    dtypes = {
1185        col: typ
1186        for col, typ in dtypes.items()
1187        if col in (select_columns or [col]) and col not in (omit_columns or [])
1188    } if pipe.enforce else {}
1189
1190    if debug:
1191        dprint(f"[{self}] `read()` dtypes:")
1192        mrsm.pprint(dtypes)
1193
1194    query = self.get_pipe_data_query(
1195        pipe,
1196        select_columns=select_columns,
1197        omit_columns=omit_columns,
1198        begin=begin,
1199        end=end,
1200        params=params,
1201        order=order,
1202        limit=limit,
1203        begin_add_minutes=begin_add_minutes,
1204        end_add_minutes=end_add_minutes,
1205        debug=debug,
1206        **kw
1207    )
1208
1209    read_kwargs = {}
1210    if is_dask:
1211        index_col = pipe.columns.get('datetime', None)
1212        read_kwargs['index_col'] = index_col
1213
1214    chunks = self.read(
1215        query,
1216        chunksize=chunksize,
1217        as_iterator=True,
1218        coerce_float=False,
1219        dtype=dtypes,
1220        debug=debug,
1221        **read_kwargs
1222    )
1223
1224    if as_iterator:
1225        return chunks
1226
1227    return pd.concat(chunks)

Access a pipe's data from the SQL instance.

Parameters
  • pipe (mrsm.Pipe): The pipe to get data from.
  • select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
  • begin (Union[datetime, str, None], default None): If provided, get rows newer than or equal to this value.
  • end (Union[datetime, str, None], default None): If provided, get rows older than or equal to this value.
  • params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See meerschaum.connectors.sql.build_where.
  • order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If None, omit the ORDER BY clause.
  • limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
  • begin_add_minutes (int, default 0): The number of minutes to add to the begin datetime (i.e. DATEADD).
  • end_add_minutes (int, default 0): The number of minutes to add to the end datetime (i.e. DATEADD).
  • chunksize (Optional[int], default -1): The size of dataframe chunks to load into memory.
  • as_iterator (bool, default False): If True, return the chunks iterator directly.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A pd.DataFrame of the pipe's data.
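
A hedged sketch of pulling a bounded window of rows (the keys, the `station` filter, and the bounds are hypothetical):
```
>>> from datetime import datetime, timezone
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('demo', 'weather', instance=conn)
>>> df = conn.get_pipe_data(
...     pipe,
...     begin=datetime(2024, 1, 1, tzinfo=timezone.utc),
...     end=datetime(2024, 2, 1, tzinfo=timezone.utc),
...     params={'station': 'KATL'},
...     limit=100,
... )
>>> len(df) <= 100
True
```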
def get_pipe_data_query( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[datetime.datetime, int, str, NoneType] = None, end: Union[datetime.datetime, int, str, NoneType] = None, params: Optional[Dict[str, Any]] = None, order: Optional[str] = 'asc', sort_datetimes: bool = False, limit: Optional[int] = None, begin_add_minutes: int = 0, end_add_minutes: int = 0, replace_nulls: Optional[str] = None, skip_existing_cols_check: bool = False, debug: bool = False, **kw: Any) -> Optional[str]:
1230def get_pipe_data_query(
1231    self,
1232    pipe: mrsm.Pipe,
1233    select_columns: Optional[List[str]] = None,
1234    omit_columns: Optional[List[str]] = None,
1235    begin: Union[datetime, int, str, None] = None,
1236    end: Union[datetime, int, str, None] = None,
1237    params: Optional[Dict[str, Any]] = None,
1238    order: Optional[str] = 'asc',
1239    sort_datetimes: bool = False,
1240    limit: Optional[int] = None,
1241    begin_add_minutes: int = 0,
1242    end_add_minutes: int = 0,
1243    replace_nulls: Optional[str] = None,
1244    skip_existing_cols_check: bool = False,
1245    debug: bool = False,
1246    **kw: Any
1247) -> Union[str, None]:
1248    """
1249    Return the `SELECT` query for retrieving a pipe's data from its instance.
1250
1251    Parameters
1252    ----------
1253    pipe: mrsm.Pipe
1254        The pipe to get data from.
1255
1256    select_columns: Optional[List[str]], default None
1257        If provided, only select these given columns.
1258        Otherwise select all available columns (i.e. `SELECT *`).
1259
1260    omit_columns: Optional[List[str]], default None
1261        If provided, remove these columns from the selection.
1262
1263    begin: Union[datetime, int, str, None], default None
1264        If provided, get rows newer than or equal to this value.
1265
1266    end: Union[datetime, int, str, None], default None
1267        If provided, get rows older than or equal to this value.
1268
1269    params: Optional[Dict[str, Any]], default None
1270        Additional parameters to filter by.
1271        See `meerschaum.connectors.sql.build_where`.
1272
1273    order: Optional[str], default 'asc'
1274        The selection order for all of the indices in the query.
1275        If `None`, omit the `ORDER BY` clause.
1276
1277    sort_datetimes: bool, default False
1278        Alias for `order='desc'` (applied only when `order` is `None`).
1279
1280    limit: Optional[int], default None
1281        If specified, limit the number of rows retrieved to this value.
1282
1283    begin_add_minutes: int, default 0
1284        The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1285
1286    end_add_minutes: int, default 0
1287        The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1288
1289    chunksize: Optional[int], default -1
1290        Unused by this method (absorbed by `**kw`); kept for parity with `get_pipe_data()`.
1291
1292    replace_nulls: Optional[str], default None
1293        If provided, replace null values with this value.
1294
1295    skip_existing_cols_check: bool, default False
1296        If `True`, do not verify that the queried columns actually exist on the table.
1297
1298    debug: bool, default False
1299        Verbosity toggle.
1300
1301    Returns
1302    -------
1303    A `SELECT` query to retrieve a pipe's data.
1304    """
1305    from meerschaum.utils.misc import items_str
1306    from meerschaum.utils.sql import sql_item_name, dateadd_str
1307    from meerschaum.utils.dtypes import coerce_timezone
1308    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
1309
1310    dt_col = pipe.columns.get('datetime', None)
1311    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
1312    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
1313    dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
1314    dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
1315    select_columns = (
1316        [col for col in existing_cols]
1317        if not select_columns
1318        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
1319    )
1320    if omit_columns:
1321        select_columns = [col for col in select_columns if col not in omit_columns]
1322
1323    if order is None and sort_datetimes:
1324        order = 'desc'
1325
1326    if begin == '':
1327        begin = pipe.get_sync_time(debug=debug)
1328        backtrack_interval = pipe.get_backtrack_interval(debug=debug)
1329        if begin is not None:
1330            begin -= backtrack_interval
1331
1332    begin, end = pipe.parse_date_bounds(begin, end)
1333    if isinstance(begin, datetime) and dt_typ:
1334        begin = coerce_timezone(begin, strip_utc=('utc' not in dt_typ.lower()))
1335    if isinstance(end, datetime) and dt_typ:
1336        end = coerce_timezone(end, strip_utc=('utc' not in dt_typ.lower()))
1337
1338    cols_names = [
1339        sql_item_name(col, self.flavor, None)
1340        for col in select_columns
1341    ]
1342    select_cols_str = (
1343        'SELECT\n    '
1344        + ',\n    '.join(
1345            [
1346                (
1347                    col_name
1348                    if not replace_nulls
1349                    else f"COALESCE({col_name}, '{replace_nulls}') AS {col_name}"
1350                )
1351                for col_name in cols_names
1352            ]
1353        )
1354    ) if cols_names else 'SELECT *'
1355    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1356    query = f"{select_cols_str}\nFROM {pipe_table_name}"
1357    where = ""
1358
1359    if order is not None:
1360        default_order = 'asc'
1361        if order not in ('asc', 'desc'):
1362            warn(f"Ignoring unsupported order '{order}'. Falling back to '{default_order}'.")
1363            order = default_order
1364        order = order.upper()
1365
1366    if not pipe.columns.get('datetime', None):
1367        _dt = pipe.guess_datetime()
1368        dt = sql_item_name(_dt, self.flavor, None) if _dt else None
1369        is_guess = True
1370    else:
1371        _dt = pipe.get_columns('datetime')
1372        dt = sql_item_name(_dt, self.flavor, None)
1373        is_guess = False
1374
1375    quoted_indices = {
1376        key: sql_item_name(val, self.flavor, None)
1377        for key, val in pipe.columns.items()
1378        if val in existing_cols or skip_existing_cols_check
1379    }
1380
1381    if begin is not None or end is not None:
1382        if is_guess:
1383            if _dt is None:
1384                warn(
1385                    f"No datetime could be determined for {pipe}."
1386                    + "\n    Ignoring begin and end...",
1387                    stack=False,
1388                )
1389                begin, end = None, None
1390            else:
1391                warn(
1392                    f"A datetime wasn't specified for {pipe}.\n"
1393                    + f"    Using column \"{_dt}\" for datetime bounds...",
1394                    stack=False,
1395                )
1396
1397    is_dt_bound = False
1398    if begin is not None and (_dt in existing_cols or skip_existing_cols_check):
1399        begin_da = dateadd_str(
1400            flavor=self.flavor,
1401            datepart='minute',
1402            number=begin_add_minutes,
1403            begin=begin,
1404            db_type=dt_db_type,
1405        )
1406        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
1407        is_dt_bound = True
1408
1409    if end is not None and (_dt in existing_cols or skip_existing_cols_check):
1410        if 'int' in str(type(end)).lower() and end == begin:
1411            end += 1
1412        end_da = dateadd_str(
1413            flavor=self.flavor,
1414            datepart='minute',
1415            number=end_add_minutes,
1416            begin=end,
1417            db_type=dt_db_type,
1418        )
1419        where += f"{dt} <  {end_da}"
1420        is_dt_bound = True
1421
1422    if params is not None:
1423        from meerschaum.utils.sql import build_where
1424        valid_params = {
1425            k: v
1426            for k, v in params.items()
1427            if k in existing_cols or skip_existing_cols_check
1428        }
1429        if valid_params:
1430            where += '    ' + build_where(valid_params, self).lstrip().replace(
1431                'WHERE', ('    AND' if is_dt_bound else "    ")
1432            )
1433
1434    if len(where) > 0:
1435        query += "\nWHERE " + where
1436
1437    if order is not None:
1438        ### Sort by indices, starting with datetime.
1439        order_by = ""
1440        if quoted_indices:
1441            order_by += "\nORDER BY "
1442            if _dt and (_dt in existing_cols or skip_existing_cols_check):
1443                order_by += dt + ' ' + order + ','
1444            for key, quoted_col_name in quoted_indices.items():
1445                if dt == quoted_col_name:
1446                    continue
1447                order_by += ' ' + quoted_col_name + ' ' + order + ','
1448            order_by = order_by[:-1]
1449
1450        query += order_by
1451
1452    if isinstance(limit, int):
1453        if self.flavor == 'mssql':
1454            query = f'SELECT TOP {limit}\n' + query[len("SELECT "):]
1455        elif self.flavor == 'oracle':
1456            query = (
1457                f"SELECT * FROM (\n  {query}\n)\n"
1458                + f"WHERE ROWNUM IN ({', '.join([str(i) for i in range(1, limit+1)])})"
1459            )
1460        else:
1461            query += f"\nLIMIT {limit}"
1462
1463    if debug:
1464        to_print = (
1465            []
1466            + ([f"begin='{begin}'"] if begin else [])
1467            + ([f"end='{end}'"] if end else [])
1468            + ([f"params={params}"] if params else [])
1469        )
1470        dprint("Getting pipe data with constraints: " + items_str(to_print, quotes=False))
1471
1472    return query

Return the SELECT query for retrieving a pipe's data from its instance.

Parameters
  • pipe (mrsm.Pipe): The pipe to get data from.
  • select_columns (Optional[List[str]], default None): If provided, only select these given columns. Otherwise select all available columns (i.e. SELECT *).
  • omit_columns (Optional[List[str]], default None): If provided, remove these columns from the selection.
  • begin (Union[datetime, int, str, None], default None): If provided, get rows newer than or equal to this value.
  • end (Union[datetime, int, str, None], default None): If provided, get rows older than or equal to this value.
  • params (Optional[Dict[str, Any]], default None): Additional parameters to filter by. See meerschaum.connectors.sql.build_where.
  • order (Optional[str], default 'asc'): The selection order for all of the indices in the query. If None, omit the ORDER BY clause.
  • sort_datetimes (bool, default False): Alias for order='desc' (applied only when order is None).
  • limit (Optional[int], default None): If specified, limit the number of rows retrieved to this value.
  • begin_add_minutes (int, default 0): The number of minutes to add to the begin datetime (i.e. DATEADD).
  • end_add_minutes (int, default 0): The number of minutes to add to the end datetime (i.e. DATEADD).
  • chunksize (Optional[int], default -1): Unused by this method (absorbed by **kw); kept for parity with get_pipe_data().
  • replace_nulls (Optional[str], default None): If provided, replace null values with this value.
  • skip_existing_cols_check (bool, default False): If True, do not verify that the queried columns actually exist on the table.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SELECT query to retrieve a pipe's data.
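
Because this method only builds the SQL string, it is useful for inspecting how bounds and params translate to a `WHERE` clause without touching the database. A sketch (same hypothetical pipe as above):
```
>>> from datetime import datetime, timezone
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('demo', 'weather', instance=conn)
>>> query = conn.get_pipe_data_query(
...     pipe,
...     begin=datetime(2024, 1, 1, tzinfo=timezone.utc),
...     params={'station': 'KATL'},
...     order='desc',
...     limit=10,
... )
>>> print(query)  # e.g. SELECT * FROM "demo_weather" WHERE ... ORDER BY ... LIMIT 10
```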
def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
21def register_pipe(
22    self,
23    pipe: mrsm.Pipe,
24    debug: bool = False,
25) -> SuccessTuple:
26    """
27    Register a new pipe.
28    A pipe's attributes must be set before registering.
29    """
30    from meerschaum.utils.packages import attempt_import
31    from meerschaum.utils.sql import json_flavors
32
33    ### ensure pipes table exists
34    from meerschaum.connectors.sql.tables import get_tables
35    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
36
37    if pipe.get_id(debug=debug) is not None:
38        return False, f"{pipe} is already registered."
39
40    ### NOTE: if `parameters` is supplied in the Pipe constructor,
41    ###       then `pipe.parameters` will exist and not be fetched from the database.
42
43    ### 1. Prioritize the Pipe object's `parameters` first.
44    ###    E.g. if the user manually sets the `parameters` property
45    ###    or if the Pipe already exists
46    ###    (which shouldn't be able to be registered anyway but that's an issue for later).
47    parameters = None
48    try:
49        parameters = pipe.get_parameters(apply_symlinks=False)
50    except Exception as e:
51        if debug:
52            dprint(str(e))
53        parameters = None
54
55    ### ensure `parameters` is a dictionary
56    if parameters is None:
57        parameters = {}
58
59    import json
60    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
61    values = {
62        'connector_keys' : pipe.connector_keys,
63        'metric_key'     : pipe.metric_key,
64        'location_key'   : pipe.location_key,
65        'parameters'     : (
66            json.dumps(parameters)
67            if self.flavor not in json_flavors
68            else parameters
69        ),
70    }
71    query = sqlalchemy.insert(pipes_tbl).values(**values)
72    result = self.exec(query, debug=debug)
73    if result is None:
74        return False, f"Failed to register {pipe}."
75    return True, f"Successfully registered {pipe}."

Register a new pipe. A pipe's attributes must be set before registering.
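
A sketch (hypothetical keys, assumed not yet registered; the pipe's parameters are set via the constructor before registering):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe(
...     'demo', 'temperature',
...     instance=conn,
...     parameters={'columns': {'datetime': 'dt'}},
... )
>>> success, msg = conn.register_pipe(pipe)
>>> success
True
```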

def edit_pipe( self, pipe: meerschaum.Pipe, patch: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 78def edit_pipe(
 79    self,
 80    pipe: mrsm.Pipe,
 81    patch: bool = False,
 82    debug: bool = False,
 83    **kw : Any
 84) -> SuccessTuple:
 85    """
 86    Persist a Pipe's parameters to its database.
 87
 88    Parameters
 89    ----------
 90    pipe: mrsm.Pipe
 91        The pipe to be edited.
 92    patch: bool, default False
 93        If patch is `True`, update the existing parameters by cascading.
 94        Otherwise overwrite the parameters (default).
 95    debug: bool, default False
 96        Verbosity toggle.
 97    """
 98
 99    if pipe.id is None:
100        return False, f"{pipe} is not registered and cannot be edited."
101
102    from meerschaum.utils.packages import attempt_import
103    from meerschaum.utils.sql import json_flavors
104    if not patch:
105        parameters = pipe.__dict__.get('_attributes', {}).get('parameters', {})
106    else:
107        from meerschaum import Pipe
108        from meerschaum.config._patch import apply_patch_to_config
109        original_parameters = Pipe(
110            pipe.connector_keys, pipe.metric_key, pipe.location_key,
111            mrsm_instance=pipe.instance_keys
112        ).get_parameters(apply_symlinks=False)
113        parameters = apply_patch_to_config(
114            original_parameters,
115            pipe._attributes['parameters']
116        )
117
118    ### ensure pipes table exists
119    from meerschaum.connectors.sql.tables import get_tables
120    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
121
122    import json
123    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
124
125    values = {
126        'parameters': (
127            json.dumps(parameters)
128            if self.flavor not in json_flavors
129            else parameters
130        ),
131    }
132    q = sqlalchemy.update(pipes_tbl).values(**values).where(
133        pipes_tbl.c.pipe_id == pipe.id
134    )
135
136    result = self.exec(q, debug=debug)
137    message = (
138        f"Successfully edited {pipe}."
139        if result is not None else f"Failed to edit {pipe}."
140    )
141    return (result is not None), message

Persist a Pipe's parameters to its database.

Parameters
  • pipe (mrsm.Pipe): The pipe to be edited.
  • patch (bool, default False): If patch is True, update the existing parameters by cascading. Otherwise overwrite the parameters (default).
  • debug (bool, default False): Verbosity toggle.
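
A sketch of patching a single key into an existing registration (hypothetical keys; `tags` is just an illustrative parameter):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance=conn)
>>> pipe.parameters['tags'] = ['production']
>>> success, msg = conn.edit_pipe(pipe, patch=True)  # cascade instead of overwrite
```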
def get_pipe_id(self, pipe: meerschaum.Pipe, debug: bool = False) -> Any:
1475def get_pipe_id(
1476    self,
1477    pipe: mrsm.Pipe,
1478    debug: bool = False,
1479) -> Any:
1480    """
1481    Get a Pipe's ID from the pipes table.
1482    """
1483    if pipe.temporary:
1484        return None
1485    from meerschaum.utils.packages import attempt_import
1486    sqlalchemy = attempt_import('sqlalchemy')
1487    from meerschaum.connectors.sql.tables import get_tables
1488    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
1489
1490    query = sqlalchemy.select(pipes_tbl.c.pipe_id).where(
1491        pipes_tbl.c.connector_keys == pipe.connector_keys
1492    ).where(
1493        pipes_tbl.c.metric_key == pipe.metric_key
1494    ).where(
1495        (pipes_tbl.c.location_key == pipe.location_key) if pipe.location_key is not None
1496        else pipes_tbl.c.location_key.is_(None)
1497    )
1498    _id = self.value(query, debug=debug, silent=pipe.temporary)
1499    if _id is not None:
1500        _id = int(_id)
1501    return _id

Get a Pipe's ID from the pipes table.
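
For example (hypothetical keys; registered pipes return an integer, while temporary or unregistered pipes return `None`):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance=conn)
>>> pipe_id = conn.get_pipe_id(pipe)
>>> isinstance(pipe_id, int) or pipe_id is None
True
```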

def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, Any]:
1504def get_pipe_attributes(
1505    self,
1506    pipe: mrsm.Pipe,
1507    debug: bool = False,
1508) -> Dict[str, Any]:
1509    """
1510    Get a Pipe's attributes dictionary.
1511    """
1512    from meerschaum.connectors.sql.tables import get_tables
1513    from meerschaum.utils.packages import attempt_import
1514    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
1515
1516    if pipe.get_id(debug=debug) is None:
1517        return {}
1518
1519    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
1520
1521    try:
1522        q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id)
1523        if debug:
1524            dprint(q)
1525        rows = (
1526            self.exec(q, silent=True, debug=debug).mappings().all()
1527            if self.flavor != 'duckdb'
1528            else self.read(q, debug=debug).to_dict(orient='records')
1529        )
1530        if not rows:
1531            return {}
1532        attributes = dict(rows[0])
1533    except Exception:
1534        warn(traceback.format_exc())
1535        return {}
1536
1537    ### handle non-PostgreSQL databases (text vs JSON)
1538    if not isinstance(attributes.get('parameters', None), dict):
1539        try:
1540            import json
1541            parameters = json.loads(attributes['parameters'])
1542            if isinstance(parameters, str) and parameters[0] == '{':
1543                parameters = json.loads(parameters)
1544            attributes['parameters'] = parameters
1545        except Exception:
1546            attributes['parameters'] = {}
1547
1548    return attributes

Get a Pipe's attributes dictionary.
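
A sketch (the returned dictionary mirrors a row of the pipes table, with `parameters` deserialized into a dict):
```
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance=conn)
>>> attrs = conn.get_pipe_attributes(pipe)
>>> isinstance(attrs.get('parameters', {}), dict)
True
>>> 'pipe_id' in attrs  # row columns: pipe_id, connector_keys, metric_key, location_key, parameters
True
```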

def sync_pipe( self, pipe: meerschaum.Pipe, df: 'Union[pd.DataFrame, str, Dict[Any, Any], None]' = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, blocking: bool = True, debug: bool = False, _check_temporary_tables: bool = True, **kw: Any) -> Tuple[bool, str]:
1634def sync_pipe(
1635    self,
1636    pipe: mrsm.Pipe,
1637    df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
1638    begin: Union[datetime, int, None] = None,
1639    end: Union[datetime, int, None] = None,
1640    chunksize: Optional[int] = -1,
1641    check_existing: bool = True,
1642    blocking: bool = True,
1643    debug: bool = False,
1644    _check_temporary_tables: bool = True,
1645    **kw: Any
1646) -> SuccessTuple:
1647    """
1648    Sync a pipe using a database connection.
1649
1650    Parameters
1651    ----------
1652    pipe: mrsm.Pipe
1653        The Meerschaum Pipe instance into which to sync the data.
1654
1655    df: Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]
1656        An optional DataFrame or equivalent to sync into the pipe.
1657        Defaults to `None`.
1658
1659    begin: Union[datetime, int, None], default None
1660        Optionally specify the earliest datetime to search for data.
1661        Defaults to `None`.
1662
1663    end: Union[datetime, int, None], default None
1664        Optionally specify the latest datetime to search for data.
1665        Defaults to `None`.
1666
1667    chunksize: Optional[int], default -1
1668        Specify the number of rows to sync per chunk.
1669        If `-1`, resort to system configuration (default is `900`).
1670        A `chunksize` of `None` will sync all rows in one transaction.
1671        Defaults to `-1`.
1672
1673    check_existing: bool, default True
1674        If `True`, pull and diff with existing data from the pipe. Defaults to `True`.
1675
1676    blocking: bool, default True
1677        If `True`, wait for the sync to finish and return its result; otherwise sync asynchronously.
1678        Defaults to `True`.
1679
1680    debug: bool, default False
1681        Verbosity toggle. Defaults to False.
1682
1683    kw: Any
1684        Catch-all for keyword arguments.
1685
1686    Returns
1687    -------
1688    A `SuccessTuple` of success (`bool`) and message (`str`).
1689    """
1690    from meerschaum.utils.packages import import_pandas
1691    from meerschaum.utils.sql import (
1692        get_update_queries,
1693        sql_item_name,
1694        UPDATE_QUERIES,
1695        get_reset_autoincrement_queries,
1696    )
1697    from meerschaum.utils.dtypes import get_current_timestamp
1698    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1699    from meerschaum.utils.dataframe import get_special_cols
1700    from meerschaum import Pipe
1701    import time
1702    import copy
1703    pd = import_pandas()
1704    if df is None:
1705        msg = f"DataFrame is None. Cannot sync {pipe}."
1706        warn(msg)
1707        return False, msg
1708
1709    start = time.perf_counter()
1710    pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
1711    dtypes = pipe.get_dtypes(debug=debug)
1712
1713    if not pipe.temporary and not pipe.get_id(debug=debug):
1714        register_tuple = pipe.register(debug=debug)
1715        if not register_tuple[0]:
1716            return register_tuple
1717
1718    ### df is the dataframe returned from the remote source
1719    ### via the connector
1720    if debug:
1721        dprint("Fetched data:\n" + str(df))
1722
1723    if not isinstance(df, pd.DataFrame):
1724        df = pipe.enforce_dtypes(
1725            df,
1726            chunksize=chunksize,
1727            safe_copy=kw.get('safe_copy', False),
1728            dtypes=dtypes,
1729            debug=debug,
1730        )
1731
1732    ### if table does not exist, create it with indices
1733    is_new = False
1734    if not pipe.exists(debug=debug):
1735        check_existing = False
1736        is_new = True
1737    else:
1738        ### Check for new columns.
1739        add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
1740        if add_cols_queries:
1741            pipe._clear_cache_key('_columns_types', debug=debug)
1742            pipe._clear_cache_key('_columns_indices', debug=debug)
1743            if not self.exec_queries(add_cols_queries, debug=debug):
1744                warn(f"Failed to add new columns to {pipe}.")
1745
1746        alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
1747        if alter_cols_queries:
1748            pipe._clear_cache_key('_columns_types', debug=debug)
1749            pipe._clear_cache_key('_columns_indices', debug=debug)
1750            if not self.exec_queries(alter_cols_queries, debug=debug):
1751                warn(f"Failed to alter columns for {pipe}.")
1752
1753    upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
1754    if upsert:
1755        check_existing = False
1756    kw['safe_copy'] = kw.get('safe_copy', False)
1757
1758    unseen_df, update_df, delta_df = (
1759        pipe.filter_existing(
1760            df,
1761            chunksize=chunksize,
1762            debug=debug,
1763            **kw
1764        ) if check_existing else (df, None, df)
1765    )
1766    if upsert:
1767        unseen_df, update_df, delta_df = (df.head(0), df, df)
1768
1769    if debug:
1770        dprint("Delta data:\n" + str(delta_df))
1771        dprint("Unseen data:\n" + str(unseen_df))
1772        if update_df is not None:
1773            dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df))
1774
1775    if_exists = kw.get('if_exists', 'append')
1776    if 'if_exists' in kw:
1777        kw.pop('if_exists')
1778    if 'name' in kw:
1779        kw.pop('name')
1780
1781    ### Insert new data into the target table.
1782    unseen_kw = copy.deepcopy(kw)
1783    unseen_kw.update({
1784        'name': pipe.target,
1785        'if_exists': if_exists,
1786        'debug': debug,
1787        'as_dict': True,
1788        'safe_copy': kw.get('safe_copy', False),
1789        'chunksize': chunksize,
1790        'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
1791        'schema': self.get_pipe_schema(pipe),
1792    })
1793
1794    dt_col = pipe.columns.get('datetime', None)
1795    primary_key = pipe.columns.get('primary', None)
1796    autoincrement = (
1797        pipe.parameters.get('autoincrement', False)
1798        or (
1799            is_new
1800            and primary_key
1801            and primary_key
1802            not in dtypes
1803            and primary_key not in unseen_df.columns
1804        )
1805    )
1806    if autoincrement and autoincrement not in pipe.parameters:
1807        update_success, update_msg = pipe.update_parameters(
1808            {'autoincrement': autoincrement},
1809            debug=debug,
1810        )
1811        if not update_success:
1812            return update_success, update_msg
1813
1814    def _check_pk(_df_to_clear):
1815        if _df_to_clear is None:
1816            return
1817        if primary_key not in _df_to_clear.columns:
1818            return
1819        if not _df_to_clear[primary_key].notnull().any():
1820            del _df_to_clear[primary_key]
1821
1822    autoincrement_needs_reset = bool(
1823        autoincrement
1824        and primary_key
1825        and primary_key in unseen_df.columns
1826        and unseen_df[primary_key].notnull().any()
1827    )
1828    if autoincrement and primary_key:
1829        for _df_to_clear in (unseen_df, update_df, delta_df):
1830            _check_pk(_df_to_clear)
1831
1832    if is_new:
1833        create_success, create_msg = self.create_pipe_table_from_df(
1834            pipe,
1835            unseen_df,
1836            debug=debug,
1837        )
1838        if not create_success:
1839            return create_success, create_msg
1840
1841    do_identity_insert = bool(
1842        self.flavor in ('mssql',)
1843        and primary_key
1844        and primary_key in unseen_df.columns
1845        and autoincrement
1846    )
1847    stats = {'success': True, 'msg': ''}
1848    if len(unseen_df) > 0:
1849        with self.engine.connect() as connection:
1850            with connection.begin():
1851                if do_identity_insert:
1852                    identity_on_result = self.exec(
1853                        f"SET IDENTITY_INSERT {pipe_name} ON",
1854                        commit=False,
1855                        _connection=connection,
1856                        close=False,
1857                        debug=debug,
1858                    )
1859                    if identity_on_result is None:
1860                        return False, f"Could not enable identity inserts on {pipe}."
1861
1862                stats = self.to_sql(
1863                    unseen_df,
1864                    _connection=connection,
1865                    **unseen_kw
1866                )
1867
1868                if do_identity_insert:
1869                    identity_off_result = self.exec(
1870                        f"SET IDENTITY_INSERT {pipe_name} OFF",
1871                        commit=False,
1872                        _connection=connection,
1873                        close=False,
1874                        debug=debug,
1875                    )
1876                    if identity_off_result is None:
1877                        return False, f"Could not disable identity inserts on {pipe}."
1878
1879    if is_new:
1880        if not self.create_indices(pipe, debug=debug):
1881            warn(f"Failed to create indices for {pipe}. Continuing...")
1882
1883    if autoincrement_needs_reset:
1884        reset_autoincrement_queries = get_reset_autoincrement_queries(
1885            pipe.target,
1886            primary_key,
1887            self,
1888            schema=self.get_pipe_schema(pipe),
1889            debug=debug,
1890        )
1891        results = self.exec_queries(reset_autoincrement_queries, debug=debug)
1892        for result in results:
1893            if result is None:
1894                warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
1895
1896    if update_df is not None and len(update_df) > 0:
1897        temp_target = self.get_temporary_target(
1898            pipe.target,
1899            label=('update' if not upsert else 'upsert'),
1900        )
1901        self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1902        update_dtypes = {
1903            **{
1904                col: str(typ)
1905                for col, typ in update_df.dtypes.items()
1906            },
1907            **get_special_cols(update_df)
1908        }
1909
1910        temp_pipe = Pipe(
1911            pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
1912            instance=pipe.instance_keys,
1913            columns={
1914                (ix_key if ix_key != 'primary' else 'primary_'): ix
1915                for ix_key, ix in pipe.columns.items()
1916                if ix and ix in update_df.columns
1917            },
1918            dtypes=update_dtypes,
1919            target=temp_target,
1920            temporary=True,
1921            enforce=False,
1922            static=True,
1923            autoincrement=False,
1924            cache=False,
1925            parameters={
1926                'schema': self.internal_schema,
1927                'hypertable': False,
1928            },
1929        )
1930        _temp_columns_types = {
1931            col: get_db_type_from_pd_type(typ, self.flavor)
1932            for col, typ in update_dtypes.items()
1933        }
1934        temp_pipe._cache_value('_columns_types', _temp_columns_types, memory_only=True, debug=debug)
1935        temp_pipe._cache_value('_skip_check_indices', True, memory_only=True, debug=debug)
1936        now_ts = get_current_timestamp('ms', as_int=True) / 1000
1937        temp_pipe._cache_value('_columns_types_timestamp', now_ts, memory_only=True, debug=debug)
1938        temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
1939        if not temp_success:
1940            return temp_success, temp_msg
1941
1942        existing_cols = pipe.get_columns_types(debug=debug)
1943        join_cols = [
1944            col
1945            for col_key, col in pipe.columns.items()
1946            if col and col in existing_cols
1947        ] if not primary_key or self.flavor == 'oracle' else (
1948            [dt_col, primary_key]
1949            if (
1950                self.flavor in ('timescaledb', 'timescaledb-ha')
1951                and dt_col
1952                and dt_col in update_df.columns
1953            )
1954            else [primary_key]
1955        )
1956        update_queries = get_update_queries(
1957            pipe.target,
1958            temp_target,
1959            self,
1960            join_cols,
1961            upsert=upsert,
1962            schema=self.get_pipe_schema(pipe),
1963            patch_schema=self.internal_schema,
1964            target_cols_types=pipe.get_columns_types(debug=debug),
1965            patch_cols_types=_temp_columns_types,
1966            datetime_col=(dt_col if dt_col in update_df.columns else None),
1967            identity_insert=(autoincrement and primary_key in update_df.columns),
1968            null_indices=pipe.null_indices,
1969            cast_columns=pipe.enforce,
1970            debug=debug,
1971        )
1972        update_results = self.exec_queries(
1973            update_queries,
1974            break_on_error=True,
1975            rollback=True,
1976            debug=debug,
1977        )
1978        update_success = all(update_results)
1979        self._log_temporary_tables_creation(
1980            temp_target,
1981            ready_to_drop=True,
1982            create=(not pipe.temporary),
1983            debug=debug,
1984        )
1985        if not update_success:
1986            warn(f"Failed to apply update to {pipe}.")
1987        stats['success'] = stats['success'] and update_success
1988        stats['msg'] = (
1989            (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
1990            if not update_success
1991            else stats.get('msg', '')
1992        )
1993
1994    stop = time.perf_counter()
1995    success = stats['success']
1996    if not success:
1997        return success, stats['msg'] or str(stats)
1998
1999    unseen_count = len(unseen_df.index) if unseen_df is not None else 0
2000    update_count = len(update_df.index) if update_df is not None else 0
2001    msg = (
2002        (
2003            f"Inserted {unseen_count:,}, "
2004            + f"updated {update_count:,} rows."
2005        )
2006        if not upsert
2007        else (
2008            f"Upserted {update_count:,} row"
2009            + ('s' if update_count != 1 else '')
2010            + "."
2011        )
2012    )
2013    if debug:
2014        msg = msg[:-1] + (
2015            f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n"
2016            + f"in {round(stop - start, 2)} seconds."
2017        )
2018
2019    if _check_temporary_tables:
2020        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
2021            refresh=False, debug=debug
2022        )
2023        if not drop_stale_success:
2024            warn(drop_stale_msg)
2025
2026    return success, msg

Sync a pipe using a database connection.

Parameters
  • pipe (mrsm.Pipe): The Meerschaum Pipe instance into which to sync the data.
  • df (Union[pandas.DataFrame, str, Dict[Any, Any], List[Dict[str, Any]]]): An optional DataFrame or equivalent to sync into the pipe. Defaults to None.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe. Defaults to True.
  • blocking (bool, default True): If True, wait for the sync to finish and return its result; otherwise sync asynchronously. Defaults to True.
  • debug (bool, default False): Verbosity toggle. Defaults to False.
  • kw (Any): Catch-all for keyword arguments.
Returns
  • A SuccessTuple of success (bool) and message (str).
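
An end-to-end sketch (hypothetical keys and data; declaring the `datetime` column lets diffs and datetime bounds work):
```
>>> from datetime import datetime, timezone
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql', 'main')
>>> pipe = mrsm.Pipe('demo', 'temperature', instance=conn, columns={'datetime': 'dt'})
>>> df = pd.DataFrame({
...     'dt': [datetime(2024, 1, 1, tzinfo=timezone.utc)],
...     'temperature': [21.5],
... })
>>> success, msg = conn.sync_pipe(pipe, df)
>>> print(msg)  # e.g. "Inserted 1, updated 0 rows."
```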
def sync_pipe_inplace( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, chunksize: Optional[int] = -1, check_existing: bool = True, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
2029def sync_pipe_inplace(
2030    self,
2031    pipe: 'mrsm.Pipe',
2032    params: Optional[Dict[str, Any]] = None,
2033    begin: Union[datetime, int, None] = None,
2034    end: Union[datetime, int, None] = None,
2035    chunksize: Optional[int] = -1,
2036    check_existing: bool = True,
2037    debug: bool = False,
2038    **kw: Any
2039) -> SuccessTuple:
2040    """
2041    If a pipe's connector is the same as its instance connector,
2042    it's more efficient to sync the pipe in-place rather than reading data into Pandas.
2043
2044    Parameters
2045    ----------
2046    pipe: mrsm.Pipe
2047        The pipe whose connector is the same as its instance.
2048
2049    params: Optional[Dict[str, Any]], default None
2050        Optional params dictionary to build the `WHERE` clause.
2051        See `meerschaum.utils.sql.build_where`.
2052
2053    begin: Union[datetime, int, None], default None
2054        Optionally specify the earliest datetime to search for data.
2055        Defaults to `None`.
2056
2057    end: Union[datetime, int, None], default None
2058        Optionally specify the latest datetime to search for data.
2059        Defaults to `None`.
2060
2061    chunksize: Optional[int], default -1
2062        Specify the number of rows to sync per chunk.
2063        If `-1`, resort to system configuration (default is `900`).
2064        A `chunksize` of `None` will sync all rows in one transaction.
2065        Defaults to `-1`.
2066
2067    check_existing: bool, default True
2068        If `True`, pull and diff with existing data from the pipe.
2069
2070    debug: bool, default False
2071        Verbosity toggle.
2072
2073    Returns
2074    -------
2075    A SuccessTuple.
2076    """
2077    if self.flavor == 'duckdb':
2078        return pipe.sync(
2079            params=params,
2080            begin=begin,
2081            end=end,
2082            chunksize=chunksize,
2083            check_existing=check_existing,
2084            debug=debug,
2085            _inplace=False,
2086            **kw
2087        )
2088    from meerschaum.utils.sql import (
2089        sql_item_name,
2090        get_update_queries,
2091        get_null_replacement,
2092        get_create_table_queries,
2093        get_create_schema_if_not_exists_queries,
2094        get_table_cols_types,
2095        session_execute,
2096        dateadd_str,
2097        UPDATE_QUERIES,
2098    )
2099    from meerschaum.utils.dtypes.sql import (
2100        get_pd_type_from_db_type,
2101        get_db_type_from_pd_type,
2102    )
2103    from meerschaum.utils.misc import generate_password
2104
2105    transaction_id_length = (
2106        mrsm.get_config(
2107            'system', 'connectors', 'sql', 'instance', 'temporary_target', 'transaction_id_length'
2108        )
2109    )
2110    transact_id = generate_password(transaction_id_length)
2111
2112    internal_schema = self.internal_schema
2113    target = pipe.target
2114    temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
2115    temp_tables = {
2116        table_root: self.get_temporary_target(target, transact_id=transact_id, label=table_root)
2117        for table_root in temp_table_roots
2118    }
2119    temp_table_names = {
2120        table_root: sql_item_name(table_name_raw, self.flavor, internal_schema)
2121        for table_root, table_name_raw in temp_tables.items()
2122    }
2123    temp_table_aliases = {
2124        table_root: sql_item_name(table_root, self.flavor)
2125        for table_root in temp_table_roots
2126    }
2127    table_alias_as = " AS" if self.flavor != 'oracle' else ''
2128    metadef = self.get_pipe_metadef(
2129        pipe,
2130        params=params,
2131        begin=begin,
2132        end=end,
2133        check_existing=check_existing,
2134        debug=debug,
2135    )
2136    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2137    upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in UPDATE_QUERIES
2138    static = pipe.parameters.get('static', False)
2139    database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
2140    primary_key = pipe.columns.get('primary', None)
2141    primary_key_typ = pipe.dtypes.get(primary_key, None) if primary_key else None
2142    primary_key_db_type = (
2143        get_db_type_from_pd_type(primary_key_typ, self.flavor)
2144        if primary_key_typ
2145        else None
2146    )
2147    if not {col_key: col for col_key, col in pipe.columns.items() if col_key and col}:
2148        return False, "Cannot sync in-place without index columns."
2149
2150    autoincrement = pipe.parameters.get('autoincrement', False)
2151    dt_col = pipe.columns.get('datetime', None)
2152    dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
2153    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2154    dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
2155
2156    def clean_up_temp_tables(ready_to_drop: bool = False):
2157        log_success, log_msg = self._log_temporary_tables_creation(
2158            [
2159                table
2160                for table in temp_tables.values()
2161            ] if not upsert else [temp_tables['update']],
2162            ready_to_drop=ready_to_drop,
2163            create=(not pipe.temporary),
2164            debug=debug,
2165        )
2166        if not log_success:
2167            warn(log_msg)
2168        drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
2169            refresh=False,
2170            debug=debug,
2171        )
2172        if not drop_stale_success:
2173            warn(drop_stale_msg)
2174        return drop_stale_success, drop_stale_msg
2175
2176    sqlalchemy, sqlalchemy_orm = mrsm.attempt_import(
2177        'sqlalchemy',
2178        'sqlalchemy.orm',
2179    )
2180    if not pipe.exists(debug=debug):
2181        schema = self.get_pipe_schema(pipe)
2182        create_pipe_queries = get_create_table_queries(
2183            metadef,
2184            pipe.target,
2185            self.flavor,
2186            schema=schema,
2187            primary_key=primary_key,
2188            primary_key_db_type=primary_key_db_type,
2189            autoincrement=autoincrement,
2190            datetime_column=dt_col,
2191        )
2192        if schema:
2193            create_pipe_queries = (
2194                get_create_schema_if_not_exists_queries(schema, self.flavor)
2195                + create_pipe_queries
2196            )
2197
2198        results = self.exec_queries(create_pipe_queries, debug=debug)
2199        if not all(results):
2200            _ = clean_up_temp_tables()
2201            return False, f"Could not insert new data into {pipe} from its SQL query definition."
2202
2203        if not self.create_indices(pipe, debug=debug):
2204            warn(f"Failed to create indices for {pipe}. Continuing...")
2205
2206        rowcount = pipe.get_rowcount(debug=debug)
2207        _ = clean_up_temp_tables()
2208        return True, f"Inserted {rowcount:,}, updated 0 rows."
2209
2210    session = sqlalchemy_orm.Session(self.engine)
2211    connectable = session if self.flavor != 'duckdb' else self
2212
2213    create_new_query = get_create_table_queries(
2214        metadef,
2215    temp_tables['new' if not upsert else 'update'],
2216        self.flavor,
2217        schema=internal_schema,
2218    )[0]
2219    (create_new_success, create_new_msg), create_new_results = session_execute(
2220        session,
2221        create_new_query,
2222        with_results=True,
2223        debug=debug,
2224    )
2225    if not create_new_success:
2226        _ = clean_up_temp_tables()
2227        return create_new_success, create_new_msg
2228    new_count = create_new_results[0].rowcount if create_new_results else 0
2229
2230    new_cols_types = get_table_cols_types(
2231        temp_tables[('new' if not upsert else 'update')],
2232        connectable=connectable,
2233        flavor=self.flavor,
2234        schema=internal_schema,
2235        database=database,
2236        debug=debug,
2237    ) if not static else pipe.get_columns_types(debug=debug)
2238    if not new_cols_types:
2239        return False, f"Failed to get new columns for {pipe}."
2240
2241    new_cols = {
2242        str(col_name): get_pd_type_from_db_type(str(col_type))
2243        for col_name, col_type in new_cols_types.items()
2244    }
2245    new_cols_str = '\n    ' + ',\n    '.join([
2246        sql_item_name(col, self.flavor)
2247        for col in new_cols
2248    ])
2249    def get_col_typ(col: str, cols_types: Dict[str, str]) -> str:
2250        if self.flavor == 'oracle' and new_cols_types.get(col, '').lower() == 'char':
2251            return new_cols_types[col]
2252        return cols_types[col]
2253
2254    add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
2255    if add_cols_queries:
2256        pipe._clear_cache_key('_columns_types', debug=debug)
2257        pipe._clear_cache_key('_columns_indices', debug=debug)
2258        self.exec_queries(add_cols_queries, debug=debug)
2259
2260    alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
2261    if alter_cols_queries:
2262        pipe._clear_cache_key('_columns_types', debug=debug)
2263        self.exec_queries(alter_cols_queries, debug=debug)
2264
2265    insert_queries = [
2266        (
2267            f"INSERT INTO {pipe_name} ({new_cols_str})\n"
2268            f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}{table_alias_as}"
2269            f" {temp_table_aliases['new']}"
2270        )
2271    ] if not check_existing and not upsert else []
2272
2273    new_queries = insert_queries
2274    new_success, new_msg = (
2275        session_execute(session, new_queries, debug=debug)
2276        if new_queries
2277        else (True, "Success")
2278    )
2279    if not new_success:
2280        _ = clean_up_temp_tables()
2281        return new_success, new_msg
2282
2283    if not check_existing:
2284        session.commit()
2285        _ = clean_up_temp_tables()
2286        return True, f"Inserted {new_count}, updated 0 rows."
2287
2288    min_dt_col_name_da = dateadd_str(
2289        flavor=self.flavor, begin=f"MIN({dt_col_name})", db_type=dt_db_type,
2290    )
2291    max_dt_col_name_da = dateadd_str(
2292        flavor=self.flavor, begin=f"MAX({dt_col_name})", db_type=dt_db_type,
2293    )
2294
2295    (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
2296        session,
2297        [
2298            "SELECT\n"
2299            f"    {min_dt_col_name_da} AS {sql_item_name('min_dt', self.flavor)},\n"
2300            f"    {max_dt_col_name_da} AS {sql_item_name('max_dt', self.flavor)}\n"
2301            f"FROM {temp_table_names['new' if not upsert else 'update']}\n"
2302            f"WHERE {dt_col_name} IS NOT NULL"
2303        ],
2304        with_results=True,
2305        debug=debug,
2306    ) if dt_col and not upsert else ((True, "Success"), None)
2307    if not new_dt_bounds_success:
2308        return (
2309            new_dt_bounds_success,
2310            f"Could not determine in-place datetime bounds:\n{new_dt_bounds_msg}"
2311        )
2312
2313    if dt_col and not upsert:
2314        begin, end = new_dt_bounds_results[0].fetchone()
2315
2316    backtrack_def = self.get_pipe_data_query(
2317        pipe,
2318        begin=begin,
2319        end=end,
2320        begin_add_minutes=0,
2321        end_add_minutes=1,
2322        params=params,
2323        debug=debug,
2324        order=None,
2325    )
2326    create_backtrack_query = get_create_table_queries(
2327        backtrack_def,
2328        temp_tables['backtrack'],
2329        self.flavor,
2330        schema=internal_schema,
2331    )[0]
2332    (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute(
2333        session,
2334        create_backtrack_query,
2335        with_results=True,
2336        debug=debug,
2337    ) if not upsert else ((True, "Success"), None)
2338
2339    if not create_backtrack_success:
2340        _ = clean_up_temp_tables()
2341        return create_backtrack_success, create_backtrack_msg
2342
2343    backtrack_cols_types = get_table_cols_types(
2344        temp_tables['backtrack'],
2345        connectable=connectable,
2346        flavor=self.flavor,
2347        schema=internal_schema,
2348        database=database,
2349        debug=debug,
2350    ) if not (upsert or static) else new_cols_types
2351
2352    common_cols = [col for col in new_cols if col in backtrack_cols_types]
2353    primary_key = pipe.columns.get('primary', None)
2354    on_cols = {
2355        col: new_cols.get(col)
2356        for col_key, col in pipe.columns.items()
2357        if (
2358            col
2359            and
2360            col_key != 'value'
2361            and col in backtrack_cols_types
2362            and col in new_cols
2363        )
2364    } if not primary_key else {primary_key: new_cols.get(primary_key)}
2365    if not on_cols:
2366        raise ValueError("Cannot sync without common index columns.")
2367
2368    null_replace_new_cols_str = (
2369        '\n    ' + ',\n    '.join([
2370            f"COALESCE({temp_table_aliases['new']}.{sql_item_name(col, self.flavor)}, "
2371            + get_null_replacement(get_col_typ(col, new_cols_types), self.flavor)
2372            + ") AS "
2373            + sql_item_name(col, self.flavor, None)
2374            for col, typ in new_cols.items()
2375        ])
2376    )
2377
2378    select_delta_query = (
2379        "SELECT"
2380        + null_replace_new_cols_str
2381        + f"\nFROM {temp_table_names['new']}{table_alias_as} {temp_table_aliases['new']}\n"
2382        + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as} {temp_table_aliases['backtrack']}"
2383        + "\n    ON\n    "
2384        + '\n    AND\n    '.join([
2385            (
2386                f"    COALESCE({temp_table_aliases['new']}."
2387                + sql_item_name(c, self.flavor, None)
2388                + ", "
2389                + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor)
2390                + ")"
2391                + '\n        =\n    '
2392                + f"    COALESCE({temp_table_aliases['backtrack']}."
2393                + sql_item_name(c, self.flavor, None)
2394                + ", "
2395                + get_null_replacement(get_col_typ(c, backtrack_cols_types), self.flavor)
2396                + ") "
2397            ) for c in common_cols
2398        ])
2399        + "\nWHERE\n    "
2400        + '\n    AND\n    '.join([
2401            (
2402                f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor) + ' IS NULL'
2403            ) for c in common_cols
2404        ])
2405    )
2406    create_delta_query = get_create_table_queries(
2407        select_delta_query,
2408        temp_tables['delta'],
2409        self.flavor,
2410        schema=internal_schema,
2411    )[0]
2412    create_delta_success, create_delta_msg = session_execute(
2413        session,
2414        create_delta_query,
2415        debug=debug,
2416    ) if not upsert else (True, "Success")
2417    if not create_delta_success:
2418        _ = clean_up_temp_tables()
2419        return create_delta_success, create_delta_msg
2420
2421    delta_cols_types = get_table_cols_types(
2422        temp_tables['delta'],
2423        connectable=connectable,
2424        flavor=self.flavor,
2425        schema=internal_schema,
2426        database=database,
2427        debug=debug,
2428    ) if not (upsert or static) else new_cols_types
2429
2430    ### This is a weird bug on SQLite.
2431    ### Sometimes the backtrack dtypes are all empty strings.
2432    if not all(delta_cols_types.values()):
2433        delta_cols_types = new_cols_types
2434
2435    delta_cols = {
2436        col: get_pd_type_from_db_type(typ)
2437        for col, typ in delta_cols_types.items()
2438    }
2439    delta_cols_str = ', '.join([
2440        sql_item_name(col, self.flavor)
2441        for col in delta_cols
2442    ])
2443
2444    select_joined_query = (
2445        "SELECT\n    "
2446        + (',\n    '.join([
2447            (
2448                f"{temp_table_aliases['delta']}." + sql_item_name(c, self.flavor, None)
2449                + " AS " + sql_item_name(c + '_delta', self.flavor, None)
2450            ) for c in delta_cols
2451        ]))
2452        + ",\n    "
2453        + (',\n    '.join([
2454            (
2455                f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor, None)
2456                + " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
2457            ) for c in backtrack_cols_types
2458        ]))
2459        + f"\nFROM {temp_table_names['delta']}{table_alias_as} {temp_table_aliases['delta']}\n"
2460        + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as}"
2461        + f" {temp_table_aliases['backtrack']}"
2462        + "\n    ON\n    "
2463        + '\n    AND\n    '.join([
2464            (
2465                f"    COALESCE({temp_table_aliases['delta']}." + sql_item_name(c, self.flavor)
2466                + ", "
2467                + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
2468                + '\n        =\n    '
2469                + f"    COALESCE({temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor)
2470                + ", "
2471                + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
2472            ) for c, typ in on_cols.items()
2473        ])
2474    )
2475
2476    create_joined_query = get_create_table_queries(
2477        select_joined_query,
2478        temp_tables['joined'],
2479        self.flavor,
2480        schema=internal_schema,
2481    )[0]
2482    create_joined_success, create_joined_msg = session_execute(
2483        session,
2484        create_joined_query,
2485        debug=debug,
2486    ) if on_cols and not upsert else (True, "Success")
2487    if not create_joined_success:
2488        _ = clean_up_temp_tables()
2489        return create_joined_success, create_joined_msg
2490
2491    select_unseen_query = (
2492        "SELECT\n    "
2493        + (',\n    '.join([
2494            (
2495                "CASE\n        WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2496                + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
2497                + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
2498                + "\n        ELSE NULL\n    END"
2499                + " AS " + sql_item_name(c, self.flavor, None)
2500            ) for c, typ in delta_cols.items()
2501        ]))
2502        + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
2503        + "WHERE\n    "
2504        + '\n    AND\n    '.join([
2505            (
2506                sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
2507            ) for c in delta_cols
2508        ])
2509    )
2510    create_unseen_query = get_create_table_queries(
2511        select_unseen_query,
2512        temp_tables['unseen'],
2513        self.flavor,
2514        internal_schema,
2515    )[0]
2516    (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
2517        session,
2518        create_unseen_query,
2519        with_results=True,
2520        debug=debug
2521    ) if not upsert else ((True, "Success"), None)
2522    if not create_unseen_success:
2523        _ = clean_up_temp_tables()
2524        return create_unseen_success, create_unseen_msg
2525
2526    select_update_query = (
2527        "SELECT\n    "
2528        + (',\n    '.join([
2529            (
2530                "CASE\n        WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2531                + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
2532                + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
2533                + "\n        ELSE NULL\n    END"
2534                + " AS " + sql_item_name(c, self.flavor, None)
2535            ) for c, typ in delta_cols.items()
2536        ]))
2537        + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
2538        + "WHERE\n    "
2539        + '\n    OR\n    '.join([
2540            (
2541                sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
2542            ) for c in delta_cols
2543        ])
2544    )
2545
2546    create_update_query = get_create_table_queries(
2547        select_update_query,
2548        temp_tables['update'],
2549        self.flavor,
2550        internal_schema,
2551    )[0]
2552    (create_update_success, create_update_msg), create_update_results = session_execute(
2553        session,
2554        create_update_query,
2555        with_results=True,
2556        debug=debug,
2557    ) if on_cols and not upsert else ((True, "Success"), [])
2558    apply_update_queries = (
2559        get_update_queries(
2560            pipe.target,
2561            temp_tables['update'],
2562            session,
2563            on_cols,
2564            upsert=upsert,
2565            schema=self.get_pipe_schema(pipe),
2566            patch_schema=internal_schema,
2567            target_cols_types=pipe.get_columns_types(debug=debug),
2568            patch_cols_types=delta_cols_types,
2569            datetime_col=pipe.columns.get('datetime', None),
2570            flavor=self.flavor,
2571            null_indices=pipe.null_indices,
2572            cast_columns=pipe.enforce,
2573            debug=debug,
2574        )
2575        if on_cols else []
2576    )
2577
2578    apply_unseen_queries = [
2579        (
2580            f"INSERT INTO {pipe_name} ({delta_cols_str})\n"
2581            + f"SELECT {delta_cols_str}\nFROM "
2582            + (
2583                temp_table_names['unseen']
2584                if on_cols
2585                else temp_table_names['delta']
2586            )
2587        ),
2588    ]
2589
2590    (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute(
2591        session,
2592        apply_unseen_queries,
2593        with_results=True,
2594        debug=debug,
2595    ) if not upsert else ((True, "Success"), None)
2596    if not apply_unseen_success:
2597        _ = clean_up_temp_tables()
2598        return apply_unseen_success, apply_unseen_msg
2599    unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0
2600
2601    (apply_update_success, apply_update_msg), apply_update_results = session_execute(
2602        session,
2603        apply_update_queries,
2604        with_results=True,
2605        debug=debug,
2606    )
2607    if not apply_update_success:
2608        _ = clean_up_temp_tables()
2609        return apply_update_success, apply_update_msg
2610    update_count = apply_update_results[0].rowcount if apply_update_results else 0
2611
2612    session.commit()
2613
2614    msg = (
2615        f"Inserted {unseen_count:,}, updated {update_count:,} rows."
2616        if not upsert
2617        else f"Upserted {update_count:,} row" + ('s' if update_count != 1 else '') + "."
2618    )
2619    _ = clean_up_temp_tables(ready_to_drop=True)
2620
2621    return True, msg

If a pipe's connector is the same as its instance connector, it's more efficient to sync the pipe in-place rather than reading data into Pandas.

Parameters
  • pipe (mrsm.Pipe): The pipe whose connector is the same as its instance.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.
  • begin (Union[datetime, int, None], default None): Optionally specify the earliest datetime to search for data. Defaults to None.
  • end (Union[datetime, int, None], default None): Optionally specify the latest datetime to search for data. Defaults to None.
  • chunksize (Optional[int], default -1): Specify the number of rows to sync per chunk. If -1, resort to system configuration (default is 900). A chunksize of None will sync all rows in one transaction. Defaults to -1.
  • check_existing (bool, default True): If True, pull and diff with existing data from the pipe.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple.
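Examples
A hedged sketch (the 'sql:main' connector and pipe keys are hypothetical, and the row counts are illustrative); this in-place sync is normally invoked for you by Pipe.sync() when a pipe's source and instance connectors match:
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('sql:main', 'weather', instance='sql:main')
>>> conn.sync_pipe_inplace(pipe)
(True, 'Inserted 100, updated 0 rows.')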
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, remote: bool = False, debug: bool = False) -> Union[datetime.datetime, int, NoneType]:
2624def get_sync_time(
2625    self,
2626    pipe: 'mrsm.Pipe',
2627    params: Optional[Dict[str, Any]] = None,
2628    newest: bool = True,
2629    remote: bool = False,
2630    debug: bool = False,
2631) -> Union[datetime, int, None]:
2632    """Get a Pipe's most recent datetime value.
2633
2634    Parameters
2635    ----------
2636    pipe: mrsm.Pipe
2637        The pipe to get the sync time for.
2638
2639    params: Optional[Dict[str, Any]], default None
2640        Optional params dictionary to build the `WHERE` clause.
2641        See `meerschaum.utils.sql.build_where`.
2642
2643    newest: bool, default True
2644        If `True`, get the most recent datetime (honoring `params`).
2645        If `False`, get the oldest datetime (ASC instead of DESC).
2646
2647    remote: bool, default False
2648        If `True`, return the sync time for the remote fetch definition.
2649
2650    Returns
2651    -------
2652    A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
2653    """
2654    from meerschaum.utils.sql import sql_item_name, build_where, wrap_query_with_cte
2655    src_name = sql_item_name('src', self.flavor)
2656    table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2657
2658    dt_col = pipe.columns.get('datetime', None)
2659    if dt_col is None:
2660        return None
2661    dt_col_name = sql_item_name(dt_col, self.flavor, None)
2662
2663    if remote and pipe.connector.type != 'sql':
2664        warn(f"Cannot get the remote sync time for {pipe}.")
2665        return None
2666
2667    ASC_or_DESC = "DESC" if newest else "ASC"
2668    existing_cols = pipe.get_columns_types(debug=debug)
2669    valid_params = {}
2670    if params is not None:
2671        valid_params = {k: v for k, v in params.items() if k in existing_cols}
2672    flavor = self.flavor if not remote else pipe.connector.flavor
2673
2674    ### If no bounds are provided for the datetime column,
2675    ### add IS NOT NULL to the WHERE clause.
2676    if dt_col not in valid_params:
2677        valid_params[dt_col] = '_None'
2678    where = "" if not valid_params else build_where(valid_params, self)
2679    src_query = (
2680        f"SELECT {dt_col_name}\nFROM {table_name}{where}"
2681        if not remote
2682        else self.get_pipe_metadef(pipe, params=params, begin=None, end=None)
2683    )
2684
2685    base_query = (
2686        f"SELECT {dt_col_name}\n"
2687        f"FROM {src_name}{where}\n"
2688        f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2689        f"LIMIT 1"
2690    )
2691    if self.flavor == 'mssql':
2692        base_query = (
2693            f"SELECT TOP 1 {dt_col_name}\n"
2694            f"FROM {src_name}{where}\n"
2695            f"ORDER BY {dt_col_name} {ASC_or_DESC}"
2696        )
2697    elif self.flavor == 'oracle':
2698        base_query = (
2699            "SELECT * FROM (\n"
2700            f"    SELECT {dt_col_name}\n"
2701            f"    FROM {src_name}{where}\n"
2702            f"    ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2703            ") WHERE ROWNUM = 1"
2704        )
2705
2706    query = wrap_query_with_cte(src_query, base_query, flavor)
2707
2708    try:
2709        db_time = self.value(query, silent=True, debug=debug)
2710
2711        ### No datetime could be found.
2712        if db_time is None:
2713            return None
2714        ### sqlite returns str.
2715        if isinstance(db_time, str):
2716            dateutil_parser = mrsm.attempt_import('dateutil.parser')
2717            st = dateutil_parser.parse(db_time)
2718        ### Do nothing if a datetime object is returned.
2719        elif isinstance(db_time, datetime):
2720            if hasattr(db_time, 'to_pydatetime'):
2721                st = db_time.to_pydatetime()
2722            else:
2723                st = db_time
2724        ### Sometimes the datetime is actually a date.
2725        elif isinstance(db_time, date):
2726            st = datetime.combine(db_time, datetime.min.time())
2727        ### Adding support for an integer datetime axis.
2728        elif 'int' in str(type(db_time)).lower():
2729            st = int(db_time)
2730        ### Convert pandas timestamp to Python datetime.
2731        else:
2732            st = db_time.to_pydatetime()
2733
2734        sync_time = st
2735
2736    except Exception as e:
2737        sync_time = None
2738        warn(str(e))
2739
2740    return sync_time

Get a Pipe's most recent datetime value.

Parameters
  • pipe (mrsm.Pipe): The pipe to get the sync time for.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause. See meerschaum.utils.sql.build_where.
  • newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
  • remote (bool, default False): If True, return the sync time for the remote fetch definition.
Returns
  • A datetime object (or int if using an integer axis) if the pipe exists, otherwise None.
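Examples
A hedged sketch (hypothetical keys; the return values are illustrative):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> conn.get_sync_time(pipe)
datetime.datetime(2024, 1, 1, 0, 0)
>>> conn.get_sync_time(pipe, newest=False)
datetime.datetime(2020, 1, 1, 0, 0)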
def pipe_exists(self, pipe: meerschaum.Pipe, debug: bool = False) -> bool:
2743def pipe_exists(
2744    self,
2745    pipe: mrsm.Pipe,
2746    debug: bool = False
2747) -> bool:
2748    """
2749    Check that a Pipe's table exists.
2750
2751    Parameters
2752    ----------
2753    pipe: mrsm.Pipe
2754        The pipe to check.
2755
2756    debug: bool, default False
2757        Verbosity toggle.
2758
2759    Returns
2760    -------
2761    A `bool` corresponding to whether a pipe's table exists.
2762
2763    """
2764    from meerschaum.utils.sql import table_exists
2765    exists = table_exists(
2766        pipe.target,
2767        self,
2768        schema=self.get_pipe_schema(pipe),
2769        debug=debug,
2770    )
2771    if debug:
2772        dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
2773    return exists

Check that a Pipe's table exists.

Parameters
  • pipe (mrsm.Pipe): The pipe to check.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A bool corresponding to whether a pipe's table exists.
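Examples
A hedged sketch (hypothetical keys):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> conn.pipe_exists(pipe)
True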
def get_pipe_rowcount( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, remote: bool = False, debug: bool = False) -> Optional[int]:
2776def get_pipe_rowcount(
2777    self,
2778    pipe: mrsm.Pipe,
2779    begin: Union[datetime, int, None] = None,
2780    end: Union[datetime, int, None] = None,
2781    params: Optional[Dict[str, Any]] = None,
2782    remote: bool = False,
2783    debug: bool = False
2784) -> Union[int, None]:
2785    """
2786    Get the rowcount for a pipe in accordance with given parameters.
2787
2788    Parameters
2789    ----------
2790    pipe: mrsm.Pipe
2791        The pipe to query with.
2792
2793    begin: Union[datetime, int, None], default None
2794        The begin datetime value.
2795
2796    end: Union[datetime, int, None], default None
2797        The end datetime value.
2798
2799    params: Optional[Dict[str, Any]], default None
2800        See `meerschaum.utils.sql.build_where`.
2801
2802    remote: bool, default False
2803        If `True`, get the rowcount for the remote table.
2804
2805    debug: bool, default False
2806        Verbosity toggle.
2807
2808    Returns
2809    -------
2810    An `int` for the number of rows if the `pipe` exists, otherwise `None`.
2811
2812    """
2813    from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte, build_where
2814    from meerschaum.connectors.sql._fetch import get_pipe_query
2815    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2816    if remote:
2817        msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
2818        if 'fetch' not in pipe.parameters:
2819            error(msg)
2820            return None
2821        if 'definition' not in pipe.parameters['fetch']:
2822            error(msg)
2823            return None
2824
2825    flavor = self.flavor if not remote else pipe.connector.flavor
2826    conn = self if not remote else pipe.connector
2827    _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
2828    dt_col = pipe.columns.get('datetime', None)
2829    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2830    dt_db_type = get_db_type_from_pd_type(dt_typ, flavor) if dt_typ else None
2831    if not dt_col:
2832        dt_col = pipe.guess_datetime()
2833        dt_name = sql_item_name(dt_col, flavor, None) if dt_col else None
2834        is_guess = True
2835    else:
2836        dt_col = pipe.get_columns('datetime')
2837        dt_name = sql_item_name(dt_col, flavor, None)
2838        is_guess = False
2839
2840    if begin is not None or end is not None:
2841        if is_guess:
2842            if dt_col is None:
2843                warn(
2844                    f"No datetime could be determined for {pipe}."
2845                    + "\n    Ignoring begin and end...",
2846                    stack=False,
2847                )
2848                begin, end = None, None
2849            else:
2850                warn(
2851                    f"A datetime wasn't specified for {pipe}.\n"
2852                    + f"    Using column \"{dt_col}\" for datetime bounds...",
2853                    stack=False,
2854                )
2855
2856
2857    _datetime_name = sql_item_name(dt_col, flavor)
2858    _cols_names = [
2859        sql_item_name(col, flavor)
2860        for col in set(
2861            (
2862                [dt_col]
2863                if dt_col
2864                else []
2865            ) + (
2866                []
2867                if params is None
2868                else list(params.keys())
2869            )
2870        )
2871    ]
2872    if not _cols_names:
2873        _cols_names = ['*']
2874
2875    src = (
2876        f"SELECT {', '.join(_cols_names)}\nFROM {_pipe_name}"
2877        if not remote
2878        else get_pipe_query(pipe)
2879    )
2880    parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', flavor)}"
2881    query = wrap_query_with_cte(src, parent_query, flavor)
2882    if begin is not None or end is not None:
2883        query += "\nWHERE"
2884    if begin is not None:
2885        query += (
2886            f"\n    {dt_name} >= "
2887            + dateadd_str(flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
2888        )
2889    if end is not None and begin is not None:
2890        query += "\n    AND"
2891    if end is not None:
2892        query += (
2893            f"\n    {dt_name} <  "
2894            + dateadd_str(flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
2895        )
2896    if params is not None:
2897        existing_cols = pipe.get_columns_types(debug=debug)
2898        valid_params = {k: v for k, v in params.items() if k in existing_cols}
2899        if valid_params:
2900            query += build_where(valid_params, conn).replace('WHERE', (
2901                'AND' if (begin is not None or end is not None)
2902                    else 'WHERE'
2903                )
2904            )
2905
2906    result = conn.value(query, debug=debug, silent=True)
2907    try:
2908        return int(result)
2909    except Exception:
2910        return None

Get the rowcount for a pipe in accordance with given parameters.

Parameters
  • pipe (mrsm.Pipe): The pipe to query with.
  • begin (Union[datetime, int, None], default None): The begin datetime value.
  • end (Union[datetime, int, None], default None): The end datetime value.
  • params (Optional[Dict[str, Any]], default None): See meerschaum.utils.sql.build_where.
  • remote (bool, default False): If True, get the rowcount for the remote table.
  • debug (bool, default False): Verbosity toggle.
Returns
  • An int for the number of rows if the pipe exists, otherwise None.
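Examples
A hedged sketch (hypothetical keys; the counts are illustrative):
>>> import meerschaum as mrsm
>>> from datetime import datetime
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> conn.get_pipe_rowcount(pipe)
10000
>>> conn.get_pipe_rowcount(pipe, begin=datetime(2024, 1, 1))
250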
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False, **kw) -> Tuple[bool, str]:
2913def drop_pipe(
2914    self,
2915    pipe: mrsm.Pipe,
2916    debug: bool = False,
2917    **kw
2918) -> SuccessTuple:
2919    """
2920    Drop a pipe's tables but maintain its registration.
2921
2922    Parameters
2923    ----------
2924    pipe: mrsm.Pipe
2925        The pipe to drop.
2926
2927    Returns
2928    -------
2929    A `SuccessTuple` indicating success.
2930    """
2931    from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
2932    success = True
2933    target = pipe.target
2934    schema = self.get_pipe_schema(pipe)
2935    target_name = (
2936        sql_item_name(target, self.flavor, schema)
2937    )
2938    if table_exists(target, self, schema=schema, debug=debug):
2939        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
2940        success = self.exec(
2941            f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
2942        ) is not None
2943
2944    msg = "Success" if success else f"Failed to drop {pipe}."
2945    return success, msg

Drop a pipe's tables but maintain its registration.

Parameters
  • pipe (mrsm.Pipe): The pipe to drop.
Returns
  • A SuccessTuple indicating success.
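Examples
A hedged sketch (hypothetical keys); note that the pipe's registration survives the drop:
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> conn.drop_pipe(pipe)
(True, 'Success')
>>> conn.pipe_exists(pipe)
False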
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
2948def clear_pipe(
2949    self,
2950    pipe: mrsm.Pipe,
2951    begin: Union[datetime, int, None] = None,
2952    end: Union[datetime, int, None] = None,
2953    params: Optional[Dict[str, Any]] = None,
2954    debug: bool = False,
2955    **kw
2956) -> SuccessTuple:
2957    """
2958    Delete a pipe's data within a bounded or unbounded interval without dropping the table.
2959
2960    Parameters
2961    ----------
2962    pipe: mrsm.Pipe
2963        The pipe to clear.
2964        
2965    begin: Union[datetime, int, None], default None
2966        Beginning datetime. Inclusive.
2967
2968    end: Union[datetime, int, None], default None
2969        Ending datetime. Exclusive.
2970
2971    params: Optional[Dict[str, Any]], default None
2972        See `meerschaum.utils.sql.build_where`.
2973
2974    """
2975    if not pipe.exists(debug=debug):
2976        return True, f"{pipe} does not exist, so nothing was cleared."
2977
2978    from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
2979    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2980    pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2981
2982    dt_col = pipe.columns.get('datetime', None)
2983    dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2984    dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
2985    if not pipe.columns.get('datetime', None):
2986        dt_col = pipe.guess_datetime()
2987        dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
2988        is_guess = True
2989    else:
2990        dt_col = pipe.get_columns('datetime')
2991        dt_name = sql_item_name(dt_col, self.flavor, None)
2992        is_guess = False
2993
2994    if begin is not None or end is not None:
2995        if is_guess:
2996            if dt_col is None:
2997                warn(
2998                    f"No datetime could be determined for {pipe}."
2999                    + "\n    Ignoring datetime bounds...",
3000                    stack=False,
3001                )
3002                begin, end = None, None
3003            else:
3004                warn(
3005                    f"A datetime wasn't specified for {pipe}.\n"
3006                    + f"    Using column \"{dt_col}\" for datetime bounds...",
3007                    stack=False,
3008                )
3009
3010    valid_params = {}
3011    if params is not None:
3012        existing_cols = pipe.get_columns_types(debug=debug)
3013        valid_params = {k: v for k, v in params.items() if k in existing_cols}
3014    clear_query = (
3015        f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n"
3016        + ('\n    AND ' + build_where(valid_params, self, with_where=False) if valid_params else '')
3017        + (
3018            (
3019                f'\n    AND {dt_name} >= '
3020                + dateadd_str(self.flavor, 'day', 0, begin, db_type=dt_db_type)
3021            )
3022            if begin is not None
3023            else ''
3024        ) + (
3025            (
3026                f'\n    AND {dt_name} <  '
3027                + dateadd_str(self.flavor, 'day', 0, end, db_type=dt_db_type)
3028            )
3029            if end is not None
3030            else ''
3031        )
3032    )
3033    success = self.exec(clear_query, silent=True, debug=debug) is not None
3034    msg = "Success" if success else f"Failed to clear {pipe}."
3035    return success, msg

Delete a pipe's data within a bounded or unbounded interval without dropping the table.

Parameters
  • pipe (mrsm.Pipe): The pipe to clear.
  • begin (Union[datetime, int, None], default None): Beginning datetime. Inclusive.
  • end (Union[datetime, int, None], default None): Ending datetime. Exclusive.
  • params (Optional[Dict[str, Any]], default None): See meerschaum.utils.sql.build_where.
Returns
  • A SuccessTuple indicating whether the clear was successful.
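Examples
A hedged sketch (hypothetical keys); rows with dt values in January 2024 are deleted, but the table itself remains:
>>> import meerschaum as mrsm
>>> from datetime import datetime
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main', columns={'datetime': 'dt'})
>>> conn.clear_pipe(pipe, begin=datetime(2024, 1, 1), end=datetime(2024, 2, 1))
(True, 'Success')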
def deduplicate_pipe( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, int, NoneType] = None, end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kwargs: Any) -> Tuple[bool, str]:
3678def deduplicate_pipe(
3679    self,
3680    pipe: mrsm.Pipe,
3681    begin: Union[datetime, int, None] = None,
3682    end: Union[datetime, int, None] = None,
3683    params: Optional[Dict[str, Any]] = None,
3684    debug: bool = False,
3685    **kwargs: Any
3686) -> SuccessTuple:
3687    """
3688    Delete duplicate values within a pipe's table.
3689
3690    Parameters
3691    ----------
3692    pipe: mrsm.Pipe
3693        The pipe whose table to deduplicate.
3694
3695    begin: Union[datetime, int, None], default None
3696        If provided, only deduplicate values greater than or equal to this value.
3697
3698    end: Union[datetime, int, None], default None
3699        If provided, only deduplicate values less than this value.
3700
3701    params: Optional[Dict[str, Any]], default None
3702        If provided, further limit deduplication to values which match this query dictionary.
3703
3704    debug: bool, default False
3705        Verbosity toggle.
3706
3707    Returns
3708    -------
3709    A `SuccessTuple` indicating success.
3710    """
3711    from meerschaum.utils.sql import (
3712        sql_item_name,
3713        get_rename_table_queries,
3714        DROP_IF_EXISTS_FLAVORS,
3715        get_create_table_query,
3716        format_cte_subquery,
3717        get_null_replacement,
3718    )
3719    from meerschaum.utils.misc import generate_password, flatten_list
3720
3721    pipe_table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
3722
3723    if not pipe.exists(debug=debug):
3724        return False, f"Table {pipe_table_name} does not exist."
3725
3726    dt_col = pipe.columns.get('datetime', None)
3727    cols_types = pipe.get_columns_types(debug=debug)
3728    existing_cols = pipe.get_columns_types(debug=debug)
3729
3730    get_rowcount_query = f"SELECT COUNT(*) FROM {pipe_table_name}"
3731    old_rowcount = self.value(get_rowcount_query, debug=debug)
3732    if old_rowcount is None:
3733        return False, f"Failed to get rowcount for table {pipe_table_name}."
3734
3735    ### Non-datetime indices that in fact exist.
3736    indices = [
3737        col
3738        for key, col in pipe.columns.items()
3739        if col and col != dt_col and col in cols_types
3740    ]
3741    indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
3742    existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
3743    duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
3744    previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
3745
3746    index_list_str = (
3747        sql_item_name(dt_col, self.flavor, None)
3748        if dt_col
3749        else ''
3750    )
3751    index_list_str_ordered = (
3752        (
3753            sql_item_name(dt_col, self.flavor, None) + " DESC"
3754        )
3755        if dt_col
3756        else ''
3757    )
3758    if indices:
3759        index_list_str += ', ' + ', '.join(indices_names)
3760        index_list_str_ordered += ', ' + ', '.join(indices_names)
3761    if index_list_str.startswith(','):
3762        index_list_str = index_list_str.lstrip(',').lstrip()
3763    if index_list_str_ordered.startswith(','):
3764        index_list_str_ordered = index_list_str_ordered.lstrip(',').lstrip()
3765
3766    cols_list_str = ', '.join(existing_cols_names)
3767
3768    try:
3769        ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()).
3770        is_old_mysql = (
3771            self.flavor in ('mysql', 'mariadb')
3772            and
3773            int(self.db_version.split('.')[0]) < 8
3774        )
3775    except Exception:
3776        is_old_mysql = False
3777
3778    src_query = f"""
3779        SELECT
3780            {cols_list_str},
3781            ROW_NUMBER() OVER (
3782                PARTITION BY
3783                {index_list_str}
3784                ORDER BY {index_list_str_ordered}
3785            ) AS {duplicate_row_number_name}
3786        FROM {pipe_table_name}
3787    """
3788    duplicates_cte_subquery = format_cte_subquery(
3789        src_query,
3790        self.flavor,
3791        sub_name = 'src',
3792        cols_to_select = cols_list_str,
3793    ) + f"""
3794        WHERE {duplicate_row_number_name} = 1
3795        """
3796    old_mysql_query = (
3797        f"""
3798        SELECT
3799            {index_list_str}
3800        FROM (
3801          SELECT
3802            {index_list_str},
3803            IF(
3804                @{previous_row_number_name} <> {index_list_str.replace(', ', ' + ')},
3805                @{duplicate_row_number_name} := 0,
3806                @{duplicate_row_number_name}
3807            ),
3808            @{previous_row_number_name} := {index_list_str.replace(', ', ' + ')},
3809            @{duplicate_row_number_name} := @{duplicate_row_number_name} + 1 AS """
3810        + f"""{duplicate_row_number_name}
3811          FROM
3812            {pipe_table_name},
3813            (
3814                SELECT @{duplicate_row_number_name} := 0
3815            ) AS {duplicate_row_number_name},
3816            (
3817                SELECT @{previous_row_number_name} := '{get_null_replacement('str', 'mysql')}'
3818            ) AS {previous_row_number_name}
3819          ORDER BY {index_list_str_ordered}
3820        ) AS t
3821        WHERE {duplicate_row_number_name} = 1
3822        """
3823    )
3824    if is_old_mysql:
3825        duplicates_cte_subquery = old_mysql_query
3826
3827    session_id = generate_password(3)
3828
3829    dedup_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='dedup')
3830    temp_old_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='old')
3831    temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3832
3833    create_temporary_table_query = get_create_table_query(
3834        duplicates_cte_subquery,
3835        dedup_table,
3836        self.flavor,
3837    ) + f"""
3838    ORDER BY {index_list_str_ordered}
3839    """
3840    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
3841    alter_queries = flatten_list([
3842        get_rename_table_queries(
3843            pipe.target,
3844            temp_old_table,
3845            self.flavor,
3846            schema=self.get_pipe_schema(pipe),
3847        ),
3848        get_rename_table_queries(
3849            dedup_table,
3850            pipe.target,
3851            self.flavor,
3852            schema=self.get_pipe_schema(pipe),
3853        ),
3854        f"DROP TABLE {if_exists_str} {temp_old_table_name}",
3855    ])
3856
3857    self._log_temporary_tables_creation(temp_old_table, create=(not pipe.temporary), debug=debug)
3858    create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
3859    if create_temporary_result is None:
3860        return False, f"Failed to deduplicate table {pipe_table_name}."
3861
3862    results = self.exec_queries(
3863        alter_queries,
3864        break_on_error=True,
3865        rollback=True,
3866        debug=debug,
3867    )
3868
3869    fail_query = None
3870    for result, query in zip(results, alter_queries):
3871        if result is None:
3872            fail_query = query
3873            break
3874    success = fail_query is None
3875
3876    new_rowcount = (
3877        self.value(get_rowcount_query, debug=debug)
3878        if success
3879        else None
3880    )
3881
3882    msg = (
3883        (
3884            f"Successfully deduplicated table {pipe_table_name}"
3885            + (
3886                f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
3887                if old_rowcount != new_rowcount
3888                else ''
3889            ) + '.'
3890        )
3891        if success
3892        else f"Failed to execute query:\n{fail_query}"
3893    )
3894    return success, msg

Delete duplicate values within a pipe's table.

Parameters
  • pipe (mrsm.Pipe): The pipe whose table to deduplicate.
  • begin (Union[datetime, int, None], default None): If provided, only deduplicate values greater than or equal to this value.
  • end (Union[datetime, int, None], default None): If provided, only deduplicate values less than this value.
  • params (Optional[Dict[str, Any]], default None): If provided, further limit deduplication to values which match this query dictionary.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A SuccessTuple indicating success.
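Examples
A hedged sketch (hypothetical keys; the rowcounts in the message are illustrative):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> conn.deduplicate_pipe(pipe)
(True, 'Successfully deduplicated table "weather"\nfrom 10,100 to 10,000 rows.')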
def get_pipe_table( self, pipe: meerschaum.Pipe, debug: bool = False) -> "Union['sqlalchemy.Table', None]":
3038def get_pipe_table(
3039    self,
3040    pipe: mrsm.Pipe,
3041    debug: bool = False,
3042) -> Union['sqlalchemy.Table', None]:
3043    """
3044    Return the `sqlalchemy.Table` object for a `mrsm.Pipe`.
3045
3046    Parameters
3047    ----------
3048    pipe: mrsm.Pipe
3049        The pipe in question.
3050
3051    Returns
3052    -------
3053    A `sqlalchemy.Table` object. 
3054
3055    """
3056    from meerschaum.utils.sql import get_sqlalchemy_table
3057    if not pipe.exists(debug=debug):
3058        return None
3059
3060    return get_sqlalchemy_table(
3061        pipe.target,
3062        connector=self,
3063        schema=self.get_pipe_schema(pipe),
3064        debug=debug,
3065        refresh=True,
3066    )

Return the sqlalchemy.Table object for a mrsm.Pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe in question.
Returns
  • A sqlalchemy.Table object.
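Examples
A hedged sketch (hypothetical keys and column names):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
>>> table = conn.get_pipe_table(pipe)
>>> [col.name for col in table.columns] if table is not None else None
['dt', 'id', 'val']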
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, str]:
3069def get_pipe_columns_types(
3070    self,
3071    pipe: mrsm.Pipe,
3072    debug: bool = False,
3073) -> Dict[str, str]:
3074    """
3075    Get the pipe's columns and types.
3076
3077    Parameters
3078    ----------
3079    pipe: mrsm.Pipe
3080        The pipe to get the columns for.
3081
3082    Returns
3083    -------
3084    A dictionary of column names (`str`) and types (`str`).
3085
3086    Examples
3087    --------
3088    >>> conn.get_pipe_columns_types(pipe)
3089    {
3090      'dt': 'TIMESTAMP WITHOUT TIMEZONE',
3091      'id': 'BIGINT',
3092      'val': 'DOUBLE PRECISION',
3093    }
3094    >>> 
3095    """
3096    from meerschaum.utils.sql import get_table_cols_types
3097    if not pipe.exists(debug=debug):
3098        return {}
3099
3100    if self.flavor not in ('oracle', 'mysql', 'mariadb', 'sqlite', 'geopackage'):
3101        return get_table_cols_types(
3102            pipe.target,
3103            self,
3104            flavor=self.flavor,
3105            schema=self.get_pipe_schema(pipe),
3106            debug=debug,
3107        )
3108
3109    if debug:
3110        dprint(f"Fetching columns_types for {pipe} via the SQLAlchemy table.")
3111
3112    table_columns = {}
3113    try:
3114        pipe_table = self.get_pipe_table(pipe, debug=debug)
3115        if pipe_table is None:
3116            return {}
3117
3118        if debug:
3119            dprint("Found columns:")
3120            mrsm.pprint(dict(pipe_table.columns))
3121
3122        for col in pipe_table.columns:
3123            table_columns[str(col.name)] = str(col.type)
3124    except Exception as e:
3125        traceback.print_exc()
3126        warn(e)
3127        table_columns = {}
3128
3129    return table_columns

Get the pipe's columns and types.

Parameters
  • pipe (mrsm.Pipe): The pipe to get the columns for.
Returns
  • A dictionary of column names (str) and types (str).
Examples
>>> conn.get_pipe_columns_types(pipe)
{
  'dt': 'TIMESTAMP WITHOUT TIMEZONE',
  'id': 'BIGINT',
  'val': 'DOUBLE PRECISION',
}
>>>
def get_to_sql_dtype( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'", update_dtypes: bool = True) -> "Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']":
3624def get_to_sql_dtype(
3625    self,
3626    pipe: 'mrsm.Pipe',
3627    df: 'pd.DataFrame',
3628    update_dtypes: bool = True,
3629) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
3630    """
3631    Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
3632
3633    Parameters
3634    ----------
3635    pipe: mrsm.Pipe
3636        The pipe which may contain a `dtypes` parameter.
3637
3638    df: pd.DataFrame
3639        The DataFrame to be pushed via `to_sql()`.
3640
3641    update_dtypes: bool, default True
3642        If `True`, patch the pipe's dtypes onto the DataFrame's dtypes.
3643
3644    Returns
3645    -------
3646    A dictionary with `sqlalchemy` datatypes.
3647
3648    Examples
3649    --------
3650    >>> import pandas as pd
3651    >>> import meerschaum as mrsm
3652    >>> 
3653    >>> conn = mrsm.get_connector('sql:memory')
3654    >>> df = pd.DataFrame([{'a': {'b': 1}}])
3655    >>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
3656    >>> get_to_sql_dtype(pipe, df)
3657    {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3658    """
3659    from meerschaum.utils.dataframe import get_special_cols
3660    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
3661    df_dtypes = {
3662        col: str(typ)
3663        for col, typ in df.dtypes.items()
3664    }
3665    special_cols = get_special_cols(df)
3666    df_dtypes.update(special_cols)
3667
3668    if update_dtypes:
3669        df_dtypes.update(pipe.dtypes)
3670
3671    return {
3672        col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
3673        for col, typ in df_dtypes.items()
3674        if col and typ
3675    }

Given a pipe and DataFrame, return the dtype dictionary for to_sql().

Parameters
  • pipe (mrsm.Pipe): The pipe which may contain a dtypes parameter.
  • df (pd.DataFrame): The DataFrame to be pushed via to_sql().
  • update_dtypes (bool, default True): If True, patch the pipe's dtypes onto the DataFrame's dtypes.
Returns
  • A dictionary with sqlalchemy datatypes.
Examples
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> 
>>> conn = mrsm.get_connector('sql:memory')
>>> df = pd.DataFrame([{'a': {'b': 1}}])
>>> pipe = mrsm.Pipe('a', 'b', dtypes={'a': 'json'})
>>> get_to_sql_dtype(pipe, df)
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
def get_pipe_schema(self, pipe: meerschaum.Pipe) -> Optional[str]:
3897def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
3898    """
3899    Return the schema to use for this pipe.
3900    First check `pipe.parameters['schema']`, then check `self.schema`.
3901
3902    Parameters
3903    ----------
3904    pipe: mrsm.Pipe
3905        The pipe which may contain a configured schema.
3906
3907    Returns
3908    -------
3909    A schema string or `None` if nothing is configured.
3910    """
3911    if self.flavor in ('sqlite', 'geopackage'):
3912        return self.schema
3913    return pipe.parameters.get('schema', self.schema)

Return the schema to use for this pipe. First check pipe.parameters['schema'], then check self.schema.

Parameters
  • pipe (mrsm.Pipe): The pipe which may contain a configured schema.
Returns
  • A schema string or None if nothing is configured.
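Examples
A hedged sketch (hypothetical keys; 'analytics' is a made-up schema):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe(
...     'plugin:noaa', 'weather',
...     instance='sql:main',
...     parameters={'schema': 'analytics'},
... )
>>> conn.get_pipe_schema(pipe)
'analytics'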
def create_pipe_table_from_df( self, pipe: meerschaum.Pipe, df: "'pd.DataFrame'", debug: bool = False) -> Tuple[bool, str]:
1551def create_pipe_table_from_df(
1552    self,
1553    pipe: mrsm.Pipe,
1554    df: 'pd.DataFrame',
1555    debug: bool = False,
1556) -> mrsm.SuccessTuple:
1557    """
1558    Create a pipe's table from its configured dtypes and an incoming dataframe.
1559    """
1560    from meerschaum.utils.dataframe import get_special_cols
1561    from meerschaum.utils.sql import (
1562        get_create_table_queries,
1563        sql_item_name,
1564        get_create_schema_if_not_exists_queries,
1565    )
1566    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1567    if self.flavor == 'geopackage':
1568        init_success, init_msg = self._init_geopackage_pipe(df, pipe, debug=debug)
1569        if not init_success:
1570            return init_success, init_msg
1571
1572    primary_key = pipe.columns.get('primary', None)
1573    primary_key_typ = (
1574        pipe.dtypes.get(primary_key, str(df.dtypes.get(primary_key, 'int')))
1575        if primary_key
1576        else None
1577    )
1578    primary_key_db_type = (
1579        get_db_type_from_pd_type(primary_key_typ, self.flavor)
1580        if primary_key
1581        else None
1582    )
1583    dt_col = pipe.columns.get('datetime', None)
1584    new_dtypes = {
1585        **{
1586            col: str(typ)
1587            for col, typ in df.dtypes.items()
1588        },
1589        **{
1590            col: str(df.dtypes.get(col, 'int'))
1591            for col_ix, col in pipe.columns.items()
1592            if col and col_ix != 'primary'
1593        },
1594        **get_special_cols(df),
1595        **pipe.dtypes
1596    }
1597    autoincrement = (
1598        pipe.parameters.get('autoincrement', False)
1599        or (primary_key and primary_key not in new_dtypes)
1600    )
1601    if autoincrement:
1602        _ = new_dtypes.pop(primary_key, None)
1603
1604    schema = self.get_pipe_schema(pipe)
1605    create_table_queries = get_create_table_queries(
1606        new_dtypes,
1607        pipe.target,
1608        self.flavor,
1609        schema=schema,
1610        primary_key=primary_key,
1611        primary_key_db_type=primary_key_db_type,
1612        datetime_column=dt_col,
1613    )
1614    if schema:
1615        create_table_queries = (
1616            get_create_schema_if_not_exists_queries(schema, self.flavor)
1617            + create_table_queries
1618        )
1619    success = all(
1620        self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
1621    )
1622    target_name = sql_item_name(pipe.target, schema=self.get_pipe_schema(pipe), flavor=self.flavor)
1623    msg = (
1624        "Success"
1625        if success
1626        else f"Failed to create {target_name}."
1627    )
1628    if success and self.flavor == 'geopackage':
1629        return self._init_geopackage_pipe(df, pipe, debug=debug)
1630
1631    return success, msg

Create a pipe's table from its configured dtypes and an incoming dataframe.
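Examples
A hedged sketch (hypothetical keys); the table's columns come from the pipe's configured dtypes patched with the DataFrame's:
>>> import pandas as pd
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('a', 'b', instance='sql:main', columns={'primary': 'id'})
>>> df = pd.DataFrame([{'id': 1, 'val': 1.0}])
>>> conn.create_pipe_table_from_df(pipe, df)
(True, 'Success')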

def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, List[Dict[str, str]]]:
3132def get_pipe_columns_indices(
3133    self,
3134    pipe: mrsm.Pipe,
3135    debug: bool = False,
3136) -> Dict[str, List[Dict[str, str]]]:
3137    """
3138    Return a dictionary mapping columns to the indices created on those columns.
3139
3140    Parameters
3141    ----------
3142    pipe: mrsm.Pipe
3143        The pipe to be queried against.
3144
3145    Returns
3146    -------
3147    A dictionary mapping column names to lists of dictionaries.
3148    The dictionaries in the lists contain the name and type of the indices.
3149    """
3150    if pipe.__dict__.get('_skip_check_indices', False):
3151        return {}
3152
3153    from meerschaum.utils.sql import get_table_cols_indices
3154    return get_table_cols_indices(
3155        pipe.target,
3156        self,
3157        flavor=self.flavor,
3158        schema=self.get_pipe_schema(pipe),
3159        debug=debug,
3160    )

Return a dictionary mapping columns to the indices created on those columns.

Parameters
  • pipe (mrsm.Pipe): The pipe to be queried against.
Returns
  • A dictionary mapping column names to lists of dictionaries.
  • The dictionaries in the lists contain the name and type of the indices.
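Examples
A hedged sketch (hypothetical keys; the index names and types returned vary by flavor):
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main', columns={'id': 'station'})
>>> conn.get_pipe_columns_indices(pipe)
{'station': [{'name': 'IX_weather_station', 'type': 'INDEX'}]}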
@staticmethod
def get_temporary_target( target: str, transact_id: Optional[str] = None, label: Optional[str] = None, separator: Optional[str] = None) -> str:
3916@staticmethod
3917def get_temporary_target(
3918    target: str,
3919    transact_id: Optional[str] = None,
3920    label: Optional[str] = None,
3921    separator: Optional[str] = None,
3922) -> str:
3923    """
3924    Return a unique(ish) temporary target for a pipe.
3925    """
3926    from meerschaum.utils.misc import generate_password
3927    temp_target_cf = (
3928        mrsm.get_config('system', 'connectors', 'sql', 'instance', 'temporary_target') or {}
3929    )
3930    transaction_id_len = temp_target_cf.get('transaction_id_length', 3)
3931    transact_id = transact_id or generate_password(transaction_id_len)
3932    temp_prefix = temp_target_cf.get('prefix', '_')
3933    separator = separator or temp_target_cf.get('separator', '_')
3934    return (
3935        temp_prefix
3936        + target
3937        + separator
3938        + transact_id
3939        + ((separator + label) if label else '')
3940    )

Return a unique(ish) temporary target for a pipe.
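Examples
With the default prefix ('_') and separator ('_') from the config fallbacks above (the target and transaction ID here are hypothetical):
>>> from meerschaum.connectors import SQLConnector
>>> SQLConnector.get_temporary_target('weather', transact_id='abc', label='dedup')
'_weather_abc_dedup'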

def create_pipe_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, debug: bool = False) -> Tuple[bool, str]:
351def create_pipe_indices(
352    self,
353    pipe: mrsm.Pipe,
354    columns: Optional[List[str]] = None,
355    debug: bool = False,
356) -> SuccessTuple:
357    """
358    Create a pipe's indices.
359    """
360    success = self.create_indices(pipe, columns=columns, debug=debug)
361    msg = (
362        "Success"
363        if success
364        else f"Failed to create indices for {pipe}."
365    )
366    return success, msg

Create a pipe's indices.
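Examples
A hedged sketch (hypothetical keys); omit columns to create all configured indices, and see drop_pipe_indices() below for the inverse:
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main', columns={'datetime': 'dt'})
>>> conn.create_pipe_indices(pipe)
(True, 'Success')
>>> conn.create_pipe_indices(pipe, columns=['datetime'])
(True, 'Success')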

def drop_pipe_indices( self, pipe: meerschaum.Pipe, columns: Optional[List[str]] = None, debug: bool = False) -> Tuple[bool, str]:
407def drop_pipe_indices(
408    self,
409    pipe: mrsm.Pipe,
410    columns: Optional[List[str]] = None,
411    debug: bool = False,
412) -> SuccessTuple:
413    """
414    Drop a pipe's indices.
415    """
416    success = self.drop_indices(pipe, columns=columns, debug=debug)
417    msg = (
418        "Success"
419        if success
420        else f"Failed to drop indices for {pipe}."
421    )
422    return success, msg

Drop a pipe's indices.

def get_pipe_index_names(self, pipe: meerschaum.Pipe) -> Dict[str, str]:
459def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]:
460    """
461    Return a dictionary mapping index keys to their names on the database.
462
463    Returns
464    -------
465    A dictionary of index keys to index names on the database.
466    """
467    from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS, truncate_item_name
468    _parameters = pipe.parameters
469    _index_template = _parameters.get('index_template', "IX_{schema_str}{target}_{column_names}")
470    _schema = self.get_pipe_schema(pipe)
471    if _schema is None:
472        _schema = (
473            DEFAULT_SCHEMA_FLAVORS.get(self.flavor, None)
474            if self.flavor != 'mssql'
475            else None
476        )
477    schema_str = '' if _schema is None else f'{_schema}_'
478    schema_str = ''
479    _indices = pipe.indices
480    _target = pipe.target
481    _column_names = {
482        ix: (
483            '_'.join(cols)
484            if isinstance(cols, (list, tuple))
485            else str(cols)
486        )
487        for ix, cols in _indices.items()
488        if cols
489    }
490    _index_names = {
491        ix: _index_template.format(
492            target=_target,
493            column_names=column_names,
494            connector_keys=pipe.connector_keys,
495            metric_key=pipe.metric_key,
496            location_key=pipe.location_key,
497            schema_str=schema_str,
498        )
499        for ix, column_names in _column_names.items()
500    }
501    ### NOTE: Skip any duplicate indices.
502    seen_index_names = {}
503    for ix, index_name in _index_names.items():
504        if index_name in seen_index_names:
505            continue
506        seen_index_names[index_name] = ix
507    return {
508        ix: truncate_item_name(index_name, flavor=self.flavor)
509        for index_name, ix in seen_index_names.items()
510    }

Return a dictionary mapping index keys to their names on the database.

Returns
  • A dictionary of index keys to index names on the database.
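Examples
A hedged sketch (hypothetical keys), following the default template IX_{schema_str}{target}_{column_names}:
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> pipe = mrsm.Pipe(
...     'plugin:noaa', 'weather',
...     instance='sql:main',
...     columns={'datetime': 'dt', 'id': 'station'},
... )
>>> conn.get_pipe_index_names(pipe)
{'datetime': 'IX_weather_dt', 'id': 'IX_weather_station'}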
def get_plugins_pipe(self) -> meerschaum.Pipe:
18def get_plugins_pipe(self) -> mrsm.Pipe:
19    """
20    Return the internal metadata plugins pipe.
21    """
22    users_pipe = self.get_users_pipe()
23    user_id_dtype = users_pipe.dtypes.get('user_id', 'int')
24    return mrsm.Pipe(
25        'mrsm', 'plugins',
26        instance=self,
27        temporary=True,
28        static=True,
29        null_indices=False,
30        columns={
31            'primary': 'plugin_id',
32            'user_id': 'user_id',    
33        },
34        dtypes={
35            'plugin_name': 'string',
36            'user_id': user_id_dtype,
37            'attributes': 'json',
38            'version': 'string',
39        },
40        indices={
41            'unique': 'plugin_name',
42        },
43    )

Return the internal metadata plugins pipe.

def register_plugin( self, plugin: meerschaum.Plugin, force: bool = False, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
 46def register_plugin(
 47    self,
 48    plugin: 'mrsm.core.Plugin',
 49    force: bool = False,
 50    debug: bool = False,
 51    **kw: Any
 52) -> SuccessTuple:
 53    """Register a new plugin to the plugins table."""
 54    from meerschaum.utils.packages import attempt_import
 55    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
 56    from meerschaum.utils.sql import json_flavors
 57    from meerschaum.connectors.sql.tables import get_tables
 58    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
 59
 60    old_id = self.get_plugin_id(plugin, debug=debug)
 61
 62    ### Check for version conflict. May be overridden with `--force`.
 63    if old_id is not None and not force:
 64        old_version = self.get_plugin_version(plugin, debug=debug)
 65        new_version = plugin.version
 66        if old_version is None:
 67            old_version = ''
 68        if new_version is None:
 69            new_version = ''
 70
 71        ### verify that the new version is greater than the old
 72        packaging_version = attempt_import('packaging.version')
 73        if (
 74            old_version and new_version
 75            and packaging_version.parse(old_version) >= packaging_version.parse(new_version)
 76        ):
 77            return False, (
 78                f"Version '{new_version}' of plugin '{plugin}' " +
 79                f"must be greater than existing version '{old_version}'."
 80            )
 81
 82    bind_variables = {
 83        'plugin_name': plugin.name,
 84        'version': plugin.version,
 85        'attributes': (
 86            json.dumps(plugin.attributes) if self.flavor not in json_flavors else plugin.attributes
 87        ),
 88        'user_id': plugin.user_id,
 89    }
 90
 91    if old_id is None:
 92        query = sqlalchemy.insert(plugins_tbl).values(**bind_variables)
 93    else:
 94        query = (
 95            sqlalchemy.update(plugins_tbl)
 96            .values(**bind_variables)
 97            .where(plugins_tbl.c.plugin_id == old_id)
 98        )
 99
100    result = self.exec(query, debug=debug)
101    if result is None:
102        return False, f"Failed to register plugin '{plugin}'."
103    return True, f"Successfully registered plugin '{plugin}'."

Register a new plugin to the plugins table.
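Examples
A hedged sketch (the plugin name is hypothetical); registering a version not greater than the existing one fails unless force=True:
>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('sql:main')
>>> plugin = mrsm.Plugin('example')
>>> conn.register_plugin(plugin)
(True, "Successfully registered plugin 'example'.")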

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
272def delete_plugin(
273    self,
274    plugin: 'mrsm.core.Plugin',
275    debug: bool = False,
276    **kw: Any
277) -> SuccessTuple:
278    """Delete a plugin from the plugins table."""
279    from meerschaum.utils.packages import attempt_import
280    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
281    from meerschaum.connectors.sql.tables import get_tables
282    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
283
284    plugin_id = self.get_plugin_id(plugin, debug=debug)
285    if plugin_id is None:
286        return True, f"Plugin '{plugin}' was not registered."
287
288    query = sqlalchemy.delete(plugins_tbl).where(plugins_tbl.c.plugin_id == plugin_id)
289    result = self.exec(query, debug=debug)
290    if result is None:
291        return False, f"Failed to delete plugin '{plugin}'."
292    return True, f"Successfully deleted plugin '{plugin}'."

Delete a plugin from the plugins table.

def get_plugin_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[int]:
105def get_plugin_id(
106    self,
107    plugin: 'mrsm.core.Plugin',
108    debug: bool = False
109) -> Optional[int]:
110    """
111    Return a plugin's ID.
112    """
113    ### ensure plugins table exists
114    from meerschaum.connectors.sql.tables import get_tables
115    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
116    from meerschaum.utils.packages import attempt_import
117    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
118
119    query = (
120        sqlalchemy
121        .select(plugins_tbl.c.plugin_id)
122        .where(plugins_tbl.c.plugin_name == plugin.name)
123    )
124    
125    try:
126        return int(self.value(query, debug=debug))
127    except Exception:
128        return None

Return a plugin's ID.

def get_plugin_version( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
131def get_plugin_version(
132    self,
133    plugin: 'mrsm.core.Plugin',
134    debug: bool = False
135) -> Optional[str]:
136    """
137    Return a plugin's version.
138    """
139    ### ensure plugins table exists
140    from meerschaum.connectors.sql.tables import get_tables
141    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
142    from meerschaum.utils.packages import attempt_import
143    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
144    query = sqlalchemy.select(plugins_tbl.c.version).where(plugins_tbl.c.plugin_name == plugin.name)
145    return self.value(query, debug=debug)

Return a plugin's version.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False, **kw: Any) -> List[str]:
225def get_plugins(
226    self,
227    user_id: Optional[int] = None,
228    search_term: Optional[str] = None,
229    debug: bool = False,
230    **kw: Any
231) -> List[str]:
232    """
233    Return a list of all registered plugins.
234
235    Parameters
236    ----------
237    user_id: Optional[int], default None
238        If specified, filter plugins by a specific `user_id`.
239
240    search_term: Optional[str], default None
241        If specified, add a `WHERE plugin_name LIKE '{search_term}%'` clause to filter the plugins.
242
243
244    Returns
245    -------
246    A list of plugin names.
247    """
248    ### ensure plugins table exists
249    from meerschaum.connectors.sql.tables import get_tables
250    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
251    from meerschaum.utils.packages import attempt_import
252    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
253
254    query = sqlalchemy.select(plugins_tbl.c.plugin_name)
255    if user_id is not None:
256        query = query.where(plugins_tbl.c.user_id == user_id)
257    if search_term is not None:
258        query = query.where(plugins_tbl.c.plugin_name.like(search_term + '%'))
259
260    rows = (
261        self.execute(query).fetchall()
262        if self.flavor != 'duckdb'
263        else [
264            (row['plugin_name'],)
265            for row in self.read(query).to_dict(orient='records')
266        ]
267    )
268    
269    return [row[0] for row in rows]

Return a list of all registered plugins.

Parameters
  • user_id (Optional[int], default None): If specified, filter plugins by a specific user_id.
  • search_term (Optional[str], default None): If specified, add a WHERE plugin_name LIKE '{search_term}%' clause to filter the plugins.
Returns
  • A list of plugin names.
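
A usage sketch (the plugin names shown are hypothetical):

>>> conn = mrsm.get_connector('sql:main')
>>> conn.get_plugins(search_term='noaa')
['noaa', 'noaa_extras']
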
def get_plugin_user_id( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[int]:
147def get_plugin_user_id(
148    self,
149    plugin: 'mrsm.core.Plugin',
150    debug: bool = False
151) -> Optional[int]:
152    """
153    Return a plugin's user ID.
154    """
155    ### ensure plugins table exists
156    from meerschaum.connectors.sql.tables import get_tables
157    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
158    from meerschaum.utils.packages import attempt_import
159    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
160
161    query = (
162        sqlalchemy
163        .select(plugins_tbl.c.user_id)
164        .where(plugins_tbl.c.plugin_name == plugin.name)
165    )
166
167    try:
168        return int(self.value(query, debug=debug))
169    except Exception:
170        return None

Return a plugin's user ID.

def get_plugin_username( self, plugin: meerschaum.Plugin, debug: bool = False) -> Optional[str]:
172def get_plugin_username(
173    self,
174    plugin: 'mrsm.core.Plugin',
175    debug: bool = False
176) -> Optional[str]:
177    """
178    Return the username of a plugin's owner.
179    """
180    ### ensure plugins table exists
181    from meerschaum.connectors.sql.tables import get_tables
182    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
183    users = get_tables(mrsm_instance=self, debug=debug)['users']
184    from meerschaum.utils.packages import attempt_import
185    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
186
187    ### NOTE: chain `.where()` calls; Python's `and` would silently
188    ### drop the first condition from the query.
189    query = (
190        sqlalchemy.select(users.c.username)
191        .where(users.c.user_id == plugins_tbl.c.user_id)
192        .where(plugins_tbl.c.plugin_name == plugin.name)
193    )
194
195    return self.value(query, debug=debug)

Return the username of a plugin's owner.

def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
198def get_plugin_attributes(
199    self,
200    plugin: 'mrsm.core.Plugin',
201    debug: bool = False
202) -> Dict[str, Any]:
203    """
204    Return the attributes of a plugin.
205    """
206    ### ensure plugins table exists
207    from meerschaum.connectors.sql.tables import get_tables
208    plugins_tbl = get_tables(mrsm_instance=self, debug=debug)['plugins']
209    from meerschaum.utils.packages import attempt_import
210    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
211
212    query = (
213        sqlalchemy
214        .select(plugins_tbl.c.attributes)
215        .where(plugins_tbl.c.plugin_name == plugin.name)
216    )
217
218    _attr = self.value(query, debug=debug)
219    if isinstance(_attr, str):
220        _attr = json.loads(_attr)
221    elif _attr is None:
222        _attr = {}
223    return _attr

Return the attributes of a plugin.

def get_users_pipe(self) -> meerschaum.Pipe:
16def get_users_pipe(self) -> mrsm.Pipe:
17    """
18    Return the internal metadata pipe for users management.
19    """
20    if '_users_pipe' in self.__dict__:
21        return self._users_pipe
22
23    cache_connector = self.__dict__.get('_cache_connector', None)
24    self._users_pipe = mrsm.Pipe(
25        'mrsm', 'users',
26        temporary=True,
27        cache=True,
28        cache_connector_keys=cache_connector,
29        static=True,
30        null_indices=False,
31        enforce=False,
32        autoincrement=True,
33        columns={
34            'primary': 'user_id',
35        },
36        dtypes={
37            'user_id': 'int',
38            'username': 'string',
39            'attributes': 'json',
40            'user_type': 'string',
41        },
42        indices={
43            'unique': 'username',
44        },
45    )
46    return self._users_pipe

Return the internal metadata pipe for users management.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
49def register_user(
50    self,
51    user: mrsm.core.User,
52    debug: bool = False,
53    **kw: Any
54) -> SuccessTuple:
55    """Register a new user."""
56    from meerschaum.utils.packages import attempt_import
57    from meerschaum.utils.sql import json_flavors
58    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
59
60    valid_tuple = valid_username(user.username)
61    if not valid_tuple[0]:
62        return valid_tuple
63
64    old_id = self.get_user_id(user, debug=debug)
65
66    if old_id is not None:
67        return False, f"User '{user}' already exists."
68
69    ### ensure users table exists
70    from meerschaum.connectors.sql.tables import get_tables
71    tables = get_tables(mrsm_instance=self, debug=debug)
72
73    import json
74    bind_variables = {
75        'username': user.username,
76        'email': user.email,
77        'password_hash': user.password_hash,
78        'user_type': user.type,
79        'attributes': (
80            json.dumps(user.attributes)
81            if self.flavor not in json_flavors
82            else user.attributes
83        ),
84    }
 85    ### `old_id` is None at this point (checked above), so insert the new row.
 86    query = (
 87        sqlalchemy.insert(tables['users'])
 88        .values(**bind_variables)
 89    )
 90
93    result = self.exec(query, debug=debug)
94    if result is None:
95        return False, f"Failed to register user '{user}'."
96    return True, f"Successfully registered user '{user}'."

Register a new user.
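
A sketch (assumes meerschaum.core.User accepts a username and password; the user 'alice' is hypothetical):

>>> from meerschaum.core import User
>>> conn = mrsm.get_connector('sql:main')
>>> user = User('alice', 'hunter2')
>>> conn.register_user(user)
(True, "Successfully registered user 'alice'.")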

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[int]:
188def get_user_id(
189    self,
190    user: 'mrsm.core.User',
191    debug: bool = False
192) -> Optional[int]:
193    """If a user is registered, return the `user_id`."""
194    ### ensure users table exists
195    from meerschaum.utils.packages import attempt_import
196    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
197    from meerschaum.connectors.sql.tables import get_tables
198    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
199
200    query = (
201        sqlalchemy.select(users_tbl.c.user_id)
202        .where(users_tbl.c.username == user.username)
203    )
204
205    result = self.value(query, debug=debug)
206    if result is not None:
207        return int(result)
208    return None

If a user is registered, return the user_id.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
282def get_users(
283    self,
284    debug: bool = False,
285    **kw: Any
286) -> List[str]:
287    """
288    Get the registered usernames.
289    """
290    ### ensure users table exists
291    from meerschaum.connectors.sql.tables import get_tables
292    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
293    from meerschaum.utils.packages import attempt_import
294    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
295
296    query = sqlalchemy.select(users_tbl.c.username)
297
298    return list(self.read(query, debug=debug)['username'])

Get the registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
133def edit_user(
134    self,
135    user: 'mrsm.core.User',
136    debug: bool = False,
137    **kw: Any
138) -> SuccessTuple:
139    """Update an existing user's metadata."""
140    from meerschaum.utils.packages import attempt_import
141    from meerschaum.utils.sql import json_flavors
142    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
143    from meerschaum.connectors.sql.tables import get_tables
144    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
145
146    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
147    if user_id is None:
148        return False, (
149            f"User '{user.username}' does not exist. "
150            f"Register user '{user.username}' before editing."
151        )
152    user.user_id = user_id
153
154    import json
155    valid_tuple = valid_username(user.username)
156    if not valid_tuple[0]:
157        return valid_tuple
158
159    bind_variables = {
160        'user_id' : user_id,
161        'username' : user.username,
162    }
163    if user.password != '':
164        bind_variables['password_hash'] = user.password_hash
165    if user.email != '':
166        bind_variables['email'] = user.email
167    if user.attributes is not None and user.attributes != {}:
168        bind_variables['attributes'] = (
169            json.dumps(user.attributes) if self.flavor not in json_flavors
170            else user.attributes
171        )
172    if user.type != '':
173        bind_variables['user_type'] = user.type
174
175    query = (
176        sqlalchemy
177        .update(users_tbl)
178        .values(**bind_variables)
179        .where(users_tbl.c.user_id == user_id)
180    )
181
182    result = self.exec(query, debug=debug)
183    if result is None:
184        return False, f"Failed to edit user '{user}'."
185    return True, f"Successfully edited user '{user}'."

Update an existing user's metadata.
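
Continuing the sketch from register_user above, only non-empty fields are written back:

>>> user = User('alice', 'newpassword123')
>>> conn.edit_user(user)
(True, "Successfully edited user 'alice'.")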

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Tuple[bool, str]:
250def delete_user(
251    self,
252    user: 'mrsm.core.User',
253    debug: bool = False
254) -> SuccessTuple:
255    """Delete a user's record from the users table."""
256    ### ensure users table exists
257    from meerschaum.connectors.sql.tables import get_tables
258    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
259    plugins = get_tables(mrsm_instance=self, debug=debug)['plugins']
260    from meerschaum.utils.packages import attempt_import
261    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
262
263    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
264
265    if user_id is None:
266        return False, f"User '{user.username}' is not registered and cannot be deleted."
267
268    query = sqlalchemy.delete(users_tbl).where(users_tbl.c.user_id == user_id)
269
270    result = self.exec(query, debug=debug)
271    if result is None:
272        return False, f"Failed to delete user '{user}'."
273
274    query = sqlalchemy.delete(plugins).where(plugins.c.user_id == user_id)
275    result = self.exec(query, debug=debug)
276    if result is None:
277        return False, f"Failed to delete plugins of user '{user}'."
278
279    return True, f"Successfully deleted user '{user}'"

Delete a user's record from the users table.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
301def get_user_password_hash(
302    self,
303    user: 'mrsm.core.User',
304    debug: bool = False,
305    **kw: Any
306) -> Optional[str]:
307    """
308    Return the password hash for a user.
309    **NOTE**: This may be dangerous and is only allowed if the security settings explicitly allow it.
310    """
311    from meerschaum.utils.debug import dprint
312    from meerschaum.connectors.sql.tables import get_tables
313    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
314    from meerschaum.utils.packages import attempt_import
315    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
316
317    if user.user_id is not None:
318        user_id = user.user_id
319        if debug:
320            dprint(f"Already given user_id: {user_id}")
321    else:
322        if debug:
323            dprint("Fetching user_id...")
324        user_id = self.get_user_id(user, debug=debug)
325
326    if user_id is None:
327        return None
328
329    query = sqlalchemy.select(users_tbl.c.password_hash).where(users_tbl.c.user_id == user_id)
330
331    return self.value(query, debug=debug)

Return the password hash for a user. NOTE: This may be dangerous and is only allowed if the security settings explicitly allow it.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
334def get_user_type(
335    self,
336    user: 'mrsm.core.User',
337    debug: bool = False,
338    **kw: Any
339) -> Optional[str]:
340    """
341    Return the user's type.
342    """
343    from meerschaum.connectors.sql.tables import get_tables
344    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
345    from meerschaum.utils.packages import attempt_import
346    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
347
348    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
349
350    if user_id is None:
351        return None
352
353    query = sqlalchemy.select(users_tbl.c.user_type).where(users_tbl.c.user_id == user_id)
354
355    return self.value(query, debug=debug)

Return the user's type.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False) -> Optional[Dict[str, Any]]:
210def get_user_attributes(
211    self,
212    user: 'mrsm.core.User',
213    debug: bool = False
214) -> Union[Dict[str, Any], None]:
215    """
216    Return the user's attributes.
217    """
218    ### ensure users table exists
219    from meerschaum.utils.warnings import warn
220    from meerschaum.utils.packages import attempt_import
221    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
222    from meerschaum.connectors.sql.tables import get_tables
223    users_tbl = get_tables(mrsm_instance=self, debug=debug)['users']
224
225    user_id = user.user_id if user.user_id is not None else self.get_user_id(user, debug=debug)
226
227    query = (
228        sqlalchemy.select(users_tbl.c.attributes)
229        .where(users_tbl.c.user_id == user_id)
230    )
231
232    result = self.value(query, debug=debug)
233    if result is not None and not isinstance(result, dict):
234        try:
235            result = dict(result)
236            _parsed = True
237        except Exception:
238            _parsed = False
239        if not _parsed:
240            try:
241                import json
242                result = json.loads(result)
243                _parsed = True
244            except Exception:
245                _parsed = False
246        if not _parsed:
247            warn(f"Received unexpected type for attributes: {result}")
248    return result

Return the user's attributes.

@classmethod
def from_uri( cls, uri: str, label: Optional[str] = None, as_dict: bool = False) -> Union[SQLConnector, Dict[str, Union[str, int]]]:
15@classmethod
16def from_uri(
17    cls,
18    uri: str,
19    label: Optional[str] = None,
20    as_dict: bool = False,
21) -> Union[
22    'meerschaum.connectors.SQLConnector',
23    Dict[str, Union[str, int]],
24]:
25    """
26    Create a new SQLConnector from a URI string.
27
28    Parameters
29    ----------
30    uri: str
31        The URI connection string.
32
33    label: Optional[str], default None
34        If provided, use this as the connector label.
35        Otherwise use the determined database name.
36
37    as_dict: bool, default False
38        If `True`, return a dictionary of the keyword arguments
39        necessary to create a new `SQLConnector`, otherwise create a new object.
40
41    Returns
42    -------
43    A new SQLConnector object or a dictionary of attributes (if `as_dict` is `True`).
44    """
45
46    params = cls.parse_uri(uri)
47    params['uri'] = uri
48    flavor = params.get('flavor', None)
49    if not flavor or flavor not in cls.flavor_configs:
50        error(f"Invalid flavor '{flavor}' detected from the provided URI.")
51
52    if 'database' not in params:
53        error("Unable to determine the database from the provided URI.")
54
55    if flavor in ('sqlite', 'duckdb', 'geopackage'):
56        if params['database'] == ':memory:':
57            params['label'] = label or f'memory_{flavor}'
58        else:
59            params['label'] = label or params['database'].split(os.path.sep)[-1].lower()
60    else:
61        params['label'] = label or (
62            (
63                (params['username'] + '@' if 'username' in params else '')
64                + params.get('host', '')
65                + ('/' if 'host' in params else '')
66                + params.get('database', '')
67            ).lower()
68        )
69
70    return cls(**params) if not as_dict else params

Create a new SQLConnector from a URI string.

Parameters
  • uri (str): The URI connection string.
  • label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
  • as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new SQLConnector, otherwise create a new object.
Returns
  • A new SQLConnector object or a dictionary of attributes (if as_dict is True).
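
For example (hypothetical credentials; the label is derived from the username, host, and database as in the code above):

>>> conn = SQLConnector.from_uri('postgresql://user:pass@localhost:5432/db')
>>> conn.label
'user@localhost/db'
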
@staticmethod
def parse_uri(uri: str) -> Dict[str, Any]:
 73@staticmethod
 74def parse_uri(uri: str) -> Dict[str, Any]:
 75    """
 76    Parse a URI string into a dictionary of parameters.
 77
 78    Parameters
 79    ----------
 80    uri: str
 81        The database connection URI.
 82
 83    Returns
 84    -------
 85    A dictionary of attributes.
 86
 87    Examples
 88    --------
 89    >>> parse_uri('sqlite:////home/foo/bar.db')
 90    {'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
 91    >>> parse_uri(
 92    ...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
 93    ...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
 94    ... )
 95    {'host': 'localhost', 'database': 'master', 'username': 'sa',
 96    'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
 97    'driver': 'ODBC Driver 17 for SQL Server'}
 98    >>> 
 99    """
100    from urllib.parse import parse_qs, urlparse
101    sqlalchemy = attempt_import('sqlalchemy', lazy=False)
102    parser = sqlalchemy.engine.url.make_url
103    params = parser(uri).translate_connect_args()
104    params['flavor'] = uri.split(':')[0].split('+')[0]
105    if params['flavor'] == 'postgres':
106        params['flavor'] = 'postgresql'
107    if '?' in uri:
108        parsed_uri = urlparse(uri)
109        for key, value in parse_qs(parsed_uri.query).items():
110            params.update({key: value[0]})
111
112        if '--search_path' in params.get('options', ''):
113            params.update({'schema': params['options'].replace('--search_path=', '', 1)})
114    return params

Parse a URI string into a dictionary of parameters.

Parameters
  • uri (str): The database connection URI.
Returns
  • A dictionary of attributes.
Examples
>>> parse_uri('sqlite:////home/foo/bar.db')
{'database': '/home/foo/bar.db', 'flavor': 'sqlite'}
>>> parse_uri(
...     'mssql+pyodbc://sa:supersecureSECRETPASSWORD123!@localhost:1439'
...     + '/master?driver=ODBC+Driver+17+for+SQL+Server'
... )
{'host': 'localhost', 'database': 'master', 'username': 'sa',
'password': 'supersecureSECRETPASSWORD123!', 'port': 1439, 'flavor': 'mssql',
'driver': 'ODBC Driver 17 for SQL Server'}
class APIConnector(meerschaum.connectors.InstanceConnector):
 22class APIConnector(InstanceConnector):
 23    """
 24    Connect to a Meerschaum API instance.
 25    """
 26
 27    IS_THREAD_SAFE: bool = False
 28    OPTIONAL_ATTRIBUTES: List[str] = ['port', 'client_secret', 'client_id', 'api_key']
 29
 30    from ._request import (
 31        make_request,
 32        get,
 33        post,
 34        put,
 35        patch,
 36        delete,
 37        wget,
 38    )
 39    from ._actions import (
 40        get_actions,
 41        do_action,
 42        do_action_async,
 43        do_action_legacy,
 44    )
 45    from ._misc import get_mrsm_version, get_chaining_status
 46    from ._pipes import (
 47        get_pipe_instance_keys,
 48        register_pipe,
 49        fetch_pipes_keys,
 50        edit_pipe,
 51        sync_pipe,
 52        delete_pipe,
 53        get_pipe_data,
 54        get_pipe_id,
 55        get_pipe_attributes,
 56        get_sync_time,
 57        pipe_exists,
 58        create_metadata,
 59        get_pipe_rowcount,
 60        drop_pipe,
 61        clear_pipe,
 62        get_pipe_columns_types,
 63        get_pipe_columns_indices,
 64    )
 65    from ._fetch import fetch
 66    from ._plugins import (
 67        register_plugin,
 68        install_plugin,
 69        delete_plugin,
 70        get_plugins,
 71        get_plugin_attributes,
 72    )
 73    from ._login import login, test_connection
 74    from ._users import (
 75        register_user,
 76        get_user_id,
 77        get_users,
 78        edit_user,
 79        delete_user,
 80        get_user_password_hash,
 81        get_user_type,
 82        get_user_attributes,
 83    )
 84    from ._tokens import (
 85        register_token,
 86        get_token_model,
 87        get_tokens,
 88        edit_token,
 89        invalidate_token,
 90        get_token_scopes,
 91        token_exists,
 92        delete_token,
 93    )
 94    from ._uri import from_uri
 95    from ._jobs import (
 96        get_jobs,
 97        get_job,
 98        get_job_metadata,
 99        get_job_properties,
100        get_job_exists,
101        delete_job,
102        start_job,
103        create_job,
104        stop_job,
105        pause_job,
106        get_logs,
107        get_job_stop_time,
108        monitor_logs,
109        monitor_logs_async,
110        get_job_is_blocking_on_stdin,
111        get_job_began,
112        get_job_ended,
113        get_job_paused,
114        get_job_status,
115    )
116
117    def __init__(
118        self,
119        label: Optional[str] = None,
120        wait: bool = False,
121        debug: bool = False,
122        **kw
123    ):
124        if 'uri' in kw:
125            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
126            label = label or from_uri_params.get('label', None)
127            _ = from_uri_params.pop('label', None)
128            kw.update(from_uri_params)
129
130        super().__init__('api', label=label, **kw)
131        if 'protocol' not in self.__dict__:
132            self.protocol = (
133                'https' if self.__dict__.get('uri', '').startswith('https')
134                else 'http'
135            )
136
137        if 'uri' not in self.__dict__:
138            self.verify_attributes(required_attributes)
139        else:
140            from meerschaum.connectors.sql import SQLConnector
141            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
142            if 'host' not in conn_attrs:
143                raise Exception(f"Invalid URI for '{self}'.")
144            self.__dict__.update(conn_attrs)
145
146        self.url = (
147            self.protocol + '://' +
148            self.host
149            + (
150                (':' + str(self.port))
151                if self.__dict__.get('port', None)
152                else ''
153            )
154        )
155        self._token = None
156        self._expires = None
157        self._session = None
158        self._instance_keys = self.__dict__.get('instance_keys', None)
159
160
161    @property
162    def URI(self) -> str:
163        """
164        Return the fully qualified URI.
165        """
166        import urllib.parse
167        username = self.__dict__.get('username', None)
168        password = self.__dict__.get('password', None)
169        client_id = self.__dict__.get('client_id', None)
170        client_secret = self.__dict__.get('client_secret', None)
171        api_key = self.__dict__.get('api_key', None)
172        creds = (username + ':' + password + '@') if username and password else ''
173        params = {}
174        params_str = ('?' + urllib.parse.urlencode(params)) if params else ''
175        return (
176            self.protocol
177            + '://'
178            + creds
179            + self.host
180            + (
181                (':' + str(self.port))
182                if self.__dict__.get('port', None)
183                else ''
184            )
185            + params_str
186        )
187
188    @property
189    def session(self):
190        if self._session is None:
191            _ = attempt_import('certifi', lazy=False)
192            requests = attempt_import('requests', lazy=False)
193            if requests:
194                self._session = requests.Session()
195            if self._session is None:
196                error("Failed to import requests. Is requests installed?")
197        return self._session
198
199    @property
200    def token(self):
201        expired = (
202            True if self._expires is None else (
203                (
204                    self._expires
205                    <
206                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
207                )
208            )
209        )
210
211        if self._token is None or expired:
212            success, msg = self.login()
213            if not success and not self.__dict__.get('_emitted_warning'):
214                warn(msg, stack=False)
215                self._emitted_warning = True
216        return self._token
217
218    @property
219    def instance_keys(self) -> Union[str, None]:
220        """
221        Return the instance keys to be sent alongside pipe requests.
222        """
223        return self._instance_keys
224
225    @property
226    def login_scheme(self) -> str:
227        """
228        Return the login scheme to use based on the configured credentials.
229        """
230        if 'username' in self.__dict__:
231            return 'password'
232        if 'client_id' in self.__dict__:
233            return 'client_credentials'
234        elif 'api_key' in self.__dict__:
235            return 'api_key'
236
237        return 'password'

Connect to a Meerschaum API instance.
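
A construction sketch (assumes an 'api:main' connector configured with the hypothetical host localhost and port 8000):

>>> import meerschaum as mrsm
>>> conn = mrsm.get_connector('api:main')
>>> conn.url
'http://localhost:8000'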

APIConnector( label: Optional[str] = None, wait: bool = False, debug: bool = False, **kw)
117    def __init__(
118        self,
119        label: Optional[str] = None,
120        wait: bool = False,
121        debug: bool = False,
122        **kw
123    ):
124        if 'uri' in kw:
125            from_uri_params = self.from_uri(kw['uri'], as_dict=True)
126            label = label or from_uri_params.get('label', None)
127            _ = from_uri_params.pop('label', None)
128            kw.update(from_uri_params)
129
130        super().__init__('api', label=label, **kw)
131        if 'protocol' not in self.__dict__:
132            self.protocol = (
133                'https' if self.__dict__.get('uri', '').startswith('https')
134                else 'http'
135            )
136
137        if 'uri' not in self.__dict__:
138            self.verify_attributes(required_attributes)
139        else:
140            from meerschaum.connectors.sql import SQLConnector
141            conn_attrs = SQLConnector.parse_uri(self.__dict__['uri'])
142            if 'host' not in conn_attrs:
143                raise Exception(f"Invalid URI for '{self}'.")
144            self.__dict__.update(conn_attrs)
145
146        self.url = (
147            self.protocol + '://' +
148            self.host
149            + (
150                (':' + str(self.port))
151                if self.__dict__.get('port', None)
152                else ''
153            )
154        )
155        self._token = None
156        self._expires = None
157        self._session = None
158        self._instance_keys = self.__dict__.get('instance_keys', None)

Set the given keyword arguments as attributes.

Parameters
  • type (str): The type of the connector (e.g. sql, api, plugin).
  • label (str): The label for the connector.
Examples

Run mrsm edit config to edit connectors in the YAML file:

meerschaum:
    connectors:
        {type}:
            {label}:
                ### attributes go here
IS_THREAD_SAFE: bool = False
OPTIONAL_ATTRIBUTES: List[str] = ['port', 'client_secret', 'client_id', 'api_key']
url
URI: str
161    @property
162    def URI(self) -> str:
163        """
164        Return the fully qualified URI.
165        """
166        import urllib.parse
167        username = self.__dict__.get('username', None)
168        password = self.__dict__.get('password', None)
169        client_id = self.__dict__.get('client_id', None)
170        client_secret = self.__dict__.get('client_secret', None)
171        api_key = self.__dict__.get('api_key', None)
172        creds = (username + ':' + password + '@') if username and password else ''
173        params = {}
174        params_str = ('?' + urllib.parse.urlencode(params)) if params else ''
175        return (
176            self.protocol
177            + '://'
178            + creds
179            + self.host
180            + (
181                (':' + str(self.port))
182                if self.__dict__.get('port', None)
183                else ''
184            )
185            + params_str
186        )

Return the fully qualified URI.
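
For example (hypothetical credentials and host):

>>> conn.URI
'http://user:pass@localhost:8000'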

session
188    @property
189    def session(self):
190        if self._session is None:
191            _ = attempt_import('certifi', lazy=False)
192            requests = attempt_import('requests', lazy=False)
193            if requests:
194                self._session = requests.Session()
195            if self._session is None:
196                error("Failed to import requests. Is requests installed?")
197        return self._session
token
199    @property
200    def token(self):
201        expired = (
202            True if self._expires is None else (
203                (
204                    self._expires
205                    <
206                    datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(minutes=1)
207                )
208            )
209        )
210
211        if self._token is None or expired:
212            success, msg = self.login()
213            if not success and not self.__dict__.get('_emitted_warning'):
214                warn(msg, stack=False)
215                self._emitted_warning = True
216        return self._token
instance_keys: Optional[str]
218    @property
219    def instance_keys(self) -> Union[str, None]:
220        """
221        Return the instance keys to be sent alongside pipe requests.
222        """
223        return self._instance_keys

Return the instance keys to be sent alongside pipe requests.

login_scheme: str
225    @property
226    def login_scheme(self) -> str:
227        """
228        Return the login scheme to use based on the configured credentials.
229        """
230        if 'username' in self.__dict__:
231            return 'password'
232        if 'client_id' in self.__dict__:
233            return 'client_credentials'
234        elif 'api_key' in self.__dict__:
235            return 'api_key'
236
237        return 'password'

Return the login scheme to use based on the configured credentials.

def make_request( self, method: str, r_url: str, headers: Optional[Dict[str, Any]] = None, use_token: bool = True, debug: bool = False, **kwargs: Any) -> requests.models.Response:
 28def make_request(
 29    self,
 30    method: str,
 31    r_url: str,
 32    headers: Optional[Dict[str, Any]] = None,
 33    use_token: bool = True,
 34    debug: bool = False,
 35    **kwargs: Any
 36) -> 'requests.Response':
 37    """
 38    Make a request to this APIConnector's endpoint using the in-memory session.
 39
 40    Parameters
 41    ----------
 42    method: str
 43        The kind of request to make.
 44        Accepted values:
 45        - `'GET'`
 46        - `'OPTIONS'`
 47        - `'HEAD'`
 48        - `'POST'`
 49        - `'PUT'`
 50        - `'PATCH'`
 51        - `'DELETE'`
 52
 53    r_url: str
 54        The relative URL for the endpoint (e.g. `'/pipes'`).
 55
 56    headers: Optional[Dict[str, Any]], default None
 57        The headers to use for the request.
 58        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
 59
 60    use_token: bool, default True
 61        If `True`, add the authorization token to the headers.
 62
 63    debug: bool, default False
 64        Verbosity toggle.
 65
 66    kwargs: Any
 67        All other keyword arguments are passed to `requests.request`.
 68
 69    Returns
 70    -------
 71    A `requests.Response` object.
 72    """
 73    if method.upper() not in METHODS:
 74        raise ValueError(f"Method '{method}' is not supported.")
 75
 76    verify = self.__dict__.get('verify', None)
 77    if 'verify' not in kwargs and isinstance(verify, bool):
 78        kwargs['verify'] = verify
 79
 80    headers = (
 81        copy.deepcopy(headers)
 82        if isinstance(headers, dict)
 83        else {}
 84    )
 85
 86    if use_token:
 87        headers.update({'Authorization': f'Bearer {self.token}'})
 88
 89    if 'timeout' not in kwargs:
 90        kwargs['timeout'] = STATIC_CONFIG['api']['default_timeout']
 91
 92    request_url = urllib.parse.urljoin(self.url, r_url)
 93    if debug:
 94        dprint(f"[{self}] Sending a '{method.upper()}' request to {request_url}")
 95
 96    return self.session.request(
 97        method.upper(),
 98        request_url,
 99        headers=headers,
100        **kwargs
101    )

Make a request to this APIConnector's endpoint using the in-memory session.

Parameters
  • method (str): The kind of request to make. Accepted values:
    • 'GET'
    • 'OPTIONS'
    • 'HEAD'
    • 'POST'
    • 'PUT'
    • 'PATCH'
    • 'DELETE'
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
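
A sketch (assumes a reachable API instance behind conn; the status code is illustrative):

>>> response = conn.make_request('GET', '/pipes')
>>> response.status_code
200
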
def get(self, r_url: str, **kwargs: Any) -> requests.models.Response:
104def get(self, r_url: str, **kwargs: Any) -> 'requests.Response':
105    """
106    Wrapper for `requests.get`.
107
108    Parameters
109    ----------
110    r_url: str
111        The relative URL for the endpoint (e.g. `'/pipes'`).
112
113    headers: Optional[Dict[str, Any]], default None
114        The headers to use for the request.
115        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
116
117    use_token: bool, default True
118        If `True`, add the authorization token to the headers.
119
120    debug: bool, default False
121        Verbosity toggle.
122
123    kwargs: Any
124        All other keyword arguments are passed to `requests.request`.
125
126    Returns
127    -------
128    A `requests.Response` object.
129
130    """
131    return self.make_request('GET', r_url, **kwargs)

Wrapper for requests.get.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def post(self, r_url: str, **kwargs: Any) -> requests.models.Response:
134def post(self, r_url: str, **kwargs: Any) -> 'requests.Response':
135    """
136    Wrapper for `requests.post`.
137
138    Parameters
139    ----------
140    r_url: str
141        The relative URL for the endpoint (e.g. `'/pipes'`).
142
143    headers: Optional[Dict[str, Any]], default None
144        The headers to use for the request.
145        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
146
147    use_token: bool, default True
148        If `True`, add the authorization token to the headers.
149
150    debug: bool, default False
151        Verbosity toggle.
152
153    kwargs: Any
154        All other keyword arguments are passed to `requests.request`.
155
156    Returns
157    -------
158    A `requests.Response` object.
159
160    """
161    return self.make_request('POST', r_url, **kwargs)

Wrapper for requests.post.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def put(self, r_url: str, **kwargs: Any) -> requests.models.Response:
193def put(self, r_url: str, **kwargs: Any) -> 'requests.Response':
194    """
195    Wrapper for `requests.put`.
196
197    Parameters
198    ----------
199    r_url: str
200        The relative URL for the endpoint (e.g. `'/pipes'`).
201
202    headers: Optional[Dict[str, Any]], default None
203        The headers to use for the request.
204        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
205
206    use_token: bool, default True
207        If `True`, add the authorization token to the headers.
208
209    debug: bool, default False
210        Verbosity toggle.
211
212    kwargs: Any
213        All other keyword arguments are passed to `requests.request`.
214
215    Returns
216    -------
217    A `requests.Response` object.
218    """
219    return self.make_request('PUT', r_url, **kwargs)

Wrapper for requests.put.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def patch(self, r_url: str, **kwargs: Any) -> requests.models.Response:
164def patch(self, r_url: str, **kwargs: Any) -> 'requests.Response':
165    """
166    Wrapper for `requests.patch`.
167
168    Parameters
169    ----------
170    r_url: str
171        The relative URL for the endpoint (e.g. `'/pipes'`).
172
173    headers: Optional[Dict[str, Any]], default None
174        The headers to use for the request.
175        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
176
177    use_token: bool, default True
178        If `True`, add the authorization token to the headers.
179
180    debug: bool, default False
181        Verbosity toggle.
182
183    kwargs: Any
184        All other keyword arguments are passed to `requests.request`.
185
186    Returns
187    -------
188    A `requests.Response` object.
189    """
190    return self.make_request('PATCH', r_url, **kwargs)

Wrapper for requests.patch.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def delete(self, r_url: str, **kwargs: Any) -> requests.models.Response:
222def delete(self, r_url: str, **kwargs: Any) -> 'requests.Response':
223    """
224    Wrapper for `requests.delete`.
225
226    Parameters
227    ----------
228    r_url: str
229        The relative URL for the endpoint (e.g. `'/pipes'`).
230
231    headers: Optional[Dict[str, Any]], default None
232        The headers to use for the request.
233        If `use_token` is `True`, the authorization token will be added to a copy of these headers.
234
235    use_token: bool, default True
236        If `True`, add the authorization token to the headers.
237
238    debug: bool, default False
239        Verbosity toggle.
240
241    kwargs: Any
242        All other keyword arguments are passed to `requests.request`.
243
244    Returns
245    -------
246    A `requests.Response` object.
247    """
248    return self.make_request('DELETE', r_url, **kwargs)

Wrapper for requests.delete.

Parameters
  • r_url (str): The relative URL for the endpoint (e.g. '/pipes').
  • headers (Optional[Dict[str, Any]], default None): The headers to use for the request. If use_token is True, the authorization token will be added to a copy of these headers.
  • use_token (bool, default True): If True, add the authorization token to the headers.
  • debug (bool, default False): Verbosity toggle.
  • kwargs (Any): All other keyword arguments are passed to requests.request.
Returns
  • A requests.Response object.
def wget( self, r_url: str, dest: Union[str, pathlib.Path, NoneType] = None, headers: Optional[Dict[str, Any]] = None, use_token: bool = True, debug: bool = False, **kw: Any) -> pathlib.Path:
251def wget(
252    self,
253    r_url: str,
254    dest: Optional[Union[str, pathlib.Path]] = None,
255    headers: Optional[Dict[str, Any]] = None,
256    use_token: bool = True,
257    debug: bool = False,
258    **kw: Any
259) -> pathlib.Path:
260    """Mimic wget with requests."""
261    from meerschaum.utils.misc import wget
262    if headers is None:
263        headers = {}
264    if use_token:
265        headers.update({'Authorization': f'Bearer {self.token}'})
266    request_url = urllib.parse.urljoin(self.url, r_url)
267    if debug:
268        dprint(
269            f"[{self}] Downloading {request_url}"
270            + (f' to {dest}' if dest is not None else '')
271            + "..."
272        )
273    return wget(request_url, dest=dest, headers=headers, **kw)

Mimic wget with requests.
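
A sketch (the endpoint and destination path are hypothetical):

>>> conn.wget('/files/report.csv', dest='report.csv')
PosixPath('report.csv')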

def get_actions(self):
24def get_actions(self):
25    """Get available actions from the API instance."""
26    return self.get(ACTIONS_ENDPOINT)

Get available actions from the API instance.

def do_action(self, sysargs: List[str]) -> Tuple[bool, str]:
29def do_action(self, sysargs: List[str]) -> SuccessTuple:
30    """
31    Execute a Meerschaum action remotely.
32    """
33    return asyncio.run(self.do_action_async(sysargs))

Execute a Meerschaum action remotely.
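
For example (mirroring the legacy examples below):

>>> conn.do_action(['show', 'pipes'])
(True, "Success")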

async def do_action_async( self, sysargs: List[str], callback_function: Callable[[str], NoneType] = functools.partial(print, end='')) -> Tuple[bool, str]:
36async def do_action_async(
37    self,
38    sysargs: List[str],
39    callback_function: Callable[[str], None] = partial(print, end=''),
40) -> SuccessTuple:
41    """
42    Execute an action as a temporary remote job.
43    """
44    from meerschaum._internal.arguments import remove_api_executor_keys
45    from meerschaum.utils.misc import generate_password
46    sysargs = remove_api_executor_keys(sysargs)
47
48    job_name = TEMP_PREFIX + generate_password(12)
49    job = mrsm.Job(job_name, sysargs, executor_keys=str(self))
50
51    start_success, start_msg = job.start()
52    if not start_success:
53        return start_success, start_msg
54
55    await job.monitor_logs_async(
56        callback_function=callback_function,
57        stop_on_exit=True,
58        strip_timestamps=True,
59    )
60
61    success, msg = job.result
62    job.delete()
63    return success, msg

Execute an action as a temporary remote job.

def do_action_legacy( self, action: Optional[List[str]] = None, sysargs: Optional[List[str]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
 66def do_action_legacy(
 67    self,
 68    action: Optional[List[str]] = None,
 69    sysargs: Optional[List[str]] = None,
 70    debug: bool = False,
 71    **kw
 72) -> SuccessTuple:
 73    """
 74    NOTE: This method is deprecated.
 75    Please use `do_action()` or `do_action_async()`.
 76
 77    Execute a Meerschaum action remotely.
 78
 79    If `sysargs` are provided, parse those instead.
 80    Otherwise infer everything from keyword arguments.
 81
 82    Examples
 83    --------
 84    >>> conn = mrsm.get_connector('api:main')
 85    >>> conn.do_action(['show', 'pipes'])
 86    (True, "Success")
 87    >>> conn.do_action(['show', 'arguments'], name='test')
 88    (True, "Success")
 89    """
 90    import sys, json
 91    from meerschaum.utils.debug import dprint
 92    from meerschaum._internal.static import STATIC_CONFIG
 93    from meerschaum.utils.misc import json_serialize_datetime
 94    if action is None:
 95        action = []
 96
 97    if sysargs is not None and action and action[0] == '':
 98        from meerschaum._internal.arguments import parse_arguments
 99        if debug:
100            dprint(f"Parsing sysargs:\n{sysargs}")
101        json_dict = parse_arguments(sysargs)
102    else:
103        json_dict = kw
104        json_dict['action'] = action
105        if 'noask' not in kw:
106            json_dict['noask'] = True
107        if 'yes' not in kw:
108            json_dict['yes'] = True
109        if debug:
110            json_dict['debug'] = debug
111
112    root_action = json_dict['action'][0]
113    del json_dict['action'][0]
114    r_url = f"{STATIC_CONFIG['api']['endpoints']['actions']}/{root_action}"
115    
116    if debug:
117        from meerschaum.utils.formatting import pprint
118        dprint(f"Sending data to '{self.url + r_url}':")
119        pprint(json_dict, stream=sys.stderr)
120
121    response = self.post(
122        r_url,
123        data = json.dumps(json_dict, default=json_serialize_datetime),
124        debug = debug,
125    )
126    try:
127        response_list = json.loads(response.text)
128        if isinstance(response_list, dict) and 'detail' in response_list:
129            return False, response_list['detail']
130    except Exception as e:
131        print(f"Invalid response: {response}")
132        print(e)
133        return False, response.text
134    if debug:
135        dprint(response)
136    try:
137        return response_list[0], response_list[1]
138    except Exception as e:
139        return False, f"Failed to parse result from action '{root_action}'"

NOTE: This method is deprecated. Please use do_action() or do_action_async().

Execute a Meerschaum action remotely.

If sysargs are provided, parse those instead. Otherwise infer everything from keyword arguments.

Examples
>>> conn = mrsm.get_connector('api:main')
>>> conn.do_action(['show', 'pipes'])
(True, "Success")
>>> conn.do_action(['show', 'arguments'], name='test')
(True, "Success")
def get_mrsm_version(self, **kw) -> Optional[str]:
13def get_mrsm_version(self, **kw) -> Optional[str]:
14    """
15    Return the Meerschaum version of the API instance.
16    """
17    from meerschaum._internal.static import STATIC_CONFIG
18    try:
19        j = self.get(
20            STATIC_CONFIG['api']['endpoints']['version'] + '/mrsm',
21            use_token=False,
22            **kw
23        ).json()
24    except Exception:
25        return None
26    if isinstance(j, dict) and 'detail' in j:
27        return None
28    return j

Return the Meerschaum version of the API instance.

def get_chaining_status(self, **kw) -> Optional[bool]:
31def get_chaining_status(self, **kw) -> Optional[bool]:
32    """
33    Fetch the chaining status of the API instance.
34    """
35    from meerschaum._internal.static import STATIC_CONFIG
36    try:
37        response = self.get(
38            STATIC_CONFIG['api']['endpoints']['chaining'],
39            use_token = True,
40            **kw
41        )
42        if not response:
43            return None
44    except Exception:
45        return None
46
47    return response.json()

Fetch the chaining status of the API instance.

def get_pipe_instance_keys(self, pipe: meerschaum.Pipe) -> Optional[str]:
35def get_pipe_instance_keys(self, pipe: mrsm.Pipe) -> Union[str, None]:
36    """
37    Return the configured instance keys for a pipe if set,
38    else fall back to the default `instance_keys` for this `APIConnector`.
39    """
40    return pipe.parameters.get('instance_keys', self.instance_keys)

Return the configured instance keys for a pipe if set, else fall back to the default instance_keys for this APIConnector.

def register_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
43def register_pipe(
44    self,
45    pipe: mrsm.Pipe,
46    debug: bool = False
47) -> SuccessTuple:
48    """Submit a POST to the API to register a new Pipe object.
 49    Returns a tuple of (success_bool, message_str).
50    """
51    from meerschaum.utils.debug import dprint
52    r_url = pipe_r_url(pipe)
53    response = self.post(
54        r_url + '/register',
55        json=pipe._attributes.get('parameters', {}),
56        params={'instance_keys': self.get_pipe_instance_keys(pipe)},
57        debug=debug,
58    )
59    if debug:
60        dprint(response.text)
61
62    if not response:
63        return False, response.text
64
65    response_data = response.json()
66    if isinstance(response_data, list):
67        response_tuple = response_data[0], response_data[1]
 68    elif 'detail' in response_data:
69        response_tuple = response.__bool__(), response_data['detail']
70    else:
71        response_tuple = response.__bool__(), response.text
72    return response_tuple

Submit a POST to the API to register a new Pipe object. Returns a tuple of (success_bool, message_str).
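
A sketch (the pipe keys are hypothetical, and the message is illustrative):

>>> pipe = mrsm.Pipe('plugin:noaa', 'weather', instance=conn)
>>> conn.register_pipe(pipe)
(True, "Success")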

def fetch_pipes_keys( self, connector_keys: Optional[List[str]] = None, metric_keys: Optional[List[str]] = None, location_keys: Optional[List[str]] = None, tags: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False) -> List[Union[Tuple[str, str, Optional[str]], Tuple[str, str, Optional[str], List[str]], Tuple[str, str, Optional[str], Dict[str, Any]]]]:
108def fetch_pipes_keys(
109    self,
110    connector_keys: Optional[List[str]] = None,
111    metric_keys: Optional[List[str]] = None,
112    location_keys: Optional[List[str]] = None,
113    tags: Optional[List[str]] = None,
114    params: Optional[Dict[str, Any]] = None,
115    debug: bool = False
116) -> List[
117        Union[
118            Tuple[str, str, Union[str, None]],
119            Tuple[str, str, Union[str, None], List[str]],
120            Tuple[str, str, Union[str, None], Dict[str, Any]]
121        ]
122    ]:
123    """
124    Fetch registered Pipes' keys from the API.
125    
126    Parameters
127    ----------
128    connector_keys: Optional[List[str]], default None
129        The connector keys for the query.
130
131    metric_keys: Optional[List[str]], default None
132        The metric keys for the query.
133
134    location_keys: Optional[List[str]], default None
135        The location keys for the query.
136
137    tags: Optional[List[str]], default None
138        A list of tags for the query.
139
140    params: Optional[Dict[str, Any]], default None
141        A parameters dictionary for filtering against the `pipes` table
142        (e.g. `{'connector_keys': 'plugin:foo'}`).
143        Not recommended for use.
144
145    debug: bool, default False
146        Verbosity toggle.
147
148    Returns
149    -------
150    A list of tuples containing pipes' keys.
151    """
152    from meerschaum._internal.static import STATIC_CONFIG
153    if connector_keys is None:
154        connector_keys = []
155    if metric_keys is None:
156        metric_keys = []
157    if location_keys is None:
158        location_keys = []
159    if tags is None:
160        tags = []
161
162    r_url = STATIC_CONFIG['api']['endpoints']['pipes'] + '/keys'
163    try:
164        j = self.get(
165            r_url,
166            params={
167                'connector_keys': json.dumps(connector_keys),
168                'metric_keys': json.dumps(metric_keys),
169                'location_keys': json.dumps(location_keys),
170                'tags': json.dumps(tags),
171                'params': json.dumps(params),
172                'instance_keys': self.instance_keys,
173            },
174            debug=debug
175        ).json()
176    except Exception as e:
177        import traceback
178        traceback.print_exc()
179        error(str(e))
180
181    if 'detail' in j:
182        error(j['detail'], stack=False)
183    return [tuple(r) for r in j]

Fetch registered Pipes' keys from the API.

Parameters
  • connector_keys (Optional[List[str]], default None): The connector keys for the query.
  • metric_keys (Optional[List[str]], default None): The metric keys for the query.
  • location_keys (Optional[List[str]], default None): The location keys for the query.
  • tags (Optional[List[str]], default None): A list of tags for the query.
  • params (Optional[Dict[str, Any]], default None): A parameters dictionary for filtering against the pipes table (e.g. {'connector_keys': 'plugin:foo'}). Not recommended for general use.
  • debug (bool, default False): Verbosity toggle.
Returns
  • A list of tuples containing pipes' keys.
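
For example, a sketch of filtering registered keys by connector and tag (the keys and the output shown are illustrative):

>>> conn.fetch_pipes_keys(connector_keys=['plugin:noaa'], tags=['production'])
[('plugin:noaa', 'weather', None)]
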
def edit_pipe( self, pipe: meerschaum.Pipe, patch: bool = False, debug: bool = False) -> Tuple[bool, str]:
 75def edit_pipe(
 76    self,
 77    pipe: mrsm.Pipe,
 78    patch: bool = False,
 79    debug: bool = False,
 80) -> SuccessTuple:
 81    """Submit a PATCH to the API to edit an existing Pipe object.
 82    Returns a tuple of (success_bool, response_dict).
 83    """
 84    from meerschaum.utils.debug import dprint
 85    ### NOTE: if `parameters` is supplied in the Pipe constructor,
 86    ###       then `pipe.parameters` will exist and not be fetched from the database.
 87    r_url = pipe_r_url(pipe)
 88    response = self.patch(
 89        r_url + '/edit',
 90        params={'patch': patch, 'instance_keys': self.get_pipe_instance_keys(pipe)},
 91        json=pipe.get_parameters(apply_symlinks=False),
 92        debug=debug,
 93    )
 94    if debug:
 95        dprint(response.text)
 96
 97    response_data = response.json()
 98
99    if isinstance(response_data, list):
100        response_tuple = response_data[0], response_data[1]
101    elif 'detail' in response_data:
102        response_tuple = response.__bool__(), response_data['detail']
103    else:
104        response_tuple = response.__bool__(), response.text
105    return response_tuple

Submit a PATCH to the API to edit an existing Pipe object. Returns a tuple of (success_bool, message).
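
For example, a sketch of patching new parameters onto an existing registration (the 'fetch' parameters shown are illustrative):

>>> pipe.parameters['fetch'] = {'backtrack_minutes': 1440}
>>> success, msg = conn.edit_pipe(pipe, patch=True)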

def sync_pipe( self, pipe: meerschaum.Pipe, df: "Optional[Union['pd.DataFrame', Dict[Any, Any], str]]" = None, chunksize: Optional[int] = -1, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
186def sync_pipe(
187    self,
188    pipe: mrsm.Pipe,
189    df: Optional[Union['pd.DataFrame', Dict[Any, Any], str]] = None,
190    chunksize: Optional[int] = -1,
191    debug: bool = False,
192    **kw: Any
193) -> SuccessTuple:
194    """Sync a DataFrame into a Pipe."""
195    from decimal import Decimal
196    from meerschaum.utils.debug import dprint
197    from meerschaum.utils.dtypes import json_serialize_value
198    from meerschaum.utils.misc import items_str, interval_str
199    from meerschaum.config import get_config
200    from meerschaum.utils.packages import attempt_import
201    from meerschaum.utils.dataframe import get_special_cols, to_json
202    begin = time.perf_counter()
203    more_itertools = attempt_import('more_itertools')
204    if df is None:
205        msg = f"DataFrame is `None`. Cannot sync {pipe}."
206        return False, msg
207
208    def get_json_str(c):
209        if isinstance(c, str):
210            return c
211        if isinstance(c, (dict, list, tuple)):
212            return json.dumps(c, default=json_serialize_value)
213        return to_json(c, orient='columns', geometry_format='wkb_hex')
214
215    df = json.loads(df) if isinstance(df, str) else df
216
217    _chunksize: Optional[int] = (1 if chunksize is None else (
218        get_config('system', 'connectors', 'sql', 'chunksize') if chunksize == -1
219        else chunksize
220    ))
221    keys: List[str] = list(df.columns) if hasattr(df, 'columns') else list(df.keys() if isinstance(df, dict) else [])
222    chunks = []
223    if hasattr(df, 'index'):
224        df = df.reset_index(drop=True)
225        is_dask = 'dask' in df.__module__
226        chunks = (
227            (df.iloc[i] for i in more_itertools.chunked(df.index, _chunksize))
228            if not is_dask
229            else [partition.compute() for partition in df.partitions]
230        )
231
232    elif isinstance(df, dict):
233        ### `_chunks` is a dict of lists of dicts.
234        ### e.g. {'a' : [ {'a':[1, 2]}, {'a':[3, 4]} ] }
235        _chunks = {k: [] for k in keys}
236        for k in keys:
237            chunk_iter = more_itertools.chunked(df[k], _chunksize)
238            for l in chunk_iter:
239                _chunks[k].append({k: l})
240
241        ### `chunks` is a list of dicts (e.g. orient by rows in pandas JSON).
242        for k, l in _chunks.items():
243            for i, c in enumerate(l):
244                try:
245                    chunks[i].update(c)
246                except IndexError:
247                    chunks.append(c)
248    elif isinstance(df, list):
249        chunks = more_itertools.chunked(df, _chunksize)
250
251    ### Send columns in case the user has defined them locally.
252    request_params = kw.copy()
253    if pipe.columns:
254        request_params['columns'] = json.dumps(pipe.columns)
255    request_params['instance_keys'] = self.get_pipe_instance_keys(pipe)
256    r_url = pipe_r_url(pipe) + '/data'
257
258    rowcount = 0
259    num_success_chunks = 0
260    for i, c in enumerate(chunks):
261        if debug:
262            dprint(f"[{self}] Posting chunk {i} to {r_url}...")
263        if len(c) == 0:
264            if debug:
265                dprint(f"[{self}] Skipping empty chunk...")
266            continue
267        json_str = get_json_str(c)
268
269        try:
270            response = self.post(
271                r_url,
272                params=request_params,
273                data=json_str,
274                debug=debug,
275            )
276        except Exception as e:
277            msg = f"Failed to post a chunk to {pipe}:\n{e}"
278            warn(msg)
279            return False, msg
280            
281        if not response:
282            return False, f"Failed to sync a chunk:\n{response.text}"
283
284        try:
285            j = json.loads(response.text)
286        except Exception as e:
287            return False, f"Failed to parse response from syncing {pipe}:\n{e}"
288
289        if isinstance(j, dict) and 'detail' in j:
290            return False, j['detail']
291
292        try:
293            j = tuple(j)
294        except Exception:
295            return False, response.text
296
297        if debug:
298            dprint("Received response: " + str(j))
299        if not j[0]:
300            return j
301
302        rowcount += len(c)
303        num_success_chunks += 1
304
305    success_tuple = True, (
306        f"It took {interval_str(timedelta(seconds=(time.perf_counter() - begin)))} "
307        + f"to sync {rowcount:,} row"
308        + ('s' if rowcount != 1 else '')
309        + f" across {num_success_chunks:,} chunk" + ('s' if num_success_chunks != 1 else '') +
310        f" to {pipe}."
311    )
312    return success_tuple

Sync a DataFrame into a Pipe.
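
For example, a sketch of syncing a dictionary of columns (a pandas DataFrame or a JSON string is handled the same way; the data shown are illustrative):

>>> docs = {
...     'dt': ['2024-01-01', '2024-01-02'],
...     'val': [1.0, 2.0],
... }
>>> success, msg = conn.sync_pipe(pipe, docs, chunksize=1000)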

def delete_pipe( self, pipe: Optional[meerschaum.Pipe] = None, debug: bool = False) -> Tuple[bool, str]:
315def delete_pipe(
316    self,
317    pipe: Optional[mrsm.Pipe] = None,
318    debug: bool = False,
319) -> SuccessTuple:
320    """Delete a Pipe and drop its table."""
321    if pipe is None:
322        error("Pipe cannot be None.")
323    r_url = pipe_r_url(pipe)
324    response = self.delete(
325        r_url + '/delete',
326        params={'instance_keys': self.get_pipe_instance_keys(pipe)},
327        debug=debug,
328    )
329    if debug:
330        dprint(response.text)
331
332    response_data = response.json()
333    if isinstance(response_data, list):
334        response_tuple = response_data[0], response_data[1]
335    elif 'detail' in response_data:
336        response_tuple = response.__bool__(), response_data['detail']
337    else:
338        response_tuple = response.__bool__(), response.text
339    return response_tuple

Delete a Pipe and drop its table.

def get_pipe_data( self, pipe: meerschaum.Pipe, select_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, as_chunks: bool = False, debug: bool = False, **kw: Any) -> Optional[pandas.core.frame.DataFrame]:
342def get_pipe_data(
343    self,
344    pipe: mrsm.Pipe,
345    select_columns: Optional[List[str]] = None,
346    omit_columns: Optional[List[str]] = None,
347    begin: Union[str, datetime, int, None] = None,
348    end: Union[str, datetime, int, None] = None,
349    params: Optional[Dict[str, Any]] = None,
350    as_chunks: bool = False,
351    debug: bool = False,
352    **kw: Any
353) -> Union[pandas.DataFrame, None]:
354    """Fetch data from the API."""
355    r_url = pipe_r_url(pipe)
356    try:
357        response = self.get(
358            r_url + "/data",
359            params={
360                'select_columns': json.dumps(select_columns),
361                'omit_columns': json.dumps(omit_columns),
362                'begin': begin,
363                'end': end,
364                'params': json.dumps(params, default=str),
365                'instance': self.get_pipe_instance_keys(pipe),
366                'as_chunks': as_chunks,
367            },
368            debug=debug
369        )
370        if not response.ok:
371            return None
372        j = response.json()
373    except Exception as e:
374        warn(f"Failed to get data for {pipe}:\n{e}")
375        return None
376
377    if isinstance(j, dict) and 'detail' in j:
378        warn(j['detail'])
379        return None
380
381    from meerschaum.utils.dataframe import parse_df_datetimes, add_missing_cols_to_df
382    from meerschaum.utils.dtypes import are_dtypes_equal
383    try:
384        df = parse_df_datetimes(
385            j,
386            ignore_cols=[
387                col
388                for col, dtype in pipe.dtypes.items()
389                if not are_dtypes_equal(str(dtype), 'datetime')
390            ],
391            strip_timezone=(pipe.tzinfo is None),
392            debug=debug,
393        )
394    except Exception as e:
395        warn(f"Failed to parse response for {pipe}:\n{e}")
396        return None
397
398    if len(df.columns) == 0:
399        return add_missing_cols_to_df(df, pipe.dtypes)
400
401    return df

Fetch data from the API.
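
For example, a sketch of bounding the fetched data (the column names and datetime bounds are illustrative):

>>> df = conn.get_pipe_data(
...     pipe,
...     select_columns=['dt', 'val'],
...     begin='2024-01-01',
...     end='2024-02-01',
... )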

def get_pipe_id( self, pipe: meerschaum.Pipe, debug: bool = False) -> Union[int, str, NoneType]:
404def get_pipe_id(
405    self,
406    pipe: mrsm.Pipe,
407    debug: bool = False,
408) -> Union[int, str, None]:
409    """Get a Pipe's ID from the API."""
410    from meerschaum.utils.misc import is_int
411    r_url = pipe_r_url(pipe)
412    response = self.get(
413        r_url + '/id',
414        params={
415            'instance': self.get_pipe_instance_keys(pipe),
416        },
417        debug=debug,
418    )
419    if debug:
420        dprint(f"Got pipe ID: {response.text}")
421    try:
422        if is_int(response.text):
423            return int(response.text)
424        if response.text and response.text[0] != '{':
425            return response.text
426    except Exception as e:
427        warn(f"Failed to get the ID for {pipe}:\n{e}")
428    return None

Get a Pipe's ID from the API.

def get_pipe_attributes( self, pipe: meerschaum.Pipe, debug: bool = False) -> Dict[str, Any]:
431def get_pipe_attributes(
432    self,
433    pipe: mrsm.Pipe,
434    debug: bool = False,
435) -> Dict[str, Any]:
436    """Get a Pipe's attributes from the API
437
438    Parameters
439    ----------
440    pipe: meerschaum.Pipe
441        The pipe whose attributes we are fetching.
442        
443    Returns
444    -------
445    A dictionary of a pipe's attributes.
446    If the pipe does not exist, return an empty dictionary.
447    """
448    r_url = pipe_r_url(pipe)
449    response = self.get(
450        r_url + '/attributes',
451        params={
452            'instance': self.get_pipe_instance_keys(pipe),
453        },
454        debug=debug
455    )
456    try:
457        return json.loads(response.text)
458    except Exception as e:
459        warn(f"Failed to get the attributes for {pipe}:\n{e}")
460    return {}

Get a Pipe's attributes from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose attributes we are fetching.
Returns
  • A dictionary of a pipe's attributes. If the pipe does not exist, return an empty dictionary.
def get_sync_time( self, pipe: meerschaum.Pipe, params: Optional[Dict[str, Any]] = None, newest: bool = True, debug: bool = False) -> Union[datetime.datetime, int, NoneType]:
463def get_sync_time(
464    self,
465    pipe: mrsm.Pipe,
466    params: Optional[Dict[str, Any]] = None,
467    newest: bool = True,
468    debug: bool = False,
469) -> Union[datetime, int, None]:
470    """Get a Pipe's most recent datetime value from the API.
471
472    Parameters
473    ----------
474    pipe: meerschaum.Pipe
475        The pipe to select from.
476
477    params: Optional[Dict[str, Any]], default None
478        Optional params dictionary to build the WHERE clause.
479
480    newest: bool, default True
481        If `True`, get the most recent datetime (honoring `params`).
482        If `False`, get the oldest datetime (ASC instead of DESC).
483
484    Returns
485    -------
486    The most recent (or oldest if `newest` is `False`) datetime of a pipe,
487    rounded down to the closest minute.
488    """
489    from meerschaum.utils.misc import is_int
490    from meerschaum.utils.warnings import warn
491    r_url = pipe_r_url(pipe)
492    response = self.get(
493        r_url + '/sync_time',
494        json=params,
495        params={
496            'instance': self.get_pipe_instance_keys(pipe),
497            'newest': newest,
498            'debug': debug,
499        },
500        debug=debug,
501    )
502    if not response:
503        warn(f"Failed to get the sync time for {pipe}:\n" + response.text)
504        return None
505
506    j = response.json()
507    if j is None:
508        dt = None
509    else:
510        try:
511            dt = (
512                datetime.fromisoformat(j)
513                if not is_int(j)
514                else int(j)
515            )
516        except Exception as e:
517            warn(f"Failed to parse the sync time '{j}' for {pipe}:\n{e}")
518            dt = None
519    return dt

Get a Pipe's most recent datetime value from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe to select from.
  • params (Optional[Dict[str, Any]], default None): Optional params dictionary to build the WHERE clause.
  • newest (bool, default True): If True, get the most recent datetime (honoring params). If False, get the oldest datetime (ASC instead of DESC).
Returns
  • The most recent (or oldest if newest is False) datetime of a pipe, rounded down to the closest minute.
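
For example, a sketch of reading both ends of a pipe's datetime interval (the output shown is illustrative):

>>> conn.get_sync_time(pipe)
datetime.datetime(2024, 1, 2, 0, 0)
>>> conn.get_sync_time(pipe, newest=False)
datetime.datetime(2024, 1, 1, 0, 0)
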
def pipe_exists(self, pipe: meerschaum.Pipe, debug: bool = False) -> bool:
522def pipe_exists(
523    self,
524    pipe: mrsm.Pipe,
525    debug: bool = False
526) -> bool:
527    """Check the API to see if a Pipe exists.
528
529    Parameters
530    ----------
531    pipe: meerschaum.Pipe
532        The pipe which we are querying.
533        
534    Returns
535    -------
536    A bool indicating whether a pipe's underlying table exists.
537    """
538    from meerschaum.utils.debug import dprint
539    from meerschaum.utils.warnings import warn
540    r_url = pipe_r_url(pipe)
541    response = self.get(
542        r_url + '/exists',
543        params={
544            'instance': self.get_pipe_instance_keys(pipe),
545        },
546        debug=debug,
547    )
548    if not response:
549        warn(f"Failed to check if {pipe} exists:\n{response.text}")
550        return False
551    if debug:
552        dprint("Received response: " + str(response.text))
553    j = response.json()
554    if isinstance(j, dict) and 'detail' in j:
555        warn(j['detail'])
556    return j

Check the API to see if a Pipe exists.

Parameters
  • pipe (meerschaum.Pipe): The pipe which we are querying.
Returns
  • A bool indicating whether a pipe's underlying table exists.
def create_metadata(self, debug: bool = False) -> bool:
559def create_metadata(
560    self,
561    debug: bool = False
562) -> bool:
563    """Create metadata tables.
564
565    Returns
566    -------
567    A bool indicating success.
568    """
569    from meerschaum.utils.debug import dprint
570    from meerschaum._internal.static import STATIC_CONFIG
571    r_url = STATIC_CONFIG['api']['endpoints']['metadata']
572    response = self.post(r_url, debug=debug)
573    if debug:
574        dprint(f"Create metadata response: {response.text}")
575    try:
576        _ = json.loads(response.text)
577    except Exception as e:
578        warn(f"Failed to create metadata on {self}:\n{e}")
579        return False
580    return True

Create metadata tables.

Returns
  • A bool indicating success.
def get_pipe_rowcount( self, pipe: meerschaum.Pipe, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, remote: bool = False, debug: bool = False) -> int:
582def get_pipe_rowcount(
583    self,
584    pipe: mrsm.Pipe,
585    begin: Union[str, datetime, int, None] = None,
586    end: Union[str, datetime, int, None] = None,
587    params: Optional[Dict[str, Any]] = None,
588    remote: bool = False,
589    debug: bool = False,
590) -> int:
591    """Get a pipe's row count from the API.
592
593    Parameters
594    ----------
595    pipe: meerschaum.Pipe
596        The pipe whose row count we are counting.
597        
598    begin: Union[str, datetime, int, None], default None
599        If provided, bound the count by this datetime.
600
601    end: Union[str, datetime, int, None], default None
602        If provided, bound the count by this datetime.
603
604    params: Optional[Dict[str, Any]], default None
605        If provided, bound the count by these parameters.
606
607    remote: bool, default False
608        If `True`, return the rowcount for the fetch definition.
609
610    Returns
611    -------
612    The number of rows in the pipe's table, bounded by the given parameters.
613    If the table does not exist, return 0.
614    """
615    r_url = pipe_r_url(pipe)
616    response = self.get(
617        r_url + "/rowcount",
618        json = params,
619        params = {
620            'begin': begin,
621            'end': end,
622            'remote': remote,
623            'instance': self.get_pipe_instance_keys(pipe),
624        },
625        debug = debug
626    )
627    if not response:
628        warn(f"Failed to get the rowcount for {pipe}:\n{response.text}")
629        return 0
630    try:
631        return int(json.loads(response.text))
632    except Exception as e:
633        warn(f"Failed to get the rowcount for {pipe}:\n{e}")
634    return 0

Get a pipe's row count from the API.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose row count we are counting.
  • begin (Union[str, datetime, int, None], default None): If provided, bound the count by this datetime.
  • end (Union[str, datetime, int, None], default None): If provided, bound the count by this datetime.
  • params (Optional[Dict[str, Any]], default None): If provided, bound the count by these parameters.
  • remote (bool, default False): If True, return the rowcount for the fetch definition.
Returns
  • The number of rows in the pipe's table, bounded by the given parameters. If the table does not exist, return 0.
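
For example, a sketch of counting rows within a bounded interval (the bounds and output are illustrative):

>>> conn.get_pipe_rowcount(pipe, begin='2024-01-01', end='2024-02-01')
2
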
def drop_pipe( self, pipe: meerschaum.Pipe, debug: bool = False) -> Tuple[bool, str]:
637def drop_pipe(
638    self,
639    pipe: mrsm.Pipe,
640    debug: bool = False
641) -> SuccessTuple:
642    """
643    Drop a pipe's table but maintain its registration.
644
645    Parameters
646    ----------
647    pipe: meerschaum.Pipe
648        The pipe to be dropped.
649        
650    Returns
651    -------
652    A success tuple (bool, str).
653    """
654    from meerschaum.utils.warnings import error
655    from meerschaum.utils.debug import dprint
656    if pipe is None:
657        error("Pipe cannot be None.")
658    r_url = pipe_r_url(pipe)
659    response = self.delete(
660        r_url + '/drop',
661        params={
662            'instance': self.get_pipe_instance_keys(pipe),
663        },
664        debug=debug,
665    )
666    if debug:
667        dprint(response.text)
668
669    try:
670        data = response.json()
671    except Exception as e:
672        return False, f"Failed to drop {pipe}."
673
674    if isinstance(data, list):
675        response_tuple = data[0], data[1]
676    elif 'detail' in data:
677        response_tuple = response.__bool__(), data['detail']
678    else:
679        response_tuple = response.__bool__(), response.text
680
681    return response_tuple

Drop a pipe's table but maintain its registration.

Parameters
  • pipe (meerschaum.Pipe): The pipe to be dropped.
Returns
  • A success tuple (bool, str).
def clear_pipe( self, pipe: meerschaum.Pipe, begin: Union[str, datetime.datetime, int, NoneType] = None, end: Union[str, datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw) -> Tuple[bool, str]:
684def clear_pipe(
685    self,
686    pipe: mrsm.Pipe,
687    begin: Union[str, datetime, int, None] = None,
688    end: Union[str, datetime, int, None] = None,
689    params: Optional[Dict[str, Any]] = None,
690    debug: bool = False,
691    **kw
692) -> SuccessTuple:
693    """
694    Delete rows in a pipe's table.
695
696    Parameters
697    ----------
698    pipe: meerschaum.Pipe
699        The pipe with rows to be deleted.
700        
701    Returns
702    -------
703    A success tuple.
704    """
705    r_url = pipe_r_url(pipe)
706    response = self.delete(
707        r_url + '/clear',
708        params={
709            'begin': begin,
710            'end': end,
711            'params': json.dumps(params),
712            'instance': self.get_pipe_instance_keys(pipe),
713        },
714        debug=debug,
715    )
716    if debug:
717        dprint(response.text)
718
719    try:
720        data = response.json()
721    except Exception as e:
722        return False, f"Failed to clear {pipe} with constraints {begin=}, {end=}, {params=}."
723
724    if isinstance(data, list):
725        response_tuple = data[0], data[1]
726    elif 'detail' in data:
727        response_tuple = response.__bool__(), data['detail']
728    else:
729        response_tuple = response.__bool__(), response.text
730
731    return response_tuple

Delete rows in a pipe's table.

Parameters
  • pipe (meerschaum.Pipe): The pipe with rows to be deleted.
Returns
  • A success tuple.
def get_pipe_columns_types( self, pipe: meerschaum.Pipe, debug: bool = False) -> Optional[Dict[str, str]]:
734def get_pipe_columns_types(
735    self,
736    pipe: mrsm.Pipe,
737    debug: bool = False,
738) -> Union[Dict[str, str], None]:
739    """
740    Fetch the columns and types of the pipe's table.
741
742    Parameters
743    ----------
744    pipe: meerschaum.Pipe
745        The pipe whose columns are to be queried.
746
747    Returns
748    -------
749    A dictionary mapping column names to their database types.
750
751    Examples
752    --------
753    >>> {
754    ...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
755    ...   'id': 'BIGINT',
756    ...   'val': 'DOUBLE PRECISION',
757    ... }
758    >>>
759    """
760    r_url = pipe_r_url(pipe) + '/columns/types'
761    response = self.get(
762        r_url,
763        params={
764            'instance': self.get_pipe_instance_keys(pipe),
765        },
766        debug=debug,
767    )
768    j = response.json()
769    if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1:
770        warn(j['detail'])
771        return None
772    if not isinstance(j, dict):
773        warn(response.text)
774        return None
775    return j

Fetch the columns and types of the pipe's table.

Parameters
  • pipe (meerschaum.Pipe): The pipe whose columns are to be queried.
Returns
  • A dictionary mapping column names to their database types.
Examples
>>> {
...   'dt': 'TIMESTAMP WITHOUT TIMEZONE',
...   'id': 'BIGINT',
...   'val': 'DOUBLE PRECISION',
... }
>>>
def get_pipe_columns_indices( self, pipe: meerschaum.Pipe, debug: bool = False) -> Optional[Dict[str, str]]:
778def get_pipe_columns_indices(
779    self,
780    pipe: mrsm.Pipe,
781    debug: bool = False,
782) -> Union[Dict[str, str], None]:
783    """
784    Fetch the index information for a pipe.
785
786    Parameters
787    ----------
788    pipe: mrsm.Pipe
789        The pipe whose columns are to be queried.
790
791    Returns
792    -------
793    A dictionary mapping column names to a list of associated index information.
794    """
795    r_url = pipe_r_url(pipe) + '/columns/indices'
796    response = self.get(
797        r_url,
798        params={
799            'instance': self.get_pipe_instance_keys(pipe),
800        },
801        debug=debug,
802    )
803    j = response.json()
804    if isinstance(j, dict) and 'detail' in j and len(j.keys()) == 1:
805        warn(j['detail'])
806        return None
807    if not isinstance(j, dict):
808        warn(response.text)
809        return None
810    return j

Fetch the index information for a pipe.

Parameters
  • pipe (mrsm.Pipe): The pipe whose columns are to be queried.
Returns
  • A dictionary mapping column names to a list of associated index information.
def fetch( self, pipe: meerschaum.Pipe, begin: Union[datetime.datetime, str, int] = '', end: Union[datetime.datetime, int, NoneType] = None, params: Optional[Dict[str, Any]] = None, debug: bool = False, **kw: Any) -> "Iterator['pd.DataFrame']":
16def fetch(
17    self,
18    pipe: mrsm.Pipe,
19    begin: Union[datetime, str, int] = '',
20    end: Union[datetime, int, None] = None,
21    params: Optional[Dict[str, Any]] = None,
22    debug: bool = False,
23    **kw: Any
24) -> Iterator['pd.DataFrame']:
25    """Get the Pipe data from the remote Pipe."""
26    from meerschaum.utils.debug import dprint
27    from meerschaum.utils.warnings import warn, error
28    from meerschaum.config._patch import apply_patch_to_config
29
30    fetch_params = pipe.parameters.get('fetch', {})
31    if not fetch_params:
32        warn(f"Missing 'fetch' parameters for {pipe}.", stack=False)
33        return None
34
35    pipe_meta = fetch_params.get('pipe', {})
36    ### Legacy: check for `connector_keys`, etc. at the root.
37    if not pipe_meta:
38        ck, mk, lk = (
39            fetch_params.get('connector_keys', None),
40            fetch_params.get('metric_key', None),
41            fetch_params.get('location_key', None),
42        )
43        if not ck or not mk:
44            warn(f"Missing `fetch:pipe` keys for {pipe}.", stack=False)
45            return None
46
47        pipe_meta.update({
48            'connector': ck,
49            'metric': mk,
50            'location': lk,
51        })
52
53    pipe_meta['instance'] = self
54    source_pipe = mrsm.Pipe(**pipe_meta)
55
56    _params = copy.deepcopy(params) if params is not None else {}
57    _params = apply_patch_to_config(_params, fetch_params.get('params', {}))
58    select_columns = fetch_params.get('select_columns', [])
59    omit_columns = fetch_params.get('omit_columns', [])
60
61    return source_pipe.get_data(
62        select_columns = select_columns,
63        omit_columns = omit_columns,
64        begin = begin,
65        end = end,
66        params = _params,
67        debug = debug,
68        as_iterator = True,
69    )

Get the Pipe data from the remote Pipe.
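
For example, a minimal sketch of the 'fetch' parameters this method reads from the local pipe, assuming 'api:main' is the pipe's connector (the connector, metric, and instance keys are illustrative):

>>> pipe = mrsm.Pipe(
...     'api:main', 'weather',
...     instance='sql:main',
...     parameters={
...         'fetch': {
...             'pipe': {
...                 'connector': 'plugin:noaa',
...                 'metric': 'weather',
...                 'location': None,
...             },
...         },
...     },
... )
>>> pipe.sync()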

def register_plugin( self, plugin: meerschaum.Plugin, make_archive: bool = True, debug: bool = False) -> Tuple[bool, str]:
24def register_plugin(
25    self,
26    plugin: mrsm.core.Plugin,
27    make_archive: bool = True,
28    debug: bool = False,
29) -> SuccessTuple:
30    """Register a plugin and upload its archive."""
31    import json
32    archive_path = plugin.make_tar(debug=debug) if make_archive else plugin.archive_path
33    file_pointer = open(archive_path, 'rb')
34    files = {'archive': file_pointer}
35    metadata = {
36        'version': plugin.version,
37        'attributes': json.dumps(plugin.attributes),
38    }
39    r_url = plugin_r_url(plugin)
40    try:
41        response = self.post(r_url, files=files, params=metadata, debug=debug)
42    except Exception:
43        return False, f"Failed to register plugin '{plugin}'."
44    finally:
45        file_pointer.close()
46
47    try:
48        success, msg = json.loads(response.text)
49    except Exception:
50        return False, response.text
51
52    return success, msg

Register a plugin and upload its archive.

def install_plugin( self, name: str, skip_deps: bool = False, force: bool = False, debug: bool = False) -> Tuple[bool, str]:
55def install_plugin(
56    self,
57    name: str,
58    skip_deps: bool = False,
59    force: bool = False,
60    debug: bool = False
61) -> SuccessTuple:
62    """Download and attempt to install a plugin from the API."""
63    import os
64    import pathlib
65    import json
66    from meerschaum.core import Plugin
67    from meerschaum.config._paths import PLUGINS_TEMP_RESOURCES_PATH
68    from meerschaum.utils.debug import dprint
69    from meerschaum.utils.packages import attempt_import
70    binaryornot_check = attempt_import('binaryornot.check', lazy=False)
71    r_url = plugin_r_url(name)
72    dest = pathlib.Path(os.path.join(PLUGINS_TEMP_RESOURCES_PATH, name + '.tar.gz'))
73    if debug:
74        dprint(f"Fetching from '{self.url + r_url}' to '{dest}'...")
75    archive_path = self.wget(r_url, dest, debug=debug) 
76    is_binary = binaryornot_check.is_binary(str(archive_path))
77    if not is_binary:
78        fail_msg = f"Failed to download binary for plugin '{name}'."
79        try:
80            with open(archive_path, 'r') as f:
81                j = json.load(f)
82            if isinstance(j, list):
83                success, msg = tuple(j)
84            else:
85                success, msg = False, fail_msg
86        except Exception:
87            success, msg = False, fail_msg
88        return success, msg
89    plugin = Plugin(name, archive_path=archive_path, repo_connector=self)
90    return plugin.install(skip_deps=skip_deps, force=force, debug=debug)

Download and attempt to install a plugin from the API.
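
For example, a sketch of installing a plugin from a repository connector (the repository keys and plugin name are illustrative):

>>> repo = mrsm.get_connector('api', 'mrsm')
>>> success, msg = repo.install_plugin('noaa')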

def delete_plugin( self, plugin: meerschaum.Plugin, debug: bool = False) -> Tuple[bool, str]:
156def delete_plugin(
157    self,
158    plugin: mrsm.core.Plugin,
159    debug: bool = False
160) -> SuccessTuple:
161    """Delete a plugin from an API repository."""
162    import json
163    r_url = plugin_r_url(plugin)
164    try:
165        response = self.delete(r_url, debug=debug)
166    except Exception:
167        return False, f"Failed to delete plugin '{plugin}'."
168
169    try:
170        success, msg = json.loads(response.text)
171    except Exception:
172        return False, response.text
173
174    return success, msg

Delete a plugin from an API repository.

def get_plugins( self, user_id: Optional[int] = None, search_term: Optional[str] = None, debug: bool = False) -> List[str]:
 93def get_plugins(
 94    self,
 95    user_id: Optional[int] = None,
 96    search_term: Optional[str] = None,
 97    debug: bool = False
 98) -> List[str]:
 99    """Return a list of registered plugin names.
100
101    Parameters
102    ----------
103    user_id: Optional[int], default None
104        If specified, return all plugins from a certain user.
105
106    search_term: Optional[str], default None
107        If specified, return plugins beginning with this string.
108
109    Returns
110    -------
111    A list of plugin names.
112    """
113    import json
114    from meerschaum.utils.warnings import error
115    from meerschaum._internal.static import STATIC_CONFIG
116    response = self.get(
117        STATIC_CONFIG['api']['endpoints']['plugins'],
118        params = {'user_id': user_id, 'search_term': search_term},
119        use_token = True,
120        debug = debug
121    )
122    if not response:
123        return []
124    plugins = json.loads(response.text)
125    if not isinstance(plugins, list):
126        error(response.text)
127    return plugins

Return a list of registered plugin names.

Parameters
  • user_id (Optional[int], default None): If specified, return all plugins from a certain user.
  • search_term (Optional[str], default None): If specified, return plugins beginning with this string.
Returns
  • A list of plugin names.
def get_plugin_attributes( self, plugin: meerschaum.Plugin, debug: bool = False) -> Dict[str, Any]:
130def get_plugin_attributes(
131    self,
132    plugin: mrsm.core.Plugin,
133    debug: bool = False
134) -> Dict[str, Any]:
135    """
136    Return a plugin's attributes.
137    """
138    import json
139    from meerschaum.utils.warnings import warn, error
140    r_url = plugin_r_url(plugin) + '/attributes'
141    response = self.get(r_url, use_token=True, debug=debug)
142    attributes = response.json()
143    if isinstance(attributes, str) and attributes and attributes[0] == '{':
144        try:
145            attributes = json.loads(attributes)
146        except Exception:
147            pass
148    if not isinstance(attributes, dict):
149        error(response.text)
150    elif not response and 'detail' in attributes:
151        warn(attributes['detail'])
152        return {}
153    return attributes

Return a plugin's attributes.

def login( self, debug: bool = False, warn: bool = True, **kw: Any) -> Tuple[bool, str]:
19def login(
20    self,
21    debug: bool = False,
22    warn: bool = True,
23    **kw: Any
24) -> SuccessTuple:
25    """Log in and set the session token."""
26    if self.login_scheme == 'api_key':
27        validate_response = self.post(
28            STATIC_CONFIG['api']['endpoints']['tokens'] + '/validate',
29            headers={'Authorization': f'Bearer {self.api_key}'},
30            use_token=False,
31            debug=debug,
32        )
33        if not validate_response:
34            return False, "API key is not valid."
35        return True, "API key is valid."
36
37    try:
38        if self.login_scheme == 'password':
39            login_data = {
40                'username': self.username,
41                'password': self.password,
42            }
43        elif self.login_scheme == 'client_credentials':
44            login_data = {
45                'client_id': self.client_id,
46                'client_secret': self.client_secret,
47            }
48    except AttributeError:
49        login_data = {}
50
51    if not login_data:
52        return False, f"Please login with the command `login {self}`."
53
54    login_scheme_msg = (
55        f" as user '{login_data['username']}'"
56        if self.login_scheme == 'password'
57        else ''
58    )
59
60    response = self.post(
61        STATIC_CONFIG['api']['endpoints']['login'],
62        data=login_data,
63        use_token=False,
64        debug=debug,
65    )
66    if response:
67        msg = f"Successfully logged into '{self}'{login_scheme_msg}'."
68        self._token = json.loads(response.text)['access_token']
69        self._expires = datetime.datetime.strptime(
70            json.loads(response.text)['expires'], 
71            '%Y-%m-%dT%H:%M:%S.%f'
72        )
73    else:
74        msg = (
75            f"Failed to log into '{self}'{login_scheme_msg}.\n" +
76            f"    Please verify login details for connector '{self}'."
77        )
78        if warn and not self.__dict__.get('_emitted_warning', False):
79            _warn(msg, stack=False)
80            self._emitted_warning = True
81
82    return response.__bool__(), msg

Log in and set the session token.
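
For example, a sketch of an explicit login (credentials are read from the connector's attributes; the output shown is illustrative):

>>> success, msg = conn.login()
>>> success
True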

def test_connection(self, **kw: Any) -> Optional[bool]:
 85def test_connection(
 86    self,
 87    **kw: Any
 88) -> Union[bool, None]:
 89    """Test if a successful connection to the API may be made."""
 90    from meerschaum.connectors.poll import retry_connect
 91    _default_kw = {
 92        'max_retries': 1, 'retry_wait': 0, 'warn': False,
 93        'connector': self, 'enforce_chaining': False,
 94        'enforce_login': False,
 95    }
 96    _default_kw.update(kw)
 97    try:
 98        return retry_connect(**_default_kw)
 99    except Exception:
100        return False

Test if a successful connection to the API may be made.

def register_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
70def register_user(
71    self,
72    user: mrsm.core.User,
73    debug: bool = False,
74    **kw: Any
75) -> SuccessTuple:
76    """Register a new user."""
77    from meerschaum._internal.static import STATIC_CONFIG
78    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/register"
79    data = {
80        'username': user.username,
81        'password': user.password,
82        'attributes': json.dumps(user.attributes),
83    }
84    if user.type:
85        data['type'] = user.type
86    if user.email:
87        data['email'] = user.email
88    response = self.post(r_url, data=data, debug=debug)
89    try:
90        _json = json.loads(response.text)
91        if isinstance(_json, dict) and 'detail' in _json:
92            return False, _json['detail']
93        success_tuple = tuple(_json)
94    except Exception:
95        msg = response.text if response else f"Failed to register user '{user}'."
96        return False, msg
97
98    return tuple(success_tuple)

Register a new user.

def get_user_id( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Union[int, str, uuid.UUID, NoneType]:
101def get_user_id(
102    self,
103    user: mrsm.core.User,
104    debug: bool = False,
105    **kw: Any
106) -> Union[int, str, UUID, None]:
107    """Get a user's ID."""
108    from meerschaum._internal.static import STATIC_CONFIG
109    from meerschaum.utils.misc import is_int, is_uuid
110    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/id"
111    response = self.get(r_url, debug=debug, **kw)
112    try:
113        id_text = str(json.loads(response.text))
114        if is_int(id_text):
115            user_id = int(id_text)
116        elif is_uuid(id_text):
117            user_id = UUID(id_text)
118        else:
119            user_id = id_text
120    except Exception as e:
121        user_id = None
122    return user_id

Get a user's ID.

def get_users(self, debug: bool = False, **kw: Any) -> List[str]:
19def get_users(
20    self,
21    debug: bool = False,
22    **kw: Any
23) -> List[str]:
24    """
25    Return a list of registered usernames.
26    """
27    from meerschaum._internal.static import STATIC_CONFIG
28    response = self.get(
29        f"{STATIC_CONFIG['api']['endpoints']['users']}",
30        debug = debug,
31        use_token = True,
32    )
33    if not response:
34        return []
35    try:
36        return response.json()
37    except Exception as e:
38        return []

Return a list of registered usernames.

def edit_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
41def edit_user(
42    self,
43    user: mrsm.core.User,
44    debug: bool = False,
45    **kw: Any
46) -> SuccessTuple:
47    """Edit an existing user."""
48    from meerschaum._internal.static import STATIC_CONFIG
49    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/edit"
50    data = {
51        'username': user.username,
52        'password': user.password,
53        'type': user.type,
54        'email': user.email,
55        'attributes': json.dumps(user.attributes),
56    }
57    response = self.post(r_url, data=data, debug=debug)
58    try:
59        _json = json.loads(response.text)
60        if isinstance(_json, dict) and 'detail' in _json:
61            return False, _json['detail']
62        success_tuple = tuple(_json)
63    except Exception:
64        msg = response.text if response else f"Failed to edit user '{user}'."
65        return False, msg
66
67    return tuple(success_tuple)

Edit an existing user.

def delete_user( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Tuple[bool, str]:
125def delete_user(
126    self,
127    user: mrsm.core.User,
128    debug: bool = False,
129    **kw: Any
130) -> SuccessTuple:
131    """Delete a user."""
132    from meerschaum._internal.static import STATIC_CONFIG
133    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}"
134    response = self.delete(r_url, debug=debug)
135    try:
136        _json = json.loads(response.text)
137        if isinstance(_json, dict) and 'detail' in _json:
138            return False, _json['detail']
139        success_tuple = tuple(_json)
140    except Exception:
141        success_tuple = False, f"Failed to delete user '{user.username}'."
142    return success_tuple

Delete a user.

def get_user_password_hash( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
166def get_user_password_hash(
167    self,
168    user: mrsm.core.User,
169    debug: bool = False,
170    **kw: Any
171) -> Optional[str]:
172    """If configured, get a user's password hash."""
173    from meerschaum._internal.static import STATIC_CONFIG
174    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/password_hash'
175    response = self.get(r_url, debug=debug, **kw)
176    if not response:
177        return None
178    return response.json()

If configured, get a user's password hash.

def get_user_type( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw: Any) -> Optional[str]:
181def get_user_type(
182    self,
183    user: mrsm.core.User,
184    debug: bool = False,
185    **kw: Any
186) -> Optional[str]:
187    """If configured, get a user's type."""
188    from meerschaum._internal.static import STATIC_CONFIG
189    r_url = STATIC_CONFIG['api']['endpoints']['users'] + '/' + user.username + '/type'
190    response = self.get(r_url, debug=debug, **kw)
191    if not response:
192        return None
193    return response.json()

If configured, get a user's type.

def get_user_attributes( self, user: meerschaum.core.User._User.User, debug: bool = False, **kw) -> int:
145def get_user_attributes(
146    self,
147    user: mrsm.core.User,
148    debug: bool = False,
149    **kw
150) -> int:
151    """Get a user's attributes."""
152    from meerschaum._internal.static import STATIC_CONFIG
153    r_url = f"{STATIC_CONFIG['api']['endpoints']['users']}/{user.username}/attributes"
154    response = self.get(r_url, debug=debug, **kw)
155    try:
156        attributes = json.loads(response.text)
157    except Exception:
158        attributes = None
159    return attributes

Get a user's attributes.

def register_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
20def register_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
21    """
22    Register the provided token to the API.
23    """
24    from meerschaum.utils.dtypes import json_serialize_value
25    r_url = tokens_endpoint + '/register'
26    response = self.post(
27        r_url,
28        data=json.dumps({
29            'label': token.label,
30            'scopes': token.scopes,
31            'expiration': token.expiration,
32        }, default=json_serialize_value),
33        debug=debug,
34    )
35    if not response:
36        return False, f"Failed to register token:\n{response.text}"
37
38    data = response.json()
39    token.label = data['label']
40    token.secret = data['secret']
41    token.id = uuid.UUID(data['id'])
42    if data.get('expiration', None):
43        token.expiration = datetime.fromisoformat(data['expiration'])
44
45    return True, f"Registered token '{token.label}'."

Register the provided token to the API.
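
For example, a sketch of registering a token, assuming Token may be imported from meerschaum.core and constructed with a label (the import path, constructor arguments, and label are illustrative):

>>> from meerschaum.core import Token
>>> token = Token(label='etl-bot', instance=conn)
>>> success, msg = conn.register_token(token)
>>> token.secret is not None
True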

def get_token_model( self, token_id: uuid.UUID, debug: bool = False) -> 'Union[TokenModel, None]':
48def get_token_model(self, token_id: uuid.UUID, debug: bool = False) -> 'Union[TokenModel, None]':
49    """
50    Return a token's model from the API instance.
51    """
52    from meerschaum.models import TokenModel
53    r_url = tokens_endpoint + f'/{token_id}'
54    response = self.get(r_url, debug=debug)
55    if not response:
56        return None
57    data = response.json()
58    return TokenModel(**data)

Return a token's model from the API instance.

def get_tokens( self, labels: Optional[List[str]] = None, debug: bool = False) -> List[meerschaum.core.Token._Token.Token]:
61def get_tokens(self, labels: Optional[List[str]] = None, debug: bool = False) -> List[Token]:
62    """
63    Return the tokens registered to the current user.
64    """
65    from meerschaum.utils.warnings import warn
66    r_url = tokens_endpoint
67    params = {}
68    if labels:
69        params['labels'] = ','.join(labels)
70    response = self.get(r_url, params=params, debug=debug)
71    if not response:
72        warn(f"Could not get tokens from '{self}':\n{response.text}")
73        return []
74
75    tokens = [
76        Token(instance=self, **payload)
77        for payload in response.json()
78    ]
79    return tokens

Return the tokens registered to the current user.

def edit_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
 82def edit_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
 83    """
 84    Persist the token's in-memory state to the API.
 85    """
 86    r_url = tokens_endpoint + f"/{token.id}/edit"
 87    response = self.post(
 88        r_url,
 89        json={
 90            'creation': token.creation.isoformat() if token.creation else None,
 91            'expiration': token.expiration.isoformat() if token.expiration else None,
 92            'label': token.label,
 93            'is_valid': token.is_valid,
 94            'scopes': token.scopes,
 95        },
 96    )
 97    if not response:
 98        return False, f"Failed to edit token:\n{response.text}"
 99
100    success, msg = response.json()
101    return success, msg

Persist the token's in-memory state to the API.

def invalidate_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
104def invalidate_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
105    """
106    Invalidate the token, disabling it for future requests.
107    """
108    r_url = tokens_endpoint + f"/{token.id}/invalidate"
109    response = self.post(r_url)
110    if not response:
111        return False, f"Failed to invalidate token:\n{response.text}"
112
113    success, msg = response.json()
114    return success, msg

Invalidate the token, disabling it for future requests.

def get_token_scopes( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> List[str]:
117def get_token_scopes(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> List[str]:
118    """
119    Return the scopes for a token.
120    """
121    _token_id = (token_id.id if isinstance(token_id, Token) else token_id)
122    model = self.get_token_model(_token_id, debug=debug)
123    return getattr(model, 'scopes', [])

Return the scopes for a token.

def token_exists( self, token_id: Union[uuid.UUID, meerschaum.core.Token._Token.Token], debug: bool = False) -> bool:
126def token_exists(self, token_id: Union[uuid.UUID, Token], debug: bool = False) -> bool:
127    """
128    Return `True` if a token exists.
129    """
130    _token_id = (token_id.id if isinstance(token_id, Token) else token_id)
131    model = self.get_token_model(_token_id, debug=debug)
132    if model is None:
133        return False
134    return model.creation is not None

Return True if a token exists.

def delete_token( self, token: meerschaum.core.Token._Token.Token, debug: bool = False) -> Tuple[bool, str]:
137def delete_token(self, token: Token, debug: bool = False) -> mrsm.SuccessTuple:
138    """
139    Delete the token from the API.
140    """
141    r_url = tokens_endpoint + f"/{token.id}"
142    response = self.delete(r_url, debug=debug)
143    if not response:
144        return False, f"Failed to delete token:\n{response.text}"
145    
146    success, msg = response.json()
147    return success, msg

Delete the token from the API.

@classmethod
def from_uri( cls, uri: str, label: Optional[str] = None, as_dict: bool = False) -> Union[APIConnector, Dict[str, Union[str, int]]]:
13@classmethod
14def from_uri(
15    cls,
16    uri: str,
17    label: Optional[str] = None,
18    as_dict: bool = False,
19) -> Union[
20        'meerschaum.connectors.APIConnector',
21        Dict[str, Union[str, int]],
22    ]:
23    """
24    Create a new APIConnector from a URI string.
25
26    Parameters
27    ----------
28    uri: str
29        The URI connection string.
30
31    label: Optional[str], default None
32        If provided, use this as the connector label.
33        Otherwise use the determined database name.
34
35    as_dict: bool, default False
36        If `True`, return a dictionary of the keyword arguments
37        necessary to create a new `APIConnector`, otherwise create a new object.
38
39    Returns
40    -------
41    A new APIConnector object or a dictionary of attributes (if `as_dict` is `True`).
42    """
43    from meerschaum.connectors.sql import SQLConnector
44    params = SQLConnector.parse_uri(uri)
45    if 'host' not in params:
46        error("No host was found in the provided URI.")
47    params['protocol'] = params.pop('flavor')
48    params['label'] = label or (
49        (
50            (params['username'] + '@' if 'username' in params else '')
51            + params['host']
52        ).lower()
53    )
54
55    return cls(**params) if not as_dict else params

Create a new APIConnector from a URI string.

Parameters
  • uri (str): The URI connection string.
  • label (Optional[str], default None): If provided, use this as the connector label. Otherwise use the determined database name.
  • as_dict (bool, default False): If True, return a dictionary of the keyword arguments necessary to create a new APIConnector, otherwise create a new object.
Returns
  • A new APIConnector object or a dictionary of attributes (if as_dict is True).
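
For example, a sketch of building a connector from a URI (the credentials, host, and port are illustrative):

>>> conn = APIConnector.from_uri('http://user:pass@localhost:8000', label='local')
>>> conn.label
'local'
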
def get_jobs(self, debug: bool = False) -> Dict[str, meerschaum.Job]:
28def get_jobs(self, debug: bool = False) -> Dict[str, Job]:
29    """
30    Return a dictionary of remote jobs.
31    """
32    response = self.get(JOBS_ENDPOINT, debug=debug)
33    if not response:
34        warn(f"Failed to get remote jobs from {self}.")
35        return {}
36    return {
37        name: Job(
38            name,
39            job_meta['sysargs'],
40            executor_keys=str(self),
41            _properties=job_meta['daemon']['properties']
42        )
43        for name, job_meta in response.json().items()
44    }

Return a dictionary of remote jobs.
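
For example, a sketch of listing remote jobs through the connector (the job name shown is illustrative):

>>> jobs = conn.get_jobs()
>>> list(jobs)
['sync-weather']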

def get_job(self, name: str, debug: bool = False) -> meerschaum.Job:
47def get_job(self, name: str, debug: bool = False) -> Job:
48    """
49    Return a single Job object.
50    """
51    metadata = self.get_job_metadata(name, debug=debug)
52    if not metadata:
53        raise ValueError(f"Job '{name}' does not exist.")
54
55    return Job(
56        name,
57        metadata['sysargs'],
58        executor_keys=str(self),
59        _properties=metadata['daemon']['properties'],
60    )

Return a single Job object.

def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
 63def get_job_metadata(self, name: str, debug: bool = False) -> Dict[str, Any]:
 64    """
 65    Return the metadata for a single job.
 66    """
 67    now = time.perf_counter()
 68    _job_metadata_cache = self.__dict__.get('_job_metadata_cache', None)
 69    _job_metadata_timestamp = (
 70        _job_metadata_cache.get(name, {}).get('timestamp', None)
 71    ) if _job_metadata_cache is not None else None
 72
 73    if (
 74        _job_metadata_timestamp is not None
 75        and (now - _job_metadata_timestamp) < JOB_METADATA_CACHE_SECONDS
 76    ):
 77        if debug:
 78            dprint(f"Returning cached metadata for job '{name}'.")
 79        return _job_metadata_cache[name]['metadata']
 80
 81    response = self.get(JOBS_ENDPOINT + f"/{name}", debug=debug)
 82    if not response:
 83        if debug:
 84            msg = (
 85                response.json()['detail']
 86                if 'detail' in response.text
 87                else response.text
 88            )
 89            warn(f"Failed to get metadata for job '{name}':\n{msg}")
 90        return {}
 91
 92    metadata = response.json()
 93    if _job_metadata_cache is None:
 94        self._job_metadata_cache = {}
 95
 96    self._job_metadata_cache[name] = {
 97        'timestamp': now,
 98        'metadata': metadata,
 99    }
100    return metadata

Return the metadata for a single job.

def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
102def get_job_properties(self, name: str, debug: bool = False) -> Dict[str, Any]:
103    """
104    Return the daemon properties for a single job.
105    """
106    metadata = self.get_job_metadata(name, debug=debug)
107    return metadata.get('daemon', {}).get('properties', {})

Return the daemon properties for a single job.

def get_job_exists(self, name: str, debug: bool = False) -> bool:
149def get_job_exists(self, name: str, debug: bool = False) -> bool:
150    """
151    Return whether a job exists.
152    """
153    response = self.get(JOBS_ENDPOINT + f'/{name}/exists', debug=debug)
154    if not response:
155        warn(f"Failed to determine whether job '{name}' exists.")
156        return False
157
158    return response.json()

Return whether a job exists.

def delete_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
161def delete_job(self, name: str, debug: bool = False) -> SuccessTuple:
162    """
163    Delete a job.
164    """
165    response = self.delete(JOBS_ENDPOINT + f"/{name}", debug=debug)
166    if not response:
167        if 'detail' in response.text:
168            return False, response.json()['detail']
169
170        return False, response.text
171
172    return tuple(response.json())

Delete a job.

def start_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
175def start_job(self, name: str, debug: bool = False) -> SuccessTuple:
176    """
177    Start a job.
178    """
179    response = self.post(JOBS_ENDPOINT + f"/{name}/start", debug=debug)
180    if not response:
181        if 'detail' in response.text:
182            return False, response.json()['detail']
183        return False, response.text
184
185    return tuple(response.json())

Start a job.

def create_job( self, name: str, sysargs: List[str], properties: Optional[Dict[str, str]] = None, debug: bool = False) -> Tuple[bool, str]:
188def create_job(
189    self,
190    name: str,
191    sysargs: List[str],
192    properties: Optional[Dict[str, str]] = None,
193    debug: bool = False,
194) -> SuccessTuple:
195    """
196    Create a job.
197    """
198    response = self.post(
199        JOBS_ENDPOINT + f"/{name}",
200        json={
201            'sysargs': sysargs,
202            'properties': properties,
203        },
204        debug=debug,
205    )
206    if not response:
207        if 'detail' in response.text:
208            return False, response.json()['detail']
209        return False, response.text
210
211    return tuple(response.json())

Create a job.
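
For example, a sketch of creating and then starting a remote job (the job name, sysargs, and output are illustrative):

>>> success, msg = conn.create_job(
...     'sync-weather',
...     ['sync', 'pipes', '-c', 'plugin:noaa', '--loop'],
... )
>>> conn.start_job('sync-weather')
(True, "Started job 'sync-weather'.")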

def stop_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
214def stop_job(self, name: str, debug: bool = False) -> SuccessTuple:
215    """
216    Stop a job.
217    """
218    response = self.post(JOBS_ENDPOINT + f"/{name}/stop", debug=debug)
219    if not response:
220        if 'detail' in response.text:
221            return False, response.json()['detail']
222        return False, response.text
223
224    return tuple(response.json())

Stop a job.

def pause_job(self, name: str, debug: bool = False) -> Tuple[bool, str]:
227def pause_job(self, name: str, debug: bool = False) -> SuccessTuple:
228    """
229    Pause a job.
230    """
231    response = self.post(JOBS_ENDPOINT + f"/{name}/pause", debug=debug)
232    if not response:
233        if 'detail' in response.text:
234            return False, response.json()['detail']
235        return False, response.text
236
237    return tuple(response.json())

Pause a job.

def get_logs(self, name: str, debug: bool = False) -> str:
240def get_logs(self, name: str, debug: bool = False) -> str:
241    """
242    Return the logs for a job.
243    """
244    response = self.get(LOGS_ENDPOINT + f"/{name}")
245    if not response:
246        raise ValueError(f"Cannot fetch logs for job '{name}':\n{response.text}")
247
248    return response.json()

Return the logs for a job.
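
Note that unlike the `SuccessTuple` methods, `get_logs` raises a `ValueError` on failure rather than returning a status:

>>> try:
...     print(conn.get_logs('my-job'))
... except ValueError as e:
...     print(e)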

def get_job_stop_time(self, name: str, debug: bool = False) -> Optional[datetime.datetime]:
251def get_job_stop_time(self, name: str, debug: bool = False) -> Union[datetime, None]:
252    """
253    Return the job's manual stop time.
254    """
255    response = self.get(JOBS_ENDPOINT + f"/{name}/stop_time")
256    if not response:
257        warn(f"Failed to get stop time for job '{name}':\n{response.text}")
258        return None
259
260    data = response.json()
261    if data is None:
262        return None
263
264    return datetime.fromisoformat(data)

Return the job's manual stop time.
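
A hypothetical call (the job name is a placeholder):

>>> stop_time = conn.get_job_stop_time('my-job')
>>> stop_time  # a datetime, or None if no manual stop was requested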

def monitor_logs(self, name: str, callback_function: Callable[[Any], Any], input_callback_function: Callable[[NoneType], str], stop_callback_function: Callable[[NoneType], str], stop_on_exit: bool = False, strip_timestamps: bool = False, accept_input: bool = True, debug: bool = False):
348def monitor_logs(
349    self,
350    name: str,
351    callback_function: Callable[[Any], Any],
352    input_callback_function: Callable[[None], str],
353    stop_callback_function: Callable[[None], str],
354    stop_on_exit: bool = False,
355    strip_timestamps: bool = False,
356    accept_input: bool = True,
357    debug: bool = False,
358):
359    """
360    Monitor a job's log files and execute a callback with the changes.
361    """
362    return asyncio.run(
363        self.monitor_logs_async(
364            name,
365            callback_function,
366            input_callback_function=input_callback_function,
367            stop_callback_function=stop_callback_function,
368            stop_on_exit=stop_on_exit,
369            strip_timestamps=strip_timestamps,
370            accept_input=accept_input,
371            debug=debug
372        )
373    )

Monitor a job's log files and execute a callback with the changes.
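
A minimal sketch that streams new log lines to stdout; both callback parameters accept `None` (the source above short-circuits when they are `None`), and the job name is a placeholder:

>>> conn.monitor_logs(
...     'my-job',
...     print,
...     input_callback_function=None,
...     stop_callback_function=None,
...     stop_on_exit=True,
... )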

async def monitor_logs_async(self, name: str, callback_function: Callable[[Any], Any], input_callback_function: Callable[[], str], stop_callback_function: Callable[[Tuple[bool, str]], str], stop_on_exit: bool = False, strip_timestamps: bool = False, accept_input: bool = True, debug: bool = False):
267async def monitor_logs_async(
268    self,
269    name: str,
270    callback_function: Callable[[Any], Any],
271    input_callback_function: Callable[[], str],
272    stop_callback_function: Callable[[SuccessTuple], str],
273    stop_on_exit: bool = False,
274    strip_timestamps: bool = False,
275    accept_input: bool = True,
276    debug: bool = False,
277):
278    """
279    Monitor a job's log files and await a callback with the changes.
280    """
281    import traceback
282    from meerschaum.jobs import StopMonitoringLogs
283    from meerschaum.utils.formatting._jobs import strip_timestamp_from_line
284
285    websockets, websockets_exceptions = mrsm.attempt_import('websockets', 'websockets.exceptions')
286    protocol = 'ws' if self.URI.startswith('http://') else 'wss'
287    port = self.port if 'port' in self.__dict__ else ''
288    uri = f"{protocol}://{self.host}:{port}{LOGS_ENDPOINT}/{name}/ws"
289
290    async def _stdin_callback(client):
291        if input_callback_function is None:
292            return
293
294        if asyncio.iscoroutinefunction(input_callback_function):
295            data = await input_callback_function()
296        else:
297            data = input_callback_function()
298
299        await client.send(data)
300
301    async def _stop_callback(client):
302        try:
303            result = tuple(json.loads(await client.recv()))
304        except Exception as e:
305            warn(traceback.format_exc())
306            result = False, str(e)
307
308        if stop_callback_function is not None:
309            if asyncio.iscoroutinefunction(stop_callback_function):
310                await stop_callback_function(result)
311            else:
312                stop_callback_function(result)
313
314        if stop_on_exit:
315            raise StopMonitoringLogs
316
317    message_callbacks = {
318        JOBS_STDIN_MESSAGE: _stdin_callback,
319        JOBS_STOP_MESSAGE: _stop_callback,
320    }
321
322    async with websockets.connect(uri) as websocket:
323        try:
324            await websocket.send(self.token or 'no-login')
325        except websockets_exceptions.ConnectionClosedOK:
326            pass
327
328        while True:
329            try:
330                response = await websocket.recv()
331                callback = message_callbacks.get(response, None)
332                if callback is not None:
333                    await callback(websocket)
334                    continue
335
336                if strip_timestamps:
337                    response = strip_timestamp_from_line(response)
338
339                if asyncio.iscoroutinefunction(callback_function):
340                    await callback_function(response)
341                else:
342                    callback_function(response)
343            except (KeyboardInterrupt, StopMonitoringLogs):
344                await websocket.close()
345                break

Monitor a job's log files and await a callback with the changes.
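
The async variant may be awaited inside an existing event loop; a sketch under the same assumptions as above:

>>> import asyncio
>>> async def watch_logs():
...     await conn.monitor_logs_async(
...         'my-job',
...         print,
...         input_callback_function=None,
...         stop_callback_function=None,
...         stop_on_exit=True,
...     )
>>> asyncio.run(watch_logs())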

def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
375def get_job_is_blocking_on_stdin(self, name: str, debug: bool = False) -> bool:
376    """
377    Return whether a remote job is blocking on stdin.
378    """
379    response = self.get(JOBS_ENDPOINT + f'/{name}/is_blocking_on_stdin', debug=debug)
380    if not response:
381        return False
382
383    return response.json()

Return whether a remote job is blocking on stdin.
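
For illustration (job name hypothetical):

>>> if conn.get_job_is_blocking_on_stdin('my-job'):
...     print("The remote job is waiting for input.")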

def get_job_began(self, name: str, debug: bool = False) -> Optional[str]:
116def get_job_began(self, name: str, debug: bool = False) -> Union[str, None]:
117    """
118    Return a job's `began` timestamp, if it exists.
119    """
120    properties = self.get_job_properties(name, debug=debug)
121    began_str = properties.get('daemon', {}).get('began', None)
122    if began_str is None:
123        return None
124
125    return began_str

Return a job's began timestamp, if it exists.

def get_job_ended(self, name: str, debug: bool = False) -> Optional[str]:
127def get_job_ended(self, name: str, debug: bool = False) -> Union[str, None]:
128    """
129    Return a job's `ended` timestamp, if it exists.
130    """
131    properties = self.get_job_properties(name, debug=debug)
132    ended_str = properties.get('daemon', {}).get('ended', None)
133    if ended_str is None:
134        return None
135
136    return ended_str

Return a job's ended timestamp, if it exists.

def get_job_paused(self, name: str, debug: bool = False) -> Optional[str]:
138def get_job_paused(self, name: str, debug: bool = False) -> Union[str, None]:
139    """
140    Return a job's `paused` timestamp, if it exists.
141    """
142    properties = self.get_job_properties(name, debug=debug)
143    paused_str = properties.get('daemon', {}).get('paused', None)
144    if paused_str is None:
145        return None
146
147    return paused_str

Return a job's paused timestamp, if it exists.
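
The three timestamp accessors above follow the same pattern, reading from the job's daemon properties; a sketch with a placeholder job name:

>>> conn.get_job_began('my-job')   # timestamp string, or None if unset
>>> conn.get_job_ended('my-job')
>>> conn.get_job_paused('my-job')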

def get_job_status(self, name: str, debug: bool = False) -> str:
109def get_job_status(self, name: str, debug: bool = False) -> str:
110    """
111    Return the job's status.
112    """
113    metadata = self.get_job_metadata(name, debug=debug)
114    return metadata.get('status', 'stopped')

Return the job's status.
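
A hypothetical call; per the source above, the status falls back to 'stopped' when unset:

>>> conn.get_job_status('my-job')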

def get_connector(type: str = None, label: str = None, refresh: bool = False, debug: bool = False, _load_plugins: bool = True, **kw: Any) -> Connector:
 68def get_connector(
 69    type: str = None,
 70    label: str = None,
 71    refresh: bool = False,
 72    debug: bool = False,
 73    _load_plugins: bool = True,
 74    **kw: Any
 75) -> Connector:
 76    """
 77    Return existing connector or create new connection and store for reuse.
 78    
 79    You can create new connectors if enough parameters are provided for the given type and flavor.
 80
 81    Parameters
 82    ----------
 83    type: Optional[str], default None
 84        Connector type (sql, api, etc.).
 85        Defaults to the type of the configured `instance_connector`.
 86
 87    label: Optional[str], default None
 88        Connector label (e.g. main). Defaults to `'main'`.
 89
 90    refresh: bool, default False
 91        Refresh the Connector instance / construct new object. Defaults to `False`.
 92
 93    kw: Any
 94        Other arguments to pass to the Connector constructor.
 95        If the Connector has already been constructed and new arguments are provided,
 96        `refresh` is set to `True` and the old Connector is replaced.
 97
 98    Returns
 99    -------
100    A new Meerschaum connector (e.g. `meerschaum.connectors.api.APIConnector`,
101    `meerschaum.connectors.sql.SQLConnector`).
102    
103    Examples
104    --------
105    The following parameters would create a new
106    `meerschaum.connectors.sql.SQLConnector` that isn't in the configuration file.
107
108    ```
109    >>> conn = get_connector(
110    ...     type = 'sql',
111    ...     label = 'newlabel',
112    ...     flavor = 'sqlite',
113    ...     database = '/file/path/to/database.db'
114    ... )
115    >>>
116    ```
117
118    """
119    from meerschaum.connectors.parse import parse_instance_keys
120    from meerschaum.config import get_config
121    from meerschaum._internal.static import STATIC_CONFIG
122    from meerschaum.utils.warnings import warn
123    global _loaded_plugin_connectors
124    if isinstance(type, str) and not label and ':' in type:
125        type, label = type.split(':', maxsplit=1)
126
127    if _load_plugins:
128        with _locks['_loaded_plugin_connectors']:
129            if not _loaded_plugin_connectors:
130                load_plugin_connectors()
131                _load_builtin_custom_connectors()
132                _loaded_plugin_connectors = True
133
134    if type is None and label is None:
135        default_instance_keys = get_config('meerschaum', 'instance', patch=True)
136        ### recursive call to get_connector
137        return parse_instance_keys(default_instance_keys)
138
139    ### NOTE: the default instance connector may not be main.
140    ### Only fall back to 'main' if the type is provided but the label is omitted.
141    label = label if label is not None else STATIC_CONFIG['connectors']['default_label']
142
143    ### type might actually be a label. Check if so and raise a warning.
144    if type not in connectors:
145        possibilities, poss_msg = [], ""
146        for _type in get_config('meerschaum', 'connectors'):
147            if type in get_config('meerschaum', 'connectors', _type):
148                possibilities.append(f"{_type}:{type}")
149        if len(possibilities) > 0:
150            poss_msg = " Did you mean"
151            for poss in possibilities[:-1]:
152                poss_msg += f" '{poss}',"
153            if poss_msg.endswith(','):
154                poss_msg = poss_msg[:-1]
155            if len(possibilities) > 1:
156                poss_msg += " or"
157            poss_msg += f" '{possibilities[-1]}'?"
158
159        warn(f"Cannot create Connector of type '{type}'." + poss_msg, stack=False)
160        return None
161
162    if 'sql' not in types:
163        from meerschaum.connectors.plugin import PluginConnector
164        from meerschaum.connectors.valkey import ValkeyConnector
165        with _locks['types']:
166            types.update({
167                'api': APIConnector,
168                'sql': SQLConnector,
169                'plugin': PluginConnector,
170                'valkey': ValkeyConnector,
171            })
172
173    ### determine if we need to call the constructor
174    if not refresh:
175        ### see if any user-supplied arguments differ from the existing instance
176        if label in connectors[type]:
177            warning_message = None
178            for attribute, value in kw.items():
179                if attribute not in connectors[type][label].meta:
180                    import inspect
181                    cls = connectors[type][label].__class__
182                    cls_init_signature = inspect.signature(cls)
183                    cls_init_params = cls_init_signature.parameters
184                    if attribute not in cls_init_params:
185                        warning_message = (
186                            f"Received new attribute '{attribute}' not present in connector " +
187                            f"{connectors[type][label]}.\n"
188                        )
189                elif connectors[type][label].__dict__[attribute] != value:
190                    warning_message = (
191                        f"Mismatched values for attribute '{attribute}' in connector "
192                        + f"'{connectors[type][label]}'.\n" +
193                        f"  - Keyword value: '{value}'\n" +
194                        f"  - Existing value: '{connectors[type][label].__dict__[attribute]}'\n"
195                    )
196            if warning_message is not None:
197                warning_message += (
198                    "\nSetting `refresh` to True and recreating connector with type:"
199                    + f" '{type}' and label '{label}'."
200                )
201                refresh = True
202                warn(warning_message)
203        else: ### connector doesn't yet exist
204            refresh = True
205
206    ### only create an object if refresh is True
207    ### (can be manually specified, otherwise determined above)
208    if refresh:
209        with _locks['connectors']:
210            try:
211                ### will raise an error if configuration is incorrect / missing
212                conn = types[type](label=label, **kw)
213                connectors[type][label] = conn
214            except InvalidAttributesError as ie:
215                warn(
216                    f"Incorrect attributes for connector '{type}:{label}'.\n"
217                    + str(ie),
218                    stack = False,
219                )
220                conn = None
221            except Exception as e:
222                from meerschaum.utils.formatting import get_console
223                console = get_console()
224                if console:
225                    console.print_exception()
226                warn(
227                    f"Exception when creating connector '{type}:{label}'.\n" + str(e),
228                    stack = False,
229                )
230                conn = None
231        if conn is None:
232            return None
233
234    return connectors[type][label]

Return existing connector or create new connection and store for reuse.

You can create new connectors if enough parameters are provided for the given type and flavor.

Parameters
  • type (Optional[str], default None): Connector type (sql, api, etc.). Defaults to the type of the configured instance_connector.
  • label (Optional[str], default None): Connector label (e.g. main). Defaults to 'main'.
  • refresh (bool, default False): Refresh the Connector instance / construct new object. Defaults to False.
  • kw (Any): Other arguments to pass to the Connector constructor. If the Connector has already been constructed and new arguments are provided, refresh is set to True and the old Connector is replaced.
Returns
  • A new Meerschaum connector (e.g. meerschaum.connectors.api.APIConnector, meerschaum.connectors.sql.SQLConnector).
Examples

The following parameters would create a new meerschaum.connectors.sql.SQLConnector that isn't in the configuration file.

>>> conn = get_connector(
...     type = 'sql',
...     label = 'newlabel',
...     flavor = 'sqlite',
...     database = '/file/path/to/database.db'
... )
>>>
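
As the source above shows, the `type` argument also accepts a combined `'type:label'` key, which is split on the first colon when `label` is omitted:

>>> conn = get_connector('sql:newlabel')  # equivalent to get_connector(type='sql', label='newlabel')
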
def is_connected(keys: str, **kw) -> bool:
237def is_connected(keys: str, **kw) -> bool:
238    """
239    Check if the connector keys correspond to an active connection.
240    If the connector has not been created, it will immediately return `False`.
241    If the connector exists but cannot communicate with the source, return `False`.
242    
243    **NOTE:** Only works with instance connectors (`SQLConnectors` and `APIConnectors`).
244    Keyword arguments are passed to `meerschaum.connectors.poll.retry_connect`.
245
246    Parameters
247    ----------
248    keys:
249        The keys to the connector (e.g. `'sql:main'`).
250        
251    Returns
252    -------
253    A `bool` corresponding to whether a successful connection may be made.
254
255    """
256    import warnings
257    if ':' not in keys:
258        warn(f"Invalid connector keys '{keys}'")
259
260    try:
261        typ, label = keys.split(':')
262    except Exception:
263        return False
264    if typ not in instance_types:
265        return False
266    if label not in connectors.get(typ, {}):
267        return False
268
269    from meerschaum.connectors.parse import parse_instance_keys
270    conn = parse_instance_keys(keys)
271    try:
272        with warnings.catch_warnings():
273            warnings.filterwarnings('ignore')
274            return conn.test_connection(**kw)
275    except Exception:
276        return False

Check if the connector keys correspond to an active connection. If the connector has not been created, it will immediately return False. If the connector exists but cannot communicate with the source, return False.

NOTE: Only works with instance connectors (SQLConnectors and APIConnectors). Keyword arguments are passed to meerschaum.connectors.poll.retry_connect.

Parameters
  • keys: The keys to the connector (e.g. 'sql:main').
Returns
  • A bool corresponding to whether a successful connection may be made.
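
A minimal sketch (the keys are placeholders; per the docstring, this returns False if the connector has not yet been constructed):

>>> from meerschaum.connectors import is_connected
>>> is_connected('sql:main')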